aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_dir.c8
-rw-r--r--fs/9p/vfs_file.c11
-rw-r--r--fs/9p/vfs_inode.c111
-rw-r--r--fs/9p/vfs_super.c55
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/dir.c6
-rw-r--r--fs/afs/file.c64
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/mntpt.c6
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/autofs4/root.c22
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/block_dev.c330
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/xattr.h6
-rw-r--r--fs/buffer.c26
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/auth.c9
-rw-r--r--fs/ceph/auth.h2
-rw-r--r--fs/ceph/auth_none.c1
-rw-r--r--fs/ceph/auth_x.c19
-rw-r--r--fs/ceph/caps.c24
-rw-r--r--fs/ceph/ceph_fs.h62
-rw-r--r--fs/ceph/ceph_strings.c16
-rw-r--r--fs/ceph/debugfs.c13
-rw-r--r--fs/ceph/dir.c45
-rw-r--r--fs/ceph/export.c14
-rw-r--r--fs/ceph/file.c19
-rw-r--r--fs/ceph/inode.c97
-rw-r--r--fs/ceph/ioctl.c2
-rw-r--r--fs/ceph/mds_client.c385
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/messenger.c91
-rw-r--r--fs/ceph/messenger.h10
-rw-r--r--fs/ceph/mon_client.c257
-rw-r--r--fs/ceph/mon_client.h27
-rw-r--r--fs/ceph/msgpool.c180
-rw-r--r--fs/ceph/msgpool.h12
-rw-r--r--fs/ceph/msgr.h21
-rw-r--r--fs/ceph/osd_client.c98
-rw-r--r--fs/ceph/pagelist.c2
-rw-r--r--fs/ceph/rados.h23
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c128
-rw-r--r--fs/ceph/super.h30
-rw-r--r--fs/ceph/xattr.c35
-rw-r--r--fs/cifs/asn1.c103
-rw-r--r--fs/cifs/cifs_debug.c48
-rw-r--r--fs/cifs/cifs_debug.h42
-rw-r--r--fs/cifs/cifs_dfs_ref.c34
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifs_unicode.c5
-rw-r--r--fs/cifs/cifsacl.c76
-rw-r--r--fs/cifs/cifsencrypt.c10
-rw-r--r--fs/cifs/cifsfs.c163
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h9
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c437
-rw-r--r--fs/cifs/connect.c639
-rw-r--r--fs/cifs/dir.c91
-rw-r--r--fs/cifs/dns_resolve.c16
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/cifs/file.c222
-rw-r--r--fs/cifs/inode.c123
-rw-r--r--fs/cifs/ioctl.c10
-rw-r--r--fs/cifs/link.c10
-rw-r--r--fs/cifs/misc.c81
-rw-r--r--fs/cifs/netmisc.c16
-rw-r--r--fs/cifs/readdir.c85
-rw-r--r--fs/cifs/sess.c81
-rw-r--r--fs/cifs/transport.c92
-rw-r--r--fs/cifs/xattr.c40
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/pioctl.c76
-rw-r--r--fs/coda/psdev.c5
-rw-r--r--fs/dcache.c20
-rw-r--r--fs/devpts/inode.c9
-rw-r--r--fs/dlm/lock.c5
-rw-r--r--fs/dlm/user.c88
-rw-r--r--fs/drop_caches.c24
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h5
-rw-r--r--fs/ecryptfs/file.c4
-rw-r--r--fs/ecryptfs/inode.c48
-rw-r--r--fs/ecryptfs/main.c166
-rw-r--r--fs/ecryptfs/mmap.c19
-rw-r--r--fs/ecryptfs/read_write.c13
-rw-r--r--fs/ecryptfs/super.c22
-rw-r--r--fs/eventpoll.c3
-rw-r--r--fs/exofs/dir.c2
-rw-r--r--fs/exofs/inode.c41
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/balloc.c6
-rw-r--r--fs/ext2/ialloc.c21
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/super.c99
-rw-r--r--fs/ext2/xattr.c12
-rw-r--r--fs/ext2/xattr.h12
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c2
-rw-r--r--fs/ext2/xattr_user.c2
-rw-r--r--fs/ext3/acl.c4
-rw-r--r--fs/ext3/balloc.c6
-rw-r--r--fs/ext3/fsync.c23
-rw-r--r--fs/ext3/ialloc.c13
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/super.c77
-rw-r--r--fs/ext3/xattr.c10
-rw-r--r--fs/ext3/xattr.h12
-rw-r--r--fs/ext3/xattr_security.c2
-rw-r--r--fs/ext3/xattr_trusted.c2
-rw-r--r--fs/ext3/xattr_user.c2
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/fsync.c6
-rw-r--r--fs/ext4/ialloc.c12
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/xattr.c10
-rw-r--r--fs/ext4/xattr.h12
-rw-r--r--fs/ext4/xattr_security.c2
-rw-r--r--fs/ext4/xattr_trusted.c2
-rw-r--r--fs/ext4/xattr_user.c2
-rw-r--r--fs/fat/dir.c28
-rw-r--r--fs/fat/fat.h4
-rw-r--r--fs/fat/file.c19
-rw-r--r--fs/fat/inode.c6
-rw-r--r--fs/fcntl.c71
-rw-r--r--fs/fs-writeback.c104
-rw-r--r--fs/generic_acl.c4
-rw-r--r--fs/gfs2/acl.c2
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c17
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/incore.h11
-rw-r--r--fs/gfs2/inode.c101
-rw-r--r--fs/gfs2/inode.h5
-rw-r--r--fs/gfs2/log.c158
-rw-r--r--fs/gfs2/log.h1
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c19
-rw-r--r--fs/gfs2/quota.c114
-rw-r--r--fs/gfs2/rgrp.c73
-rw-r--r--fs/gfs2/super.c11
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trans.c18
-rw-r--r--fs/gfs2/xattr.c6
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c12
-rw-r--r--fs/inode.c28
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioctl.c15
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c33
-rw-r--r--fs/jbd2/checkpoint.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/background.c3
-rw-r--r--fs/jffs2/erase.c12
-rw-r--r--fs/jffs2/fs.c10
-rw-r--r--fs/jffs2/gc.c17
-rw-r--r--fs/jffs2/nodelist.h10
-rw-r--r--fs/jffs2/nodemgmt.c28
-rw-r--r--fs/jffs2/os-linux.h3
-rw-r--r--fs/jffs2/scan.c4
-rw-r--r--fs/jffs2/security.c2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/wbuf.c8
-rw-r--r--fs/jffs2/xattr.c8
-rw-r--r--fs/jffs2/xattr.h8
-rw-r--r--fs/jffs2/xattr_trusted.c2
-rw-r--r--fs/jffs2/xattr_user.c2
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/jfs_inode.c12
-rw-r--r--fs/libfs.c35
-rw-r--r--fs/logfs/dev_bdev.c6
-rw-r--r--fs/logfs/dev_mtd.c26
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/logfs/file.c16
-rw-r--r--fs/logfs/gc.c49
-rw-r--r--fs/logfs/inode.c15
-rw-r--r--fs/logfs/journal.c7
-rw-r--r--fs/logfs/logfs.h15
-rw-r--r--fs/logfs/logfs_abi.h10
-rw-r--r--fs/logfs/readwrite.c19
-rw-r--r--fs/logfs/segment.c7
-rw-r--r--fs/logfs/super.c8
-rw-r--r--fs/minix/bitmap.c5
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/minix/namei.c11
-rw-r--r--fs/namei.c5
-rw-r--r--fs/namespace.c13
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/ncpfs/ioctl.c27
-rw-r--r--fs/nfs/client.c55
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/dir.c143
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/fscache.c3
-rw-r--r--fs/nfs/getroot.c191
-rw-r--r--fs/nfs/inode.c58
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/iostat.h6
-rw-r--r--fs/nfs/namespace.c20
-rw-r--r--fs/nfs/nfs3acl.c23
-rw-r--r--fs/nfs/nfs3proc.c128
-rw-r--r--fs/nfs/nfs3xdr.c2
-rw-r--r--fs/nfs/nfs4_fs.h8
-rw-r--r--fs/nfs/nfs4namespace.c12
-rw-r--r--fs/nfs/nfs4proc.c172
-rw-r--r--fs/nfs/nfs4state.c36
-rw-r--r--fs/nfs/nfs4xdr.c22
-rw-r--r--fs/nfs/nfsroot.c14
-rw-r--r--fs/nfs/pagelist.c14
-rw-r--r--fs/nfs/proc.c144
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c147
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfsd/export.c44
-rw-r--r--fs/nfsd/nfs4callback.c140
-rw-r--r--fs/nfsd/nfs4proc.c50
-rw-r--r--fs/nfsd/nfs4recover.c87
-rw-r--r--fs/nfsd/nfs4state.c376
-rw-r--r--fs/nfsd/nfs4xdr.c27
-rw-r--r--fs/nfsd/nfsctl.c64
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/state.h47
-rw-r--r--fs/nfsd/vfs.c13
-rw-r--r--fs/nfsd/vfs.h1
-rw-r--r--fs/nfsd/xdr4.h11
-rw-r--r--fs/nilfs2/alloc.c154
-rw-r--r--fs/nilfs2/alloc.h7
-rw-r--r--fs/nilfs2/btree.c91
-rw-r--r--fs/nilfs2/btree.h23
-rw-r--r--fs/nilfs2/inode.c15
-rw-r--r--fs/nilfs2/recovery.c2
-rw-r--r--fs/nilfs2/segbuf.c70
-rw-r--r--fs/nilfs2/segbuf.h10
-rw-r--r--fs/nilfs2/segment.c157
-rw-r--r--fs/nilfs2/segment.h6
-rw-r--r--fs/nilfs2/super.c218
-rw-r--r--fs/nilfs2/the_nilfs.c14
-rw-r--r--fs/notify/inotify/inotify.c88
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/alloc.c908
-rw-r--r--fs/ocfs2/alloc.h12
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/cluster/tcp.c3
-rw-r--r--fs/ocfs2/dir.c75
-rw-r--r--fs/ocfs2/dlm/dlmast.c8
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c28
-rw-r--r--fs/ocfs2/dlm/dlmlock.c6
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c30
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c27
-rw-r--r--fs/ocfs2/dlm/dlmthread.c16
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c3
-rw-r--r--fs/ocfs2/dlmglue.c3
-rw-r--r--fs/ocfs2/file.c236
-rw-r--r--fs/ocfs2/inode.c45
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c26
-rw-r--r--fs/ocfs2/journal.h15
-rw-r--r--fs/ocfs2/localalloc.c275
-rw-r--r--fs/ocfs2/localalloc.h3
-rw-r--r--fs/ocfs2/mmap.c48
-rw-r--r--fs/ocfs2/namei.c100
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/ocfs2_fs.h144
-rw-r--r--fs/ocfs2/quota.h12
-rw-r--r--fs/ocfs2/quota_global.c351
-rw-r--r--fs/ocfs2/quota_local.c183
-rw-r--r--fs/ocfs2/refcounttree.c74
-rw-r--r--fs/ocfs2/refcounttree.h4
-rw-r--r--fs/ocfs2/reservations.c847
-rw-r--r--fs/ocfs2/reservations.h159
-rw-r--r--fs/ocfs2/resize.c19
-rw-r--r--fs/ocfs2/suballoc.c688
-rw-r--r--fs/ocfs2/suballoc.h21
-rw-r--r--fs/ocfs2/super.c92
-rw-r--r--fs/ocfs2/super.h7
-rw-r--r--fs/ocfs2/xattr.c115
-rw-r--r--fs/ocfs2/xattr.h12
-rw-r--r--fs/omfs/inode.c5
-rw-r--r--fs/open.c166
-rw-r--r--fs/partitions/acorn.c68
-rw-r--r--fs/partitions/acorn.h10
-rw-r--r--fs/partitions/amiga.c13
-rw-r--r--fs/partitions/amiga.h2
-rw-r--r--fs/partitions/atari.c8
-rw-r--r--fs/partitions/atari.h2
-rw-r--r--fs/partitions/check.c84
-rw-r--r--fs/partitions/check.h12
-rw-r--r--fs/partitions/efi.c93
-rw-r--r--fs/partitions/efi.h2
-rw-r--r--fs/partitions/ibm.c21
-rw-r--r--fs/partitions/ibm.h2
-rw-r--r--fs/partitions/karma.c4
-rw-r--r--fs/partitions/karma.h2
-rw-r--r--fs/partitions/ldm.c89
-rw-r--r--fs/partitions/ldm.h2
-rw-r--r--fs/partitions/mac.c13
-rw-r--r--fs/partitions/mac.h2
-rw-r--r--fs/partitions/msdos.c87
-rw-r--r--fs/partitions/msdos.h2
-rw-r--r--fs/partitions/osf.c4
-rw-r--r--fs/partitions/osf.h2
-rw-r--r--fs/partitions/sgi.c6
-rw-r--r--fs/partitions/sgi.h2
-rw-r--r--fs/partitions/sun.c6
-rw-r--r--fs/partitions/sun.h2
-rw-r--r--fs/partitions/sysv68.c6
-rw-r--r--fs/partitions/sysv68.h2
-rw-r--r--fs/partitions/ultrix.c4
-rw-r--r--fs/partitions/ultrix.h2
-rw-r--r--fs/pipe.c122
-rw-r--r--fs/proc/base.c10
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--fs/proc/kmsg.c1
-rw-r--r--fs/proc/vmcore.c1
-rw-r--r--fs/quota/dquot.c275
-rw-r--r--fs/quota/quota.c95
-rw-r--r--fs/quota/quota_tree.c50
-rw-r--r--fs/quota/quota_tree.h6
-rw-r--r--fs/quota/quota_v1.c4
-rw-r--r--fs/quota/quota_v2.c6
-rw-r--r--fs/ramfs/inode.c22
-rw-r--r--fs/reiserfs/file.c3
-rw-r--r--fs/reiserfs/inode.c3
-rw-r--r--fs/reiserfs/namei.c18
-rw-r--r--fs/reiserfs/xattr.c16
-rw-r--r--fs/reiserfs/xattr_acl.c4
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/smbfs/dir.c2
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/smbfs/ioctl.c10
-rw-r--r--fs/smbfs/proto.h2
-rw-r--r--fs/splice.c151
-rw-r--r--fs/statfs.c196
-rw-r--r--fs/super.c321
-rw-r--r--fs/sync.c88
-rw-r--r--fs/sysfs/bin.c26
-rw-r--r--fs/sysfs/dir.c114
-rw-r--r--fs/sysfs/file.c17
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysfs/mount.c95
-rw-r--r--fs/sysfs/symlink.c36
-rw-r--r--fs/sysfs/sysfs.h34
-rw-r--r--fs/sysv/ialloc.c11
-rw-r--r--fs/timerfd.c25
-rw-r--r--fs/ubifs/dir.c9
-rw-r--r--fs/ubifs/io.c1
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c45
-rw-r--r--fs/udf/ialloc.c11
-rw-r--r--fs/udf/namei.c10
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/ialloc.c10
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/ufs/symlink.c8
-rw-r--r--fs/ufs/truncate.c10
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/xattr.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c8
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c10
-rw-r--r--fs/xfs/xfs_acl.h4
394 files changed, 10062 insertions, 7267 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index ed835836e0dc..32ef4009d030 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,7 +40,9 @@
40extern struct file_system_type v9fs_fs_type; 40extern struct file_system_type v9fs_fs_type;
41extern const struct address_space_operations v9fs_addr_operations; 41extern const struct address_space_operations v9fs_addr_operations;
42extern const struct file_operations v9fs_file_operations; 42extern const struct file_operations v9fs_file_operations;
43extern const struct file_operations v9fs_file_operations_dotl;
43extern const struct file_operations v9fs_dir_operations; 44extern const struct file_operations v9fs_dir_operations;
45extern const struct file_operations v9fs_dir_operations_dotl;
44extern const struct dentry_operations v9fs_dentry_operations; 46extern const struct dentry_operations v9fs_dentry_operations;
45extern const struct dentry_operations v9fs_cached_dentry_operations; 47extern const struct dentry_operations v9fs_cached_dentry_operations;
46 48
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0adfd64dfcee..d61e3b28ce37 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -203,3 +203,11 @@ const struct file_operations v9fs_dir_operations = {
203 .open = v9fs_file_open, 203 .open = v9fs_file_open,
204 .release = v9fs_dir_release, 204 .release = v9fs_dir_release,
205}; 205};
206
207const struct file_operations v9fs_dir_operations_dotl = {
208 .read = generic_read_dir,
209 .llseek = generic_file_llseek,
210 .readdir = v9fs_dir_readdir,
211 .open = v9fs_file_open,
212 .release = v9fs_dir_release,
213};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index df52d488d2a6..25b300e1c9d7 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -296,3 +296,14 @@ const struct file_operations v9fs_file_operations = {
296 .mmap = generic_file_readonly_mmap, 296 .mmap = generic_file_readonly_mmap,
297 .fsync = v9fs_file_fsync, 297 .fsync = v9fs_file_fsync,
298}; 298};
299
300const struct file_operations v9fs_file_operations_dotl = {
301 .llseek = generic_file_llseek,
302 .read = v9fs_file_read,
303 .write = v9fs_file_write,
304 .open = v9fs_file_open,
305 .release = v9fs_dir_release,
306 .lock = v9fs_file_lock,
307 .mmap = generic_file_readonly_mmap,
308 .fsync = v9fs_file_fsync,
309};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f2434fc9d2c4..4331b3b5ee1c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -44,9 +44,12 @@
44#include "cache.h" 44#include "cache.h"
45 45
46static const struct inode_operations v9fs_dir_inode_operations; 46static const struct inode_operations v9fs_dir_inode_operations;
47static const struct inode_operations v9fs_dir_inode_operations_ext; 47static const struct inode_operations v9fs_dir_inode_operations_dotu;
48static const struct inode_operations v9fs_dir_inode_operations_dotl;
48static const struct inode_operations v9fs_file_inode_operations; 49static const struct inode_operations v9fs_file_inode_operations;
50static const struct inode_operations v9fs_file_inode_operations_dotl;
49static const struct inode_operations v9fs_symlink_inode_operations; 51static const struct inode_operations v9fs_symlink_inode_operations;
52static const struct inode_operations v9fs_symlink_inode_operations_dotl;
50 53
51/** 54/**
52 * unixmode2p9mode - convert unix mode bits to plan 9 55 * unixmode2p9mode - convert unix mode bits to plan 9
@@ -253,9 +256,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
253 return ERR_PTR(-ENOMEM); 256 return ERR_PTR(-ENOMEM);
254 } 257 }
255 258
256 inode->i_mode = mode; 259 inode_init_owner(inode, NULL, mode);
257 inode->i_uid = current_fsuid();
258 inode->i_gid = current_fsgid();
259 inode->i_blocks = 0; 260 inode->i_blocks = 0;
260 inode->i_rdev = 0; 261 inode->i_rdev = 0;
261 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 262 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -275,25 +276,44 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
275 init_special_inode(inode, inode->i_mode, inode->i_rdev); 276 init_special_inode(inode, inode->i_mode, inode->i_rdev);
276 break; 277 break;
277 case S_IFREG: 278 case S_IFREG:
278 inode->i_op = &v9fs_file_inode_operations; 279 if (v9fs_proto_dotl(v9ses)) {
279 inode->i_fop = &v9fs_file_operations; 280 inode->i_op = &v9fs_file_inode_operations_dotl;
281 inode->i_fop = &v9fs_file_operations_dotl;
282 } else {
283 inode->i_op = &v9fs_file_inode_operations;
284 inode->i_fop = &v9fs_file_operations;
285 }
286
280 break; 287 break;
288
281 case S_IFLNK: 289 case S_IFLNK:
282 if (!v9fs_proto_dotu(v9ses)) { 290 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
283 P9_DPRINTK(P9_DEBUG_ERROR, 291 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
284 "extended modes used w/o 9P2000.u\n"); 292 "legacy protocol.\n");
285 err = -EINVAL; 293 err = -EINVAL;
286 goto error; 294 goto error;
287 } 295 }
288 inode->i_op = &v9fs_symlink_inode_operations; 296
297 if (v9fs_proto_dotl(v9ses))
298 inode->i_op = &v9fs_symlink_inode_operations_dotl;
299 else
300 inode->i_op = &v9fs_symlink_inode_operations;
301
289 break; 302 break;
290 case S_IFDIR: 303 case S_IFDIR:
291 inc_nlink(inode); 304 inc_nlink(inode);
292 if (v9fs_proto_dotu(v9ses)) 305 if (v9fs_proto_dotl(v9ses))
293 inode->i_op = &v9fs_dir_inode_operations_ext; 306 inode->i_op = &v9fs_dir_inode_operations_dotl;
307 else if (v9fs_proto_dotu(v9ses))
308 inode->i_op = &v9fs_dir_inode_operations_dotu;
294 else 309 else
295 inode->i_op = &v9fs_dir_inode_operations; 310 inode->i_op = &v9fs_dir_inode_operations;
296 inode->i_fop = &v9fs_dir_operations; 311
312 if (v9fs_proto_dotl(v9ses))
313 inode->i_fop = &v9fs_dir_operations_dotl;
314 else
315 inode->i_fop = &v9fs_dir_operations;
316
297 break; 317 break;
298 default: 318 default:
299 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", 319 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
@@ -434,14 +454,12 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
434{ 454{
435 int retval; 455 int retval;
436 struct inode *file_inode; 456 struct inode *file_inode;
437 struct v9fs_session_info *v9ses;
438 struct p9_fid *v9fid; 457 struct p9_fid *v9fid;
439 458
440 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 459 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
441 rmdir); 460 rmdir);
442 461
443 file_inode = file->d_inode; 462 file_inode = file->d_inode;
444 v9ses = v9fs_inode2v9ses(file_inode);
445 v9fid = v9fs_fid_clone(file); 463 v9fid = v9fs_fid_clone(file);
446 if (IS_ERR(v9fid)) 464 if (IS_ERR(v9fid))
447 return PTR_ERR(v9fid); 465 return PTR_ERR(v9fid);
@@ -484,12 +502,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
484 ofid = NULL; 502 ofid = NULL;
485 fid = NULL; 503 fid = NULL;
486 name = (char *) dentry->d_name.name; 504 name = (char *) dentry->d_name.name;
487 dfid = v9fs_fid_clone(dentry->d_parent); 505 dfid = v9fs_fid_lookup(dentry->d_parent);
488 if (IS_ERR(dfid)) { 506 if (IS_ERR(dfid)) {
489 err = PTR_ERR(dfid); 507 err = PTR_ERR(dfid);
490 P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err); 508 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
491 dfid = NULL; 509 return ERR_PTR(err);
492 goto error;
493 } 510 }
494 511
495 /* clone a fid to use for creation */ 512 /* clone a fid to use for creation */
@@ -497,8 +514,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
497 if (IS_ERR(ofid)) { 514 if (IS_ERR(ofid)) {
498 err = PTR_ERR(ofid); 515 err = PTR_ERR(ofid);
499 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 516 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
500 ofid = NULL; 517 return ERR_PTR(err);
501 goto error;
502 } 518 }
503 519
504 err = p9_client_fcreate(ofid, name, perm, mode, extension); 520 err = p9_client_fcreate(ofid, name, perm, mode, extension);
@@ -508,14 +524,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
508 } 524 }
509 525
510 /* now walk from the parent so we can get unopened fid */ 526 /* now walk from the parent so we can get unopened fid */
511 fid = p9_client_walk(dfid, 1, &name, 0); 527 fid = p9_client_walk(dfid, 1, &name, 1);
512 if (IS_ERR(fid)) { 528 if (IS_ERR(fid)) {
513 err = PTR_ERR(fid); 529 err = PTR_ERR(fid);
514 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 530 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
515 fid = NULL; 531 fid = NULL;
516 goto error; 532 goto error;
517 } else 533 }
518 dfid = NULL;
519 534
520 /* instantiate inode and assign the unopened fid to the dentry */ 535 /* instantiate inode and assign the unopened fid to the dentry */
521 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 536 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
@@ -538,9 +553,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
538 return ofid; 553 return ofid;
539 554
540error: 555error:
541 if (dfid)
542 p9_client_clunk(dfid);
543
544 if (ofid) 556 if (ofid)
545 p9_client_clunk(ofid); 557 p9_client_clunk(ofid);
546 558
@@ -675,8 +687,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
675 if (IS_ERR(fid)) { 687 if (IS_ERR(fid)) {
676 result = PTR_ERR(fid); 688 result = PTR_ERR(fid);
677 if (result == -ENOENT) { 689 if (result == -ENOENT) {
678 d_add(dentry, NULL); 690 inode = NULL;
679 return NULL; 691 goto inst_out;
680 } 692 }
681 693
682 return ERR_PTR(result); 694 return ERR_PTR(result);
@@ -693,7 +705,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
693 if (result < 0) 705 if (result < 0)
694 goto error; 706 goto error;
695 707
696 if ((fid->qid.version) && (v9ses->cache)) 708inst_out:
709 if (v9ses->cache)
697 dentry->d_op = &v9fs_cached_dentry_operations; 710 dentry->d_op = &v9fs_cached_dentry_operations;
698 else 711 else
699 dentry->d_op = &v9fs_dentry_operations; 712 dentry->d_op = &v9fs_dentry_operations;
@@ -772,6 +785,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
772 goto clunk_olddir; 785 goto clunk_olddir;
773 } 786 }
774 787
788 if (v9fs_proto_dotl(v9ses)) {
789 retval = p9_client_rename(oldfid, newdirfid,
790 (char *) new_dentry->d_name.name);
791 if (retval != -ENOSYS)
792 goto clunk_newdir;
793 }
794
775 /* 9P can only handle file rename in the same directory */ 795 /* 9P can only handle file rename in the same directory */
776 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { 796 if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
777 P9_DPRINTK(P9_DEBUG_ERROR, 797 P9_DPRINTK(P9_DEBUG_ERROR,
@@ -1197,6 +1217,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1197 sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); 1217 sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
1198 else if (S_ISFIFO(mode)) 1218 else if (S_ISFIFO(mode))
1199 *name = 0; 1219 *name = 0;
1220 else if (S_ISSOCK(mode))
1221 *name = 0;
1200 else { 1222 else {
1201 __putname(name); 1223 __putname(name);
1202 return -EINVAL; 1224 return -EINVAL;
@@ -1208,7 +1230,21 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1208 return retval; 1230 return retval;
1209} 1231}
1210 1232
1211static const struct inode_operations v9fs_dir_inode_operations_ext = { 1233static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1234 .create = v9fs_vfs_create,
1235 .lookup = v9fs_vfs_lookup,
1236 .symlink = v9fs_vfs_symlink,
1237 .link = v9fs_vfs_link,
1238 .unlink = v9fs_vfs_unlink,
1239 .mkdir = v9fs_vfs_mkdir,
1240 .rmdir = v9fs_vfs_rmdir,
1241 .mknod = v9fs_vfs_mknod,
1242 .rename = v9fs_vfs_rename,
1243 .getattr = v9fs_vfs_getattr,
1244 .setattr = v9fs_vfs_setattr,
1245};
1246
1247static const struct inode_operations v9fs_dir_inode_operations_dotl = {
1212 .create = v9fs_vfs_create, 1248 .create = v9fs_vfs_create,
1213 .lookup = v9fs_vfs_lookup, 1249 .lookup = v9fs_vfs_lookup,
1214 .symlink = v9fs_vfs_symlink, 1250 .symlink = v9fs_vfs_symlink,
@@ -1239,6 +1275,11 @@ static const struct inode_operations v9fs_file_inode_operations = {
1239 .setattr = v9fs_vfs_setattr, 1275 .setattr = v9fs_vfs_setattr,
1240}; 1276};
1241 1277
1278static const struct inode_operations v9fs_file_inode_operations_dotl = {
1279 .getattr = v9fs_vfs_getattr,
1280 .setattr = v9fs_vfs_setattr,
1281};
1282
1242static const struct inode_operations v9fs_symlink_inode_operations = { 1283static const struct inode_operations v9fs_symlink_inode_operations = {
1243 .readlink = generic_readlink, 1284 .readlink = generic_readlink,
1244 .follow_link = v9fs_vfs_follow_link, 1285 .follow_link = v9fs_vfs_follow_link,
@@ -1246,3 +1287,11 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
1246 .getattr = v9fs_vfs_getattr, 1287 .getattr = v9fs_vfs_getattr,
1247 .setattr = v9fs_vfs_setattr, 1288 .setattr = v9fs_vfs_setattr,
1248}; 1289};
1290
1291static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
1292 .readlink = generic_readlink,
1293 .follow_link = v9fs_vfs_follow_link,
1294 .put_link = v9fs_vfs_put_link,
1295 .getattr = v9fs_vfs_getattr,
1296 .setattr = v9fs_vfs_setattr,
1297};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 806da5d3b3a0..be74d020436e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -38,6 +38,7 @@
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/statfs.h>
41#include <net/9p/9p.h> 42#include <net/9p/9p.h>
42#include <net/9p/client.h> 43#include <net/9p/client.h>
43 44
@@ -45,7 +46,7 @@
45#include "v9fs_vfs.h" 46#include "v9fs_vfs.h"
46#include "fid.h" 47#include "fid.h"
47 48
48static const struct super_operations v9fs_super_ops; 49static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
49 50
50/** 51/**
51 * v9fs_set_super - set the superblock 52 * v9fs_set_super - set the superblock
@@ -76,7 +77,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
76 sb->s_blocksize_bits = fls(v9ses->maxdata - 1); 77 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
77 sb->s_blocksize = 1 << sb->s_blocksize_bits; 78 sb->s_blocksize = 1 << sb->s_blocksize_bits;
78 sb->s_magic = V9FS_MAGIC; 79 sb->s_magic = V9FS_MAGIC;
79 sb->s_op = &v9fs_super_ops; 80 if (v9fs_proto_dotl(v9ses))
81 sb->s_op = &v9fs_super_ops_dotl;
82 else
83 sb->s_op = &v9fs_super_ops;
80 sb->s_bdi = &v9ses->bdi; 84 sb->s_bdi = &v9ses->bdi;
81 85
82 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 86 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
@@ -211,6 +215,42 @@ v9fs_umount_begin(struct super_block *sb)
211 v9fs_session_begin_cancel(v9ses); 215 v9fs_session_begin_cancel(v9ses);
212} 216}
213 217
218static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
219{
220 struct v9fs_session_info *v9ses;
221 struct p9_fid *fid;
222 struct p9_rstatfs rs;
223 int res;
224
225 fid = v9fs_fid_lookup(dentry);
226 if (IS_ERR(fid)) {
227 res = PTR_ERR(fid);
228 goto done;
229 }
230
231 v9ses = v9fs_inode2v9ses(dentry->d_inode);
232 if (v9fs_proto_dotl(v9ses)) {
233 res = p9_client_statfs(fid, &rs);
234 if (res == 0) {
235 buf->f_type = rs.type;
236 buf->f_bsize = rs.bsize;
237 buf->f_blocks = rs.blocks;
238 buf->f_bfree = rs.bfree;
239 buf->f_bavail = rs.bavail;
240 buf->f_files = rs.files;
241 buf->f_ffree = rs.ffree;
242 buf->f_fsid.val[0] = rs.fsid & 0xFFFFFFFFUL;
243 buf->f_fsid.val[1] = (rs.fsid >> 32) & 0xFFFFFFFFUL;
244 buf->f_namelen = rs.namelen;
245 }
246 if (res != -ENOSYS)
247 goto done;
248 }
249 res = simple_statfs(dentry, buf);
250done:
251 return res;
252}
253
214static const struct super_operations v9fs_super_ops = { 254static const struct super_operations v9fs_super_ops = {
215#ifdef CONFIG_9P_FSCACHE 255#ifdef CONFIG_9P_FSCACHE
216 .alloc_inode = v9fs_alloc_inode, 256 .alloc_inode = v9fs_alloc_inode,
@@ -222,6 +262,17 @@ static const struct super_operations v9fs_super_ops = {
222 .umount_begin = v9fs_umount_begin, 262 .umount_begin = v9fs_umount_begin,
223}; 263};
224 264
265static const struct super_operations v9fs_super_ops_dotl = {
266#ifdef CONFIG_9P_FSCACHE
267 .alloc_inode = v9fs_alloc_inode,
268 .destroy_inode = v9fs_destroy_inode,
269#endif
270 .statfs = v9fs_statfs,
271 .clear_inode = v9fs_clear_inode,
272 .show_options = generic_show_options,
273 .umount_begin = v9fs_umount_begin,
274};
275
225struct file_system_type v9fs_fs_type = { 276struct file_system_type v9fs_fs_type = {
226 .name = "9p", 277 .name = "9p",
227 .get_sb = v9fs_get_sb, 278 .get_sb = v9fs_get_sb,
diff --git a/fs/Makefile b/fs/Makefile
index 97f340f14ba2..e6ec1d309b1d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 13 pnode.o drop_caches.o splice.o sync.o utimes.o \
14 stack.o fs_struct.o 14 stack.o fs_struct.o statfs.o
15 15
16ifeq ($(CONFIG_BLOCK),y) 16ifeq ($(CONFIG_BLOCK),y)
17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o 17obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index adc1cb771b57..b42d5cc1d6d2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -189,13 +189,9 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
189 struct key *key) 189 struct key *key)
190{ 190{
191 struct page *page; 191 struct page *page;
192 struct file file = {
193 .private_data = key,
194 };
195
196 _enter("{%lu},%lu", dir->i_ino, index); 192 _enter("{%lu},%lu", dir->i_ino, index);
197 193
198 page = read_mapping_page(dir->i_mapping, index, &file); 194 page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
199 if (!IS_ERR(page)) { 195 if (!IS_ERR(page)) {
200 kmap(page); 196 kmap(page);
201 if (!PageChecked(page)) 197 if (!PageChecked(page))
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0df9bc2b724d..14d89fa58fee 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -121,34 +121,19 @@ static void afs_file_readpage_read_complete(struct page *page,
121#endif 121#endif
122 122
123/* 123/*
124 * AFS read page from file, directory or symlink 124 * read page from file, directory or symlink, given a key to use
125 */ 125 */
126static int afs_readpage(struct file *file, struct page *page) 126int afs_page_filler(void *data, struct page *page)
127{ 127{
128 struct afs_vnode *vnode; 128 struct inode *inode = page->mapping->host;
129 struct inode *inode; 129 struct afs_vnode *vnode = AFS_FS_I(inode);
130 struct key *key; 130 struct key *key = data;
131 size_t len; 131 size_t len;
132 off_t offset; 132 off_t offset;
133 int ret; 133 int ret;
134 134
135 inode = page->mapping->host;
136
137 if (file) {
138 key = file->private_data;
139 ASSERT(key != NULL);
140 } else {
141 key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
142 if (IS_ERR(key)) {
143 ret = PTR_ERR(key);
144 goto error_nokey;
145 }
146 }
147
148 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); 135 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
149 136
150 vnode = AFS_FS_I(inode);
151
152 BUG_ON(!PageLocked(page)); 137 BUG_ON(!PageLocked(page));
153 138
154 ret = -ESTALE; 139 ret = -ESTALE;
@@ -214,31 +199,56 @@ static int afs_readpage(struct file *file, struct page *page)
214 unlock_page(page); 199 unlock_page(page);
215 } 200 }
216 201
217 if (!file)
218 key_put(key);
219 _leave(" = 0"); 202 _leave(" = 0");
220 return 0; 203 return 0;
221 204
222error: 205error:
223 SetPageError(page); 206 SetPageError(page);
224 unlock_page(page); 207 unlock_page(page);
225 if (!file)
226 key_put(key);
227error_nokey:
228 _leave(" = %d", ret); 208 _leave(" = %d", ret);
229 return ret; 209 return ret;
230} 210}
231 211
232/* 212/*
213 * read page from file, directory or symlink, given a file to nominate the key
214 * to be used
215 */
216static int afs_readpage(struct file *file, struct page *page)
217{
218 struct key *key;
219 int ret;
220
221 if (file) {
222 key = file->private_data;
223 ASSERT(key != NULL);
224 ret = afs_page_filler(key, page);
225 } else {
226 struct inode *inode = page->mapping->host;
227 key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
228 if (IS_ERR(key)) {
229 ret = PTR_ERR(key);
230 } else {
231 ret = afs_page_filler(key, page);
232 key_put(key);
233 }
234 }
235 return ret;
236}
237
238/*
233 * read a set of pages 239 * read a set of pages
234 */ 240 */
235static int afs_readpages(struct file *file, struct address_space *mapping, 241static int afs_readpages(struct file *file, struct address_space *mapping,
236 struct list_head *pages, unsigned nr_pages) 242 struct list_head *pages, unsigned nr_pages)
237{ 243{
244 struct key *key = file->private_data;
238 struct afs_vnode *vnode; 245 struct afs_vnode *vnode;
239 int ret = 0; 246 int ret = 0;
240 247
241 _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); 248 _enter("{%d},{%lu},,%d",
249 key_serial(key), mapping->host->i_ino, nr_pages);
250
251 ASSERT(key != NULL);
242 252
243 vnode = AFS_FS_I(mapping->host); 253 vnode = AFS_FS_I(mapping->host);
244 if (vnode->flags & AFS_VNODE_DELETED) { 254 if (vnode->flags & AFS_VNODE_DELETED) {
@@ -279,7 +289,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
279 } 289 }
280 290
281 /* load the missing pages from the network */ 291 /* load the missing pages from the network */
282 ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file); 292 ret = read_cache_pages(mapping, pages, afs_page_filler, key);
283 293
284 _leave(" = %d [netting]", ret); 294 _leave(" = %d [netting]", ret);
285 return ret; 295 return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a10f2582844f..807f284cc75e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -494,6 +494,7 @@ extern const struct file_operations afs_file_operations;
494 494
495extern int afs_open(struct inode *, struct file *); 495extern int afs_open(struct inode *, struct file *);
496extern int afs_release(struct inode *, struct file *); 496extern int afs_release(struct inode *, struct file *);
497extern int afs_page_filler(void *, struct page *);
497 498
498/* 499/*
499 * flock.c 500 * flock.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index b3feddc4f7d6..a9e23039ea34 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -49,9 +49,6 @@ static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
49 */ 49 */
50int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) 50int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
51{ 51{
52 struct file file = {
53 .private_data = key,
54 };
55 struct page *page; 52 struct page *page;
56 size_t size; 53 size_t size;
57 char *buf; 54 char *buf;
@@ -61,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
61 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 58 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
62 59
63 /* read the contents of the symlink into the pagecache */ 60 /* read the contents of the symlink into the pagecache */
64 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); 61 page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
62 afs_page_filler, key);
65 if (IS_ERR(page)) { 63 if (IS_ERR(page)) {
66 ret = PTR_ERR(page); 64 ret = PTR_ERR(page);
67 goto out; 65 goto out;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda83..9bd4b3876c99 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void)
205 * that it already _is_ on the dirty list. 205 * that it already _is_ on the dirty list.
206 */ 206 */
207 inode->i_state = I_DIRTY; 207 inode->i_state = I_DIRTY;
208 inode->i_mode = S_IRUSR | S_IWUSR; 208 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
209 inode->i_uid = current_fsuid(); 209 inode->i_uid = current_fsuid();
210 inode->i_gid = current_fsgid(); 210 inode->i_gid = current_fsgid();
211 inode->i_flags |= S_PRIVATE; 211 inode->i_flags |= S_PRIVATE;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e8e5e63ac950..db4117ed7803 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,13 +18,14 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/param.h> 19#include <linux/param.h>
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/smp_lock.h>
21#include "autofs_i.h" 22#include "autofs_i.h"
22 23
23static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 24static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
24static int autofs4_dir_unlink(struct inode *,struct dentry *); 25static int autofs4_dir_unlink(struct inode *,struct dentry *);
25static int autofs4_dir_rmdir(struct inode *,struct dentry *); 26static int autofs4_dir_rmdir(struct inode *,struct dentry *);
26static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 27static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
27static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 28static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
28static int autofs4_dir_open(struct inode *inode, struct file *file); 29static int autofs4_dir_open(struct inode *inode, struct file *file);
29static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 30static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
30static void *autofs4_follow_link(struct dentry *, struct nameidata *); 31static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -38,7 +39,7 @@ const struct file_operations autofs4_root_operations = {
38 .read = generic_read_dir, 39 .read = generic_read_dir,
39 .readdir = dcache_readdir, 40 .readdir = dcache_readdir,
40 .llseek = dcache_dir_lseek, 41 .llseek = dcache_dir_lseek,
41 .ioctl = autofs4_root_ioctl, 42 .unlocked_ioctl = autofs4_root_ioctl,
42}; 43};
43 44
44const struct file_operations autofs4_dir_operations = { 45const struct file_operations autofs4_dir_operations = {
@@ -902,8 +903,8 @@ int is_autofs4_dentry(struct dentry *dentry)
902 * ioctl()'s on the root directory is the chief method for the daemon to 903 * ioctl()'s on the root directory is the chief method for the daemon to
903 * generate kernel reactions 904 * generate kernel reactions
904 */ 905 */
905static int autofs4_root_ioctl(struct inode *inode, struct file *filp, 906static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
906 unsigned int cmd, unsigned long arg) 907 unsigned int cmd, unsigned long arg)
907{ 908{
908 struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); 909 struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb);
909 void __user *p = (void __user *)arg; 910 void __user *p = (void __user *)arg;
@@ -947,3 +948,16 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
947 return -ENOSYS; 948 return -ENOSYS;
948 } 949 }
949} 950}
951
952static long autofs4_root_ioctl(struct file *filp,
953 unsigned int cmd, unsigned long arg)
954{
955 long ret;
956 struct inode *inode = filp->f_dentry->d_inode;
957
958 lock_kernel();
959 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
960 unlock_kernel();
961
962 return ret;
963}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 1e41aadb1068..8f73841fc974 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -105,14 +105,12 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
105 } 105 }
106 set_bit(ino, info->si_imap); 106 set_bit(ino, info->si_imap);
107 info->si_freei--; 107 info->si_freei--;
108 inode->i_uid = current_fsuid(); 108 inode_init_owner(inode, dir, mode);
109 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
110 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 109 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
111 inode->i_blocks = 0; 110 inode->i_blocks = 0;
112 inode->i_op = &bfs_file_inops; 111 inode->i_op = &bfs_file_inops;
113 inode->i_fop = &bfs_file_operations; 112 inode->i_fop = &bfs_file_operations;
114 inode->i_mapping->a_ops = &bfs_aops; 113 inode->i_mapping->a_ops = &bfs_aops;
115 inode->i_mode = mode;
116 inode->i_ino = ino; 114 inode->i_ino = ino;
117 BFS_I(inode)->i_dsk_ino = ino; 115 BFS_I(inode)->i_dsk_ino = ino;
118 BFS_I(inode)->i_sblock = 0; 116 BFS_I(inode)->i_sblock = 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6dcee88c2e5d..26e5f5026620 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -245,37 +245,14 @@ struct super_block *freeze_bdev(struct block_device *bdev)
245 sb = get_active_super(bdev); 245 sb = get_active_super(bdev);
246 if (!sb) 246 if (!sb)
247 goto out; 247 goto out;
248 if (sb->s_flags & MS_RDONLY) { 248 error = freeze_super(sb);
249 sb->s_frozen = SB_FREEZE_TRANS; 249 if (error) {
250 up_write(&sb->s_umount); 250 deactivate_super(sb);
251 bdev->bd_fsfreeze_count--;
251 mutex_unlock(&bdev->bd_fsfreeze_mutex); 252 mutex_unlock(&bdev->bd_fsfreeze_mutex);
252 return sb; 253 return ERR_PTR(error);
253 }
254
255 sb->s_frozen = SB_FREEZE_WRITE;
256 smp_wmb();
257
258 sync_filesystem(sb);
259
260 sb->s_frozen = SB_FREEZE_TRANS;
261 smp_wmb();
262
263 sync_blockdev(sb->s_bdev);
264
265 if (sb->s_op->freeze_fs) {
266 error = sb->s_op->freeze_fs(sb);
267 if (error) {
268 printk(KERN_ERR
269 "VFS:Filesystem freeze failed\n");
270 sb->s_frozen = SB_UNFROZEN;
271 deactivate_locked_super(sb);
272 bdev->bd_fsfreeze_count--;
273 mutex_unlock(&bdev->bd_fsfreeze_mutex);
274 return ERR_PTR(error);
275 }
276 } 254 }
277 up_write(&sb->s_umount); 255 deactivate_super(sb);
278
279 out: 256 out:
280 sync_blockdev(bdev); 257 sync_blockdev(bdev);
281 mutex_unlock(&bdev->bd_fsfreeze_mutex); 258 mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -296,40 +273,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
296 273
297 mutex_lock(&bdev->bd_fsfreeze_mutex); 274 mutex_lock(&bdev->bd_fsfreeze_mutex);
298 if (!bdev->bd_fsfreeze_count) 275 if (!bdev->bd_fsfreeze_count)
299 goto out_unlock; 276 goto out;
300 277
301 error = 0; 278 error = 0;
302 if (--bdev->bd_fsfreeze_count > 0) 279 if (--bdev->bd_fsfreeze_count > 0)
303 goto out_unlock; 280 goto out;
304 281
305 if (!sb) 282 if (!sb)
306 goto out_unlock; 283 goto out;
307
308 BUG_ON(sb->s_bdev != bdev);
309 down_write(&sb->s_umount);
310 if (sb->s_flags & MS_RDONLY)
311 goto out_unfrozen;
312
313 if (sb->s_op->unfreeze_fs) {
314 error = sb->s_op->unfreeze_fs(sb);
315 if (error) {
316 printk(KERN_ERR
317 "VFS:Filesystem thaw failed\n");
318 sb->s_frozen = SB_FREEZE_TRANS;
319 bdev->bd_fsfreeze_count++;
320 mutex_unlock(&bdev->bd_fsfreeze_mutex);
321 return error;
322 }
323 }
324
325out_unfrozen:
326 sb->s_frozen = SB_UNFROZEN;
327 smp_wmb();
328 wake_up(&sb->s_wait_unfrozen);
329 284
330 if (sb) 285 error = thaw_super(sb);
331 deactivate_locked_super(sb); 286 if (error) {
332out_unlock: 287 bdev->bd_fsfreeze_count++;
288 mutex_unlock(&bdev->bd_fsfreeze_mutex);
289 return error;
290 }
291out:
333 mutex_unlock(&bdev->bd_fsfreeze_mutex); 292 mutex_unlock(&bdev->bd_fsfreeze_mutex);
334 return 0; 293 return 0;
335} 294}
@@ -417,7 +376,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
417 */ 376 */
418 mutex_unlock(&bd_inode->i_mutex); 377 mutex_unlock(&bd_inode->i_mutex);
419 378
420 error = blkdev_issue_flush(bdev, NULL); 379 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
421 if (error == -EOPNOTSUPP) 380 if (error == -EOPNOTSUPP)
422 error = 0; 381 error = 0;
423 382
@@ -668,41 +627,209 @@ void bd_forget(struct inode *inode)
668 iput(bdev->bd_inode); 627 iput(bdev->bd_inode);
669} 628}
670 629
671int bd_claim(struct block_device *bdev, void *holder) 630/**
631 * bd_may_claim - test whether a block device can be claimed
632 * @bdev: block device of interest
633 * @whole: whole block device containing @bdev, may equal @bdev
634 * @holder: holder trying to claim @bdev
635 *
636 * Test whether @bdev can be claimed by @holder.
637 *
638 * CONTEXT:
639 * spin_lock(&bdev_lock).
640 *
641 * RETURNS:
642 * %true if @bdev can be claimed, %false otherwise.
643 */
644static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
645 void *holder)
672{ 646{
673 int res;
674 spin_lock(&bdev_lock);
675
676 /* first decide result */
677 if (bdev->bd_holder == holder) 647 if (bdev->bd_holder == holder)
678 res = 0; /* already a holder */ 648 return true; /* already a holder */
679 else if (bdev->bd_holder != NULL) 649 else if (bdev->bd_holder != NULL)
680 res = -EBUSY; /* held by someone else */ 650 return false; /* held by someone else */
681 else if (bdev->bd_contains == bdev) 651 else if (bdev->bd_contains == bdev)
682 res = 0; /* is a whole device which isn't held */ 652 return true; /* is a whole device which isn't held */
683 653
684 else if (bdev->bd_contains->bd_holder == bd_claim) 654 else if (whole->bd_holder == bd_claim)
685 res = 0; /* is a partition of a device that is being partitioned */ 655 return true; /* is a partition of a device that is being partitioned */
686 else if (bdev->bd_contains->bd_holder != NULL) 656 else if (whole->bd_holder != NULL)
687 res = -EBUSY; /* is a partition of a held device */ 657 return false; /* is a partition of a held device */
688 else 658 else
689 res = 0; /* is a partition of an un-held device */ 659 return true; /* is a partition of an un-held device */
660}
661
662/**
663 * bd_prepare_to_claim - prepare to claim a block device
664 * @bdev: block device of interest
665 * @whole: the whole device containing @bdev, may equal @bdev
666 * @holder: holder trying to claim @bdev
667 *
668 * Prepare to claim @bdev. This function fails if @bdev is already
669 * claimed by another holder and waits if another claiming is in
670 * progress. This function doesn't actually claim. On successful
671 * return, the caller has ownership of bd_claiming and bd_holder[s].
672 *
673 * CONTEXT:
674 * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
675 * it multiple times.
676 *
677 * RETURNS:
678 * 0 if @bdev can be claimed, -EBUSY otherwise.
679 */
680static int bd_prepare_to_claim(struct block_device *bdev,
681 struct block_device *whole, void *holder)
682{
683retry:
684 /* if someone else claimed, fail */
685 if (!bd_may_claim(bdev, whole, holder))
686 return -EBUSY;
687
688 /* if someone else is claiming, wait for it to finish */
689 if (whole->bd_claiming && whole->bd_claiming != holder) {
690 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
691 DEFINE_WAIT(wait);
692
693 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
694 spin_unlock(&bdev_lock);
695 schedule();
696 finish_wait(wq, &wait);
697 spin_lock(&bdev_lock);
698 goto retry;
699 }
700
701 /* yay, all mine */
702 return 0;
703}
690 704
691 /* now impose change */ 705/**
692 if (res==0) { 706 * bd_start_claiming - start claiming a block device
707 * @bdev: block device of interest
708 * @holder: holder trying to claim @bdev
709 *
710 * @bdev is about to be opened exclusively. Check @bdev can be opened
711 * exclusively and mark that an exclusive open is in progress. Each
712 * successful call to this function must be matched with a call to
713 * either bd_claim() or bd_abort_claiming(). If this function
714 * succeeds, the matching bd_claim() is guaranteed to succeed.
715 *
716 * CONTEXT:
717 * Might sleep.
718 *
719 * RETURNS:
720 * Pointer to the block device containing @bdev on success, ERR_PTR()
721 * value on failure.
722 */
723static struct block_device *bd_start_claiming(struct block_device *bdev,
724 void *holder)
725{
726 struct gendisk *disk;
727 struct block_device *whole;
728 int partno, err;
729
730 might_sleep();
731
732 /*
733 * @bdev might not have been initialized properly yet, look up
734 * and grab the outer block device the hard way.
735 */
736 disk = get_gendisk(bdev->bd_dev, &partno);
737 if (!disk)
738 return ERR_PTR(-ENXIO);
739
740 whole = bdget_disk(disk, 0);
741 put_disk(disk);
742 if (!whole)
743 return ERR_PTR(-ENOMEM);
744
745 /* prepare to claim, if successful, mark claiming in progress */
746 spin_lock(&bdev_lock);
747
748 err = bd_prepare_to_claim(bdev, whole, holder);
749 if (err == 0) {
750 whole->bd_claiming = holder;
751 spin_unlock(&bdev_lock);
752 return whole;
753 } else {
754 spin_unlock(&bdev_lock);
755 bdput(whole);
756 return ERR_PTR(err);
757 }
758}
759
760/* releases bdev_lock */
761static void __bd_abort_claiming(struct block_device *whole, void *holder)
762{
763 BUG_ON(whole->bd_claiming != holder);
764 whole->bd_claiming = NULL;
765 wake_up_bit(&whole->bd_claiming, 0);
766
767 spin_unlock(&bdev_lock);
768 bdput(whole);
769}
770
771/**
772 * bd_abort_claiming - abort claiming a block device
773 * @whole: whole block device returned by bd_start_claiming()
774 * @holder: holder trying to claim @bdev
775 *
776 * Abort a claiming block started by bd_start_claiming(). Note that
777 * @whole is not the block device to be claimed but the whole device
778 * returned by bd_start_claiming().
779 *
780 * CONTEXT:
781 * Grabs and releases bdev_lock.
782 */
783static void bd_abort_claiming(struct block_device *whole, void *holder)
784{
785 spin_lock(&bdev_lock);
786 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
787}
788
789/**
790 * bd_claim - claim a block device
791 * @bdev: block device to claim
792 * @holder: holder trying to claim @bdev
793 *
794 * Try to claim @bdev which must have been opened successfully. This
795 * function may be called with or without preceding
796 * bd_start_claiming(). In the former case, this function is always
797 * successful and terminates the claiming block.
798 *
799 * CONTEXT:
800 * Might sleep.
801 *
802 * RETURNS:
803 * 0 if successful, -EBUSY if @bdev is already claimed.
804 */
805int bd_claim(struct block_device *bdev, void *holder)
806{
807 struct block_device *whole = bdev->bd_contains;
808 int res;
809
810 might_sleep();
811
812 spin_lock(&bdev_lock);
813
814 res = bd_prepare_to_claim(bdev, whole, holder);
815 if (res == 0) {
693 /* note that for a whole device bd_holders 816 /* note that for a whole device bd_holders
694 * will be incremented twice, and bd_holder will 817 * will be incremented twice, and bd_holder will
695 * be set to bd_claim before being set to holder 818 * be set to bd_claim before being set to holder
696 */ 819 */
697 bdev->bd_contains->bd_holders ++; 820 whole->bd_holders++;
698 bdev->bd_contains->bd_holder = bd_claim; 821 whole->bd_holder = bd_claim;
699 bdev->bd_holders++; 822 bdev->bd_holders++;
700 bdev->bd_holder = holder; 823 bdev->bd_holder = holder;
701 } 824 }
702 spin_unlock(&bdev_lock); 825
826 if (whole->bd_claiming)
827 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
828 else
829 spin_unlock(&bdev_lock);
830
703 return res; 831 return res;
704} 832}
705
706EXPORT_SYMBOL(bd_claim); 833EXPORT_SYMBOL(bd_claim);
707 834
708void bd_release(struct block_device *bdev) 835void bd_release(struct block_device *bdev)
@@ -1316,6 +1443,7 @@ EXPORT_SYMBOL(blkdev_get);
1316 1443
1317static int blkdev_open(struct inode * inode, struct file * filp) 1444static int blkdev_open(struct inode * inode, struct file * filp)
1318{ 1445{
1446 struct block_device *whole = NULL;
1319 struct block_device *bdev; 1447 struct block_device *bdev;
1320 int res; 1448 int res;
1321 1449
@@ -1338,22 +1466,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1338 if (bdev == NULL) 1466 if (bdev == NULL)
1339 return -ENOMEM; 1467 return -ENOMEM;
1340 1468
1469 if (filp->f_mode & FMODE_EXCL) {
1470 whole = bd_start_claiming(bdev, filp);
1471 if (IS_ERR(whole)) {
1472 bdput(bdev);
1473 return PTR_ERR(whole);
1474 }
1475 }
1476
1341 filp->f_mapping = bdev->bd_inode->i_mapping; 1477 filp->f_mapping = bdev->bd_inode->i_mapping;
1342 1478
1343 res = blkdev_get(bdev, filp->f_mode); 1479 res = blkdev_get(bdev, filp->f_mode);
1344 if (res)
1345 return res;
1346 1480
1347 if (filp->f_mode & FMODE_EXCL) { 1481 if (whole) {
1348 res = bd_claim(bdev, filp); 1482 if (res == 0)
1349 if (res) 1483 BUG_ON(bd_claim(bdev, filp) != 0);
1350 goto out_blkdev_put; 1484 else
1485 bd_abort_claiming(whole, filp);
1351 } 1486 }
1352 1487
1353 return 0;
1354
1355 out_blkdev_put:
1356 blkdev_put(bdev, filp->f_mode);
1357 return res; 1488 return res;
1358} 1489}
1359 1490
@@ -1564,27 +1695,34 @@ EXPORT_SYMBOL(lookup_bdev);
1564 */ 1695 */
1565struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) 1696struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1566{ 1697{
1567 struct block_device *bdev; 1698 struct block_device *bdev, *whole;
1568 int error = 0; 1699 int error;
1569 1700
1570 bdev = lookup_bdev(path); 1701 bdev = lookup_bdev(path);
1571 if (IS_ERR(bdev)) 1702 if (IS_ERR(bdev))
1572 return bdev; 1703 return bdev;
1573 1704
1705 whole = bd_start_claiming(bdev, holder);
1706 if (IS_ERR(whole)) {
1707 bdput(bdev);
1708 return whole;
1709 }
1710
1574 error = blkdev_get(bdev, mode); 1711 error = blkdev_get(bdev, mode);
1575 if (error) 1712 if (error)
1576 return ERR_PTR(error); 1713 goto out_abort_claiming;
1714
1577 error = -EACCES; 1715 error = -EACCES;
1578 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1716 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1579 goto blkdev_put; 1717 goto out_blkdev_put;
1580 error = bd_claim(bdev, holder);
1581 if (error)
1582 goto blkdev_put;
1583 1718
1719 BUG_ON(bd_claim(bdev, holder) != 0);
1584 return bdev; 1720 return bdev;
1585 1721
1586blkdev_put: 1722out_blkdev_put:
1587 blkdev_put(bdev, mode); 1723 blkdev_put(bdev, mode);
1724out_abort_claiming:
1725 bd_abort_claiming(whole, holder);
1588 return ERR_PTR(error); 1726 return ERR_PTR(error);
1589} 1727}
1590 1728
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ef7b26724ec..8d432cd9d580 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -282,14 +282,14 @@ int btrfs_acl_chmod(struct inode *inode)
282 return ret; 282 return ret;
283} 283}
284 284
285struct xattr_handler btrfs_xattr_acl_default_handler = { 285const struct xattr_handler btrfs_xattr_acl_default_handler = {
286 .prefix = POSIX_ACL_XATTR_DEFAULT, 286 .prefix = POSIX_ACL_XATTR_DEFAULT,
287 .flags = ACL_TYPE_DEFAULT, 287 .flags = ACL_TYPE_DEFAULT,
288 .get = btrfs_xattr_acl_get, 288 .get = btrfs_xattr_acl_get,
289 .set = btrfs_xattr_acl_set, 289 .set = btrfs_xattr_acl_set,
290}; 290};
291 291
292struct xattr_handler btrfs_xattr_acl_access_handler = { 292const struct xattr_handler btrfs_xattr_acl_access_handler = {
293 .prefix = POSIX_ACL_XATTR_ACCESS, 293 .prefix = POSIX_ACL_XATTR_ACCESS,
294 .flags = ACL_TYPE_ACCESS, 294 .flags = ACL_TYPE_ACCESS,
295 .get = btrfs_xattr_acl_get, 295 .get = btrfs_xattr_acl_get,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b34d32fdaaec..c6a4f459ad76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
1589 u64 start, u64 len) 1589 u64 start, u64 len)
1590{ 1590{
1591 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1591 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1592 DISCARD_FL_BARRIER); 1592 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
1593} 1593}
1594 1594
1595static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1595static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bfdc641d4e3..d601629b85d1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4121,16 +4121,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4121 if (ret != 0) 4121 if (ret != 0)
4122 goto fail; 4122 goto fail;
4123 4123
4124 inode->i_uid = current_fsuid(); 4124 inode_init_owner(inode, dir, mode);
4125
4126 if (dir && (dir->i_mode & S_ISGID)) {
4127 inode->i_gid = dir->i_gid;
4128 if (S_ISDIR(mode))
4129 mode |= S_ISGID;
4130 } else
4131 inode->i_gid = current_fsgid();
4132
4133 inode->i_mode = mode;
4134 inode->i_ino = objectid; 4125 inode->i_ino = objectid;
4135 inode_set_bytes(inode, 0); 4126 inode_set_bytes(inode, 0);
4136 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4127 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 193b58f7d3f3..59acd3eb288a 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -282,7 +282,7 @@ err:
282 * List of handlers for synthetic system.* attributes. All real ondisk 282 * List of handlers for synthetic system.* attributes. All real ondisk
283 * attributes are handled directly. 283 * attributes are handled directly.
284 */ 284 */
285struct xattr_handler *btrfs_xattr_handlers[] = { 285const struct xattr_handler *btrfs_xattr_handlers[] = {
286#ifdef CONFIG_BTRFS_FS_POSIX_ACL 286#ifdef CONFIG_BTRFS_FS_POSIX_ACL
287 &btrfs_xattr_acl_access_handler, 287 &btrfs_xattr_acl_access_handler,
288 &btrfs_xattr_acl_default_handler, 288 &btrfs_xattr_acl_default_handler,
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 721efa0346e0..7a43fd640bbb 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -21,9 +21,9 @@
21 21
22#include <linux/xattr.h> 22#include <linux/xattr.h>
23 23
24extern struct xattr_handler btrfs_xattr_acl_access_handler; 24extern const struct xattr_handler btrfs_xattr_acl_access_handler;
25extern struct xattr_handler btrfs_xattr_acl_default_handler; 25extern const struct xattr_handler btrfs_xattr_acl_default_handler;
26extern struct xattr_handler *btrfs_xattr_handlers[]; 26extern const struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
diff --git a/fs/buffer.c b/fs/buffer.c
index c9c266db0624..e8aa7081d25c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -275,6 +275,7 @@ void invalidate_bdev(struct block_device *bdev)
275 return; 275 return;
276 276
277 invalidate_bh_lrus(); 277 invalidate_bh_lrus();
278 lru_add_drain_all(); /* make sure all lru add caches are flushed */
278 invalidate_mapping_pages(mapping, 0, -1); 279 invalidate_mapping_pages(mapping, 0, -1);
279} 280}
280EXPORT_SYMBOL(invalidate_bdev); 281EXPORT_SYMBOL(invalidate_bdev);
@@ -560,26 +561,17 @@ repeat:
560 return err; 561 return err;
561} 562}
562 563
563static void do_thaw_all(struct work_struct *work) 564static void do_thaw_one(struct super_block *sb, void *unused)
564{ 565{
565 struct super_block *sb;
566 char b[BDEVNAME_SIZE]; 566 char b[BDEVNAME_SIZE];
567 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
568 printk(KERN_WARNING "Emergency Thaw on %s\n",
569 bdevname(sb->s_bdev, b));
570}
567 571
568 spin_lock(&sb_lock); 572static void do_thaw_all(struct work_struct *work)
569restart: 573{
570 list_for_each_entry(sb, &super_blocks, s_list) { 574 iterate_supers(do_thaw_one, NULL);
571 sb->s_count++;
572 spin_unlock(&sb_lock);
573 down_read(&sb->s_umount);
574 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
575 printk(KERN_WARNING "Emergency Thaw on %s\n",
576 bdevname(sb->s_bdev, b));
577 up_read(&sb->s_umount);
578 spin_lock(&sb_lock);
579 if (__put_super_and_need_restart(sb))
580 goto restart;
581 }
582 spin_unlock(&sb_lock);
583 kfree(work); 575 kfree(work);
584 printk(KERN_WARNING "Emergency Thaw complete\n"); 576 printk(KERN_WARNING "Emergency Thaw complete\n");
585} 577}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a9005d862ed4..d9c60b84949a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
274 struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; 274 struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
275 int rc = 0; 275 int rc = 0;
276 struct page **pages; 276 struct page **pages;
277 struct pagevec pvec;
278 loff_t offset; 277 loff_t offset;
279 u64 len; 278 u64 len;
280 279
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
297 if (rc < 0) 296 if (rc < 0)
298 goto out; 297 goto out;
299 298
300 /* set uptodate and add to lru in pagevec-sized chunks */
301 pagevec_init(&pvec, 0);
302 for (; !list_empty(page_list) && len > 0; 299 for (; !list_empty(page_list) && len > 0;
303 rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { 300 rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) {
304 struct page *page = 301 struct page *page =
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
312 zero_user_segment(page, s, PAGE_CACHE_SIZE); 309 zero_user_segment(page, s, PAGE_CACHE_SIZE);
313 } 310 }
314 311
315 if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { 312 if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
316 page_cache_release(page); 313 page_cache_release(page);
317 dout("readpages %p add_to_page_cache failed %p\n", 314 dout("readpages %p add_to_page_cache failed %p\n",
318 inode, page); 315 inode, page);
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
323 flush_dcache_page(page); 320 flush_dcache_page(page);
324 SetPageUptodate(page); 321 SetPageUptodate(page);
325 unlock_page(page); 322 unlock_page(page);
326 if (pagevec_add(&pvec, page) == 0) 323 page_cache_release(page);
327 pagevec_lru_add_file(&pvec); /* add to lru */
328 } 324 }
329 pagevec_lru_add_file(&pvec);
330 rc = 0; 325 rc = 0;
331 326
332out: 327out:
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req,
568 ceph_release_pages(req->r_pages, req->r_num_pages); 563 ceph_release_pages(req->r_pages, req->r_num_pages);
569 if (req->r_pages_from_pool) 564 if (req->r_pages_from_pool)
570 mempool_free(req->r_pages, 565 mempool_free(req->r_pages,
571 ceph_client(inode->i_sb)->wb_pagevec_pool); 566 ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
572 else 567 else
573 kfree(req->r_pages); 568 kfree(req->r_pages);
574 ceph_osdc_put_request(req); 569 ceph_osdc_put_request(req);
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 818afe72e6c7..9f46de2ba7a7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac,
150 150
151 ret = ac->ops->build_request(ac, p + sizeof(u32), end); 151 ret = ac->ops->build_request(ac, p + sizeof(u32), end);
152 if (ret < 0) { 152 if (ret < 0) {
153 pr_err("error %d building request\n", ret); 153 pr_err("error %d building auth method %s request\n", ret,
154 ac->ops->name);
154 return ret; 155 return ret;
155 } 156 }
156 dout(" built request %d bytes\n", ret); 157 dout(" built request %d bytes\n", ret);
@@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
216 if (ac->protocol != protocol) { 217 if (ac->protocol != protocol) {
217 ret = ceph_auth_init_protocol(ac, protocol); 218 ret = ceph_auth_init_protocol(ac, protocol);
218 if (ret) { 219 if (ret) {
219 pr_err("error %d on auth protocol %d init\n", 220 pr_err("error %d on auth method %s init\n",
220 ret, protocol); 221 ret, ac->ops->name);
221 goto out; 222 goto out;
222 } 223 }
223 } 224 }
@@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
229 if (ret == -EAGAIN) { 230 if (ret == -EAGAIN) {
230 return ceph_build_auth_request(ac, reply_buf, reply_len); 231 return ceph_build_auth_request(ac, reply_buf, reply_len);
231 } else if (ret) { 232 } else if (ret) {
232 pr_err("authentication error %d\n", ret); 233 pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
233 return ret; 234 return ret;
234 } 235 }
235 return 0; 236 return 0;
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index ca4f57cfb267..4429a707c021 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -15,6 +15,8 @@ struct ceph_auth_client;
15struct ceph_authorizer; 15struct ceph_authorizer;
16 16
17struct ceph_auth_client_ops { 17struct ceph_auth_client_ops {
18 const char *name;
19
18 /* 20 /*
19 * true if we are authenticated and can connect to 21 * true if we are authenticated and can connect to
20 * services. 22 * services.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 8cd9e3af07f7..24407c119291 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
94} 94}
95 95
96static const struct ceph_auth_client_ops ceph_auth_none_ops = { 96static const struct ceph_auth_client_ops ceph_auth_none_ops = {
97 .name = "none",
97 .reset = reset, 98 .reset = reset,
98 .destroy = destroy, 99 .destroy = destroy,
99 .is_authenticated = is_authenticated, 100 .is_authenticated = is_authenticated,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index fee5a08da881..7b206231566d 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
127 int ret; 127 int ret;
128 char *dbuf; 128 char *dbuf;
129 char *ticket_buf; 129 char *ticket_buf;
130 u8 struct_v; 130 u8 reply_struct_v;
131 131
132 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); 132 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
133 if (!dbuf) 133 if (!dbuf)
@@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
139 goto out_dbuf; 139 goto out_dbuf;
140 140
141 ceph_decode_need(&p, end, 1 + sizeof(u32), bad); 141 ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
142 struct_v = ceph_decode_8(&p); 142 reply_struct_v = ceph_decode_8(&p);
143 if (struct_v != 1) 143 if (reply_struct_v != 1)
144 goto bad; 144 goto bad;
145 num = ceph_decode_32(&p); 145 num = ceph_decode_32(&p);
146 dout("%d tickets\n", num); 146 dout("%d tickets\n", num);
147 while (num--) { 147 while (num--) {
148 int type; 148 int type;
149 u8 struct_v; 149 u8 tkt_struct_v, blob_struct_v;
150 struct ceph_x_ticket_handler *th; 150 struct ceph_x_ticket_handler *th;
151 void *dp, *dend; 151 void *dp, *dend;
152 int dlen; 152 int dlen;
@@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
165 type = ceph_decode_32(&p); 165 type = ceph_decode_32(&p);
166 dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); 166 dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
167 167
168 struct_v = ceph_decode_8(&p); 168 tkt_struct_v = ceph_decode_8(&p);
169 if (struct_v != 1) 169 if (tkt_struct_v != 1)
170 goto bad; 170 goto bad;
171 171
172 th = get_ticket_handler(ac, type); 172 th = get_ticket_handler(ac, type);
@@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
186 dend = dbuf + dlen; 186 dend = dbuf + dlen;
187 dp = dbuf; 187 dp = dbuf;
188 188
189 struct_v = ceph_decode_8(&dp); 189 tkt_struct_v = ceph_decode_8(&dp);
190 if (struct_v != 1) 190 if (tkt_struct_v != 1)
191 goto bad; 191 goto bad;
192 192
193 memcpy(&old_key, &th->session_key, sizeof(old_key)); 193 memcpy(&old_key, &th->session_key, sizeof(old_key));
@@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
224 tpend = tp + dlen; 224 tpend = tp + dlen;
225 dout(" ticket blob is %d bytes\n", dlen); 225 dout(" ticket blob is %d bytes\n", dlen);
226 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); 226 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
227 struct_v = ceph_decode_8(&tp); 227 blob_struct_v = ceph_decode_8(&tp);
228 new_secret_id = ceph_decode_64(&tp); 228 new_secret_id = ceph_decode_64(&tp);
229 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); 229 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
230 if (ret) 230 if (ret)
@@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
618 618
619 619
620static const struct ceph_auth_client_ops ceph_x_ops = { 620static const struct ceph_auth_client_ops ceph_x_ops = {
621 .name = "x",
621 .is_authenticated = ceph_x_is_authenticated, 622 .is_authenticated = ceph_x_is_authenticated,
622 .build_request = ceph_x_build_request, 623 .build_request = ceph_x_build_request,
623 .handle_reply = ceph_x_handle_reply, 624 .handle_reply = ceph_x_handle_reply,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d9400534b279..0dd0b81e64f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap)
867{ 867{
868 struct ceph_mds_session *session = cap->session; 868 struct ceph_mds_session *session = cap->session;
869 struct ceph_inode_info *ci = cap->ci; 869 struct ceph_inode_info *ci = cap->ci;
870 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; 870 struct ceph_mds_client *mdsc =
871 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
871 int removed = 0; 872 int removed = 0;
872 873
873 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 874 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session,
937 seq, issue_seq, mseq, follows, size, max_size, 938 seq, issue_seq, mseq, follows, size, max_size,
938 xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); 939 xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
939 940
940 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); 941 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
941 if (IS_ERR(msg)) 942 if (!msg)
942 return PTR_ERR(msg); 943 return -ENOMEM;
943 944
944 msg->hdr.tid = cpu_to_le64(flush_tid); 945 msg->hdr.tid = cpu_to_le64(flush_tid);
945 946
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1298 */ 1299 */
1299void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) 1300void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1300{ 1301{
1301 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; 1302 struct ceph_mds_client *mdsc =
1303 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
1302 struct inode *inode = &ci->vfs_inode; 1304 struct inode *inode = &ci->vfs_inode;
1303 int was = ci->i_dirty_caps; 1305 int was = ci->i_dirty_caps;
1304 int dirty = 0; 1306 int dirty = 0;
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1336static int __mark_caps_flushing(struct inode *inode, 1338static int __mark_caps_flushing(struct inode *inode,
1337 struct ceph_mds_session *session) 1339 struct ceph_mds_session *session)
1338{ 1340{
1339 struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; 1341 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
1340 struct ceph_inode_info *ci = ceph_inode(inode); 1342 struct ceph_inode_info *ci = ceph_inode(inode);
1341 int flushing; 1343 int flushing;
1342 1344
@@ -1663,7 +1665,7 @@ ack:
1663static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, 1665static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1664 unsigned *flush_tid) 1666 unsigned *flush_tid)
1665{ 1667{
1666 struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; 1668 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
1667 struct ceph_inode_info *ci = ceph_inode(inode); 1669 struct ceph_inode_info *ci = ceph_inode(inode);
1668 int unlock_session = session ? 0 : 1; 1670 int unlock_session = session ? 0 : 1;
1669 int flushing = 0; 1671 int flushing = 0;
@@ -1716,10 +1718,9 @@ out_unlocked:
1716static int caps_are_flushed(struct inode *inode, unsigned tid) 1718static int caps_are_flushed(struct inode *inode, unsigned tid)
1717{ 1719{
1718 struct ceph_inode_info *ci = ceph_inode(inode); 1720 struct ceph_inode_info *ci = ceph_inode(inode);
1719 int dirty, i, ret = 1; 1721 int i, ret = 1;
1720 1722
1721 spin_lock(&inode->i_lock); 1723 spin_lock(&inode->i_lock);
1722 dirty = __ceph_caps_dirty(ci);
1723 for (i = 0; i < CEPH_CAP_BITS; i++) 1724 for (i = 0; i < CEPH_CAP_BITS; i++)
1724 if ((ci->i_flushing_caps & (1 << i)) && 1725 if ((ci->i_flushing_caps & (1 << i)) &&
1725 ci->i_cap_flush_tid[i] <= tid) { 1726 ci->i_cap_flush_tid[i] <= tid) {
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1829 err = wait_event_interruptible(ci->i_cap_wq, 1830 err = wait_event_interruptible(ci->i_cap_wq,
1830 caps_are_flushed(inode, flush_tid)); 1831 caps_are_flushed(inode, flush_tid));
1831 } else { 1832 } else {
1832 struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; 1833 struct ceph_mds_client *mdsc =
1834 &ceph_sb_to_client(inode->i_sb)->mdsc;
1833 1835
1834 spin_lock(&inode->i_lock); 1836 spin_lock(&inode->i_lock);
1835 if (__ceph_caps_dirty(ci)) 1837 if (__ceph_caps_dirty(ci))
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2411 __releases(inode->i_lock) 2413 __releases(inode->i_lock)
2412{ 2414{
2413 struct ceph_inode_info *ci = ceph_inode(inode); 2415 struct ceph_inode_info *ci = ceph_inode(inode);
2414 struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; 2416 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
2415 unsigned seq = le32_to_cpu(m->seq); 2417 unsigned seq = le32_to_cpu(m->seq);
2416 int dirty = le32_to_cpu(m->dirty); 2418 int dirty = le32_to_cpu(m->dirty);
2417 int cleaned = 0; 2419 int cleaned = 0;
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 0c2241ef3653..3b9eeed097b3 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -19,7 +19,7 @@
19 * Ceph release version 19 * Ceph release version
20 */ 20 */
21#define CEPH_VERSION_MAJOR 0 21#define CEPH_VERSION_MAJOR 0
22#define CEPH_VERSION_MINOR 19 22#define CEPH_VERSION_MINOR 20
23#define CEPH_VERSION_PATCH 0 23#define CEPH_VERSION_PATCH 0
24 24
25#define _CEPH_STRINGIFY(x) #x 25#define _CEPH_STRINGIFY(x) #x
@@ -36,7 +36,7 @@
36 * client-facing protocol. 36 * client-facing protocol.
37 */ 37 */
38#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ 38#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
39#define CEPH_MDS_PROTOCOL 9 /* cluster internal */ 39#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
40#define CEPH_MON_PROTOCOL 5 /* cluster internal */ 40#define CEPH_MON_PROTOCOL 5 /* cluster internal */
41#define CEPH_OSDC_PROTOCOL 24 /* server/client */ 41#define CEPH_OSDC_PROTOCOL 24 /* server/client */
42#define CEPH_MDSC_PROTOCOL 32 /* server/client */ 42#define CEPH_MDSC_PROTOCOL 32 /* server/client */
@@ -53,8 +53,18 @@
53/* 53/*
54 * feature bits 54 * feature bits
55 */ 55 */
56#define CEPH_FEATURE_SUPPORTED 0 56#define CEPH_FEATURE_UID 1
57#define CEPH_FEATURE_REQUIRED 0 57#define CEPH_FEATURE_NOSRCADDR 2
58#define CEPH_FEATURE_FLOCK 4
59
60#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
61#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID
62#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK
63#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID
64#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
65#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID
66#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR
67#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR
58 68
59 69
60/* 70/*
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
91#define CEPH_AUTH_NONE 0x1 101#define CEPH_AUTH_NONE 0x1
92#define CEPH_AUTH_CEPHX 0x2 102#define CEPH_AUTH_CEPHX 0x2
93 103
104#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
105
94 106
95/********************************************* 107/*********************************************
96 * message layer 108 * message layer
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
128#define CEPH_MSG_CLIENT_SNAP 0x312 140#define CEPH_MSG_CLIENT_SNAP 0x312
129#define CEPH_MSG_CLIENT_CAPRELEASE 0x313 141#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
130 142
143/* pool ops */
144#define CEPH_MSG_POOLOP_REPLY 48
145#define CEPH_MSG_POOLOP 49
146
147
131/* osd */ 148/* osd */
132#define CEPH_MSG_OSD_MAP 41 149#define CEPH_MSG_OSD_MAP 41
133#define CEPH_MSG_OSD_OP 42 150#define CEPH_MSG_OSD_OP 42
134#define CEPH_MSG_OSD_OPREPLY 43 151#define CEPH_MSG_OSD_OPREPLY 43
135 152
153/* pool operations */
154enum {
155 POOL_OP_CREATE = 0x01,
156 POOL_OP_DELETE = 0x02,
157 POOL_OP_AUID_CHANGE = 0x03,
158 POOL_OP_CREATE_SNAP = 0x11,
159 POOL_OP_DELETE_SNAP = 0x12,
160 POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
161 POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
162};
163
136struct ceph_mon_request_header { 164struct ceph_mon_request_header {
137 __le64 have_version; 165 __le64 have_version;
138 __le16 session_mon; 166 __le16 session_mon;
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply {
155 struct ceph_statfs st; 183 struct ceph_statfs st;
156} __attribute__ ((packed)); 184} __attribute__ ((packed));
157 185
186const char *ceph_pool_op_name(int op);
187
188struct ceph_mon_poolop {
189 struct ceph_mon_request_header monhdr;
190 struct ceph_fsid fsid;
191 __le32 pool;
192 __le32 op;
193 __le64 auid;
194 __le64 snapid;
195 __le32 name_len;
196} __attribute__ ((packed));
197
198struct ceph_mon_poolop_reply {
199 struct ceph_mon_request_header monhdr;
200 struct ceph_fsid fsid;
201 __le32 reply_code;
202 __le32 epoch;
203 char has_data;
204 char data[0];
205} __attribute__ ((packed));
206
207struct ceph_mon_unmanaged_snap {
208 __le64 snapid;
209} __attribute__ ((packed));
210
158struct ceph_osd_getmap { 211struct ceph_osd_getmap {
159 struct ceph_mon_request_header monhdr; 212 struct ceph_mon_request_header monhdr;
160 struct ceph_fsid fsid; 213 struct ceph_fsid fsid;
@@ -308,6 +361,7 @@ union ceph_mds_request_args {
308 struct { 361 struct {
309 __le32 frag; /* which dir fragment */ 362 __le32 frag; /* which dir fragment */
310 __le32 max_entries; /* how many dentries to grab */ 363 __le32 max_entries; /* how many dentries to grab */
364 __le32 max_bytes;
311 } __attribute__ ((packed)) readdir; 365 } __attribute__ ((packed)) readdir;
312 struct { 366 struct {
313 __le32 mode; 367 __le32 mode;
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c
index 8e4be6a80c62..7503aee828ce 100644
--- a/fs/ceph/ceph_strings.c
+++ b/fs/ceph/ceph_strings.c
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type)
10 case CEPH_ENTITY_TYPE_OSD: return "osd"; 10 case CEPH_ENTITY_TYPE_OSD: return "osd";
11 case CEPH_ENTITY_TYPE_MON: return "mon"; 11 case CEPH_ENTITY_TYPE_MON: return "mon";
12 case CEPH_ENTITY_TYPE_CLIENT: return "client"; 12 case CEPH_ENTITY_TYPE_CLIENT: return "client";
13 case CEPH_ENTITY_TYPE_ADMIN: return "admin";
14 case CEPH_ENTITY_TYPE_AUTH: return "auth"; 13 case CEPH_ENTITY_TYPE_AUTH: return "auth";
15 default: return "unknown"; 14 default: return "unknown";
16 } 15 }
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op)
45 case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; 44 case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
46 case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; 45 case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
47 case CEPH_OSD_OP_RMXATTR: return "rmxattr"; 46 case CEPH_OSD_OP_RMXATTR: return "rmxattr";
47 case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
48 48
49 case CEPH_OSD_OP_PULL: return "pull"; 49 case CEPH_OSD_OP_PULL: return "pull";
50 case CEPH_OSD_OP_PUSH: return "push"; 50 case CEPH_OSD_OP_PUSH: return "push";
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o)
174 } 174 }
175 return "???"; 175 return "???";
176} 176}
177
178const char *ceph_pool_op_name(int op)
179{
180 switch (op) {
181 case POOL_OP_CREATE: return "create";
182 case POOL_OP_DELETE: return "delete";
183 case POOL_OP_AUID_CHANGE: return "auid change";
184 case POOL_OP_CREATE_SNAP: return "create snap";
185 case POOL_OP_DELETE_SNAP: return "delete snap";
186 case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
187 case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
188 }
189 return "???";
190}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f7048da92acc..3be33fb066cc 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p)
113static int monc_show(struct seq_file *s, void *p) 113static int monc_show(struct seq_file *s, void *p)
114{ 114{
115 struct ceph_client *client = s->private; 115 struct ceph_client *client = s->private;
116 struct ceph_mon_statfs_request *req; 116 struct ceph_mon_generic_request *req;
117 struct ceph_mon_client *monc = &client->monc; 117 struct ceph_mon_client *monc = &client->monc;
118 struct rb_node *rp; 118 struct rb_node *rp;
119 119
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p)
126 if (monc->want_next_osdmap) 126 if (monc->want_next_osdmap)
127 seq_printf(s, "want next osdmap\n"); 127 seq_printf(s, "want next osdmap\n");
128 128
129 for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { 129 for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
130 req = rb_entry(rp, struct ceph_mon_statfs_request, node); 130 __u16 op;
131 seq_printf(s, "%lld statfs\n", req->tid); 131 req = rb_entry(rp, struct ceph_mon_generic_request, node);
132 op = le16_to_cpu(req->request->hdr.type);
133 if (op == CEPH_MSG_STATFS)
134 seq_printf(s, "%lld statfs\n", req->tid);
135 else
136 seq_printf(s, "%lld unknown\n", req->tid);
132 } 137 }
133 138
134 mutex_unlock(&monc->mutex); 139 mutex_unlock(&monc->mutex);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 650d2db5ed26..4fd30900eff7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry)
51 return -ENOMEM; /* oh well */ 51 return -ENOMEM; /* oh well */
52 52
53 spin_lock(&dentry->d_lock); 53 spin_lock(&dentry->d_lock);
54 if (dentry->d_fsdata) /* lost a race */ 54 if (dentry->d_fsdata) {
55 /* lost a race */
56 kmem_cache_free(ceph_dentry_cachep, di);
55 goto out_unlock; 57 goto out_unlock;
58 }
56 di->dentry = dentry; 59 di->dentry = dentry;
57 di->lease_session = NULL; 60 di->lease_session = NULL;
58 dentry->d_fsdata = di; 61 dentry->d_fsdata = di;
@@ -125,7 +128,8 @@ more:
125 dentry = list_entry(p, struct dentry, d_u.d_child); 128 dentry = list_entry(p, struct dentry, d_u.d_child);
126 di = ceph_dentry(dentry); 129 di = ceph_dentry(dentry);
127 while (1) { 130 while (1) {
128 dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, 131 dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
132 d_unhashed(dentry) ? "!hashed" : "hashed",
129 parent->d_subdirs.prev, parent->d_subdirs.next); 133 parent->d_subdirs.prev, parent->d_subdirs.next);
130 if (p == &parent->d_subdirs) { 134 if (p == &parent->d_subdirs) {
131 fi->at_end = 1; 135 fi->at_end = 1;
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
229 u32 ftype; 233 u32 ftype;
230 struct ceph_mds_reply_info_parsed *rinfo; 234 struct ceph_mds_reply_info_parsed *rinfo;
231 const int max_entries = client->mount_args->max_readdir; 235 const int max_entries = client->mount_args->max_readdir;
236 const int max_bytes = client->mount_args->max_readdir_bytes;
232 237
233 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); 238 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
234 if (fi->at_end) 239 if (fi->at_end)
@@ -312,6 +317,7 @@ more:
312 req->r_readdir_offset = fi->next_offset; 317 req->r_readdir_offset = fi->next_offset;
313 req->r_args.readdir.frag = cpu_to_le32(frag); 318 req->r_args.readdir.frag = cpu_to_le32(frag);
314 req->r_args.readdir.max_entries = cpu_to_le32(max_entries); 319 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
320 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
315 req->r_num_caps = max_entries + 1; 321 req->r_num_caps = max_entries + 1;
316 err = ceph_mdsc_do_request(mdsc, NULL, req); 322 err = ceph_mdsc_do_request(mdsc, NULL, req);
317 if (err < 0) { 323 if (err < 0) {
@@ -335,7 +341,7 @@ more:
335 if (req->r_reply_info.dir_end) { 341 if (req->r_reply_info.dir_end) {
336 kfree(fi->last_name); 342 kfree(fi->last_name);
337 fi->last_name = NULL; 343 fi->last_name = NULL;
338 fi->next_offset = 0; 344 fi->next_offset = 2;
339 } else { 345 } else {
340 rinfo = &req->r_reply_info; 346 rinfo = &req->r_reply_info;
341 err = note_last_dentry(fi, 347 err = note_last_dentry(fi,
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
478struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, 484struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
479 struct dentry *dentry, int err) 485 struct dentry *dentry, int err)
480{ 486{
481 struct ceph_client *client = ceph_client(dentry->d_sb); 487 struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
482 struct inode *parent = dentry->d_parent->d_inode; 488 struct inode *parent = dentry->d_parent->d_inode;
483 489
484 /* .snap dir? */ 490 /* .snap dir? */
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
568 !is_root_ceph_dentry(dir, dentry) && 574 !is_root_ceph_dentry(dir, dentry) &&
569 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 575 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
570 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 576 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
571 di->offset = ci->i_max_offset++;
572 spin_unlock(&dir->i_lock); 577 spin_unlock(&dir->i_lock);
573 dout(" dir %p complete, -ENOENT\n", dir); 578 dout(" dir %p complete, -ENOENT\n", dir);
574 d_add(dentry, NULL); 579 d_add(dentry, NULL);
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
888 893
889 /* ensure target dentry is invalidated, despite 894 /* ensure target dentry is invalidated, despite
890 rehashing bug in vfs_rename_dir */ 895 rehashing bug in vfs_rename_dir */
891 new_dentry->d_time = jiffies; 896 ceph_invalidate_dentry_lease(new_dentry);
892 ceph_dentry(new_dentry)->lease_shared_gen = 0;
893 } 897 }
894 ceph_mdsc_put_request(req); 898 ceph_mdsc_put_request(req);
895 return err; 899 return err;
896} 900}
897 901
902/*
903 * Ensure a dentry lease will no longer revalidate.
904 */
905void ceph_invalidate_dentry_lease(struct dentry *dentry)
906{
907 spin_lock(&dentry->d_lock);
908 dentry->d_time = jiffies;
909 ceph_dentry(dentry)->lease_shared_gen = 0;
910 spin_unlock(&dentry->d_lock);
911}
898 912
899/* 913/*
900 * Check if dentry lease is valid. If not, delete the lease. Try to 914 * Check if dentry lease is valid. If not, delete the lease. Try to
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
972{ 986{
973 struct inode *dir = dentry->d_parent->d_inode; 987 struct inode *dir = dentry->d_parent->d_inode;
974 988
975 dout("d_revalidate %p '%.*s' inode %p\n", dentry, 989 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
976 dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 990 dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
991 ceph_dentry(dentry)->offset);
977 992
978 /* always trust cached snapped dentries, snapdir dentry */ 993 /* always trust cached snapped dentries, snapdir dentry */
979 if (ceph_snap(dir) != CEPH_NOSNAP) { 994 if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1050 struct ceph_inode_info *ci = ceph_inode(inode); 1065 struct ceph_inode_info *ci = ceph_inode(inode);
1051 int left; 1066 int left;
1052 1067
1053 if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) 1068 if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
1054 return -EISDIR; 1069 return -EISDIR;
1055 1070
1056 if (!cf->dir_info) { 1071 if (!cf->dir_info) {
@@ -1152,7 +1167,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
1152 dout("dentry_lru_add %p %p '%.*s'\n", di, dn, 1167 dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
1153 dn->d_name.len, dn->d_name.name); 1168 dn->d_name.len, dn->d_name.name);
1154 if (di) { 1169 if (di) {
1155 mdsc = &ceph_client(dn->d_sb)->mdsc; 1170 mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
1156 spin_lock(&mdsc->dentry_lru_lock); 1171 spin_lock(&mdsc->dentry_lru_lock);
1157 list_add_tail(&di->lru, &mdsc->dentry_lru); 1172 list_add_tail(&di->lru, &mdsc->dentry_lru);
1158 mdsc->num_dentry++; 1173 mdsc->num_dentry++;
@@ -1165,10 +1180,10 @@ void ceph_dentry_lru_touch(struct dentry *dn)
1165 struct ceph_dentry_info *di = ceph_dentry(dn); 1180 struct ceph_dentry_info *di = ceph_dentry(dn);
1166 struct ceph_mds_client *mdsc; 1181 struct ceph_mds_client *mdsc;
1167 1182
1168 dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, 1183 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
1169 dn->d_name.len, dn->d_name.name); 1184 dn->d_name.len, dn->d_name.name, di->offset);
1170 if (di) { 1185 if (di) {
1171 mdsc = &ceph_client(dn->d_sb)->mdsc; 1186 mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
1172 spin_lock(&mdsc->dentry_lru_lock); 1187 spin_lock(&mdsc->dentry_lru_lock);
1173 list_move_tail(&di->lru, &mdsc->dentry_lru); 1188 list_move_tail(&di->lru, &mdsc->dentry_lru);
1174 spin_unlock(&mdsc->dentry_lru_lock); 1189 spin_unlock(&mdsc->dentry_lru_lock);
@@ -1183,7 +1198,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
1183 dout("dentry_lru_del %p %p '%.*s'\n", di, dn, 1198 dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
1184 dn->d_name.len, dn->d_name.name); 1199 dn->d_name.len, dn->d_name.name);
1185 if (di) { 1200 if (di) {
1186 mdsc = &ceph_client(dn->d_sb)->mdsc; 1201 mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
1187 spin_lock(&mdsc->dentry_lru_lock); 1202 spin_lock(&mdsc->dentry_lru_lock);
1188 list_del_init(&di->lru); 1203 list_del_init(&di->lru);
1189 mdsc->num_dentry--; 1204 mdsc->num_dentry--;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9d67572fb328..17447644d675 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
93 return ERR_PTR(-ESTALE); 93 return ERR_PTR(-ESTALE);
94 94
95 dentry = d_obtain_alias(inode); 95 dentry = d_obtain_alias(inode);
96 if (!dentry) { 96 if (IS_ERR(dentry)) {
97 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", 97 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
98 fh->ino, inode); 98 fh->ino, inode);
99 iput(inode); 99 iput(inode);
100 return ERR_PTR(-ENOMEM); 100 return dentry;
101 } 101 }
102 err = ceph_init_dentry(dentry); 102 err = ceph_init_dentry(dentry);
103 103
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
115static struct dentry *__cfh_to_dentry(struct super_block *sb, 115static struct dentry *__cfh_to_dentry(struct super_block *sb,
116 struct ceph_nfs_confh *cfh) 116 struct ceph_nfs_confh *cfh)
117{ 117{
118 struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc; 118 struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
119 struct inode *inode; 119 struct inode *inode;
120 struct dentry *dentry; 120 struct dentry *dentry;
121 struct ceph_vino vino; 121 struct ceph_vino vino;
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
149 } 149 }
150 150
151 dentry = d_obtain_alias(inode); 151 dentry = d_obtain_alias(inode);
152 if (!dentry) { 152 if (IS_ERR(dentry)) {
153 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", 153 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
154 cfh->ino, inode); 154 cfh->ino, inode);
155 iput(inode); 155 iput(inode);
156 return ERR_PTR(-ENOMEM); 156 return dentry;
157 } 157 }
158 err = ceph_init_dentry(dentry); 158 err = ceph_init_dentry(dentry);
159 if (err < 0) { 159 if (err < 0) {
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
202 return ERR_PTR(-ESTALE); 202 return ERR_PTR(-ESTALE);
203 203
204 dentry = d_obtain_alias(inode); 204 dentry = d_obtain_alias(inode);
205 if (!dentry) { 205 if (IS_ERR(dentry)) {
206 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", 206 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
207 cfh->ino, inode); 207 cfh->ino, inode);
208 iput(inode); 208 iput(inode);
209 return ERR_PTR(-ENOMEM); 209 return dentry;
210 } 210 }
211 err = ceph_init_dentry(dentry); 211 err = ceph_init_dentry(dentry);
212 if (err < 0) { 212 if (err < 0) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed6f19721d6e..6512b6701b9e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages)
317/* 317/*
318 * allocate a vector new pages 318 * allocate a vector new pages
319 */ 319 */
320static struct page **alloc_page_vector(int num_pages) 320struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
321{ 321{
322 struct page **pages; 322 struct page **pages;
323 int i; 323 int i;
324 324
325 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 325 pages = kmalloc(sizeof(*pages) * num_pages, flags);
326 if (!pages) 326 if (!pages)
327 return ERR_PTR(-ENOMEM); 327 return ERR_PTR(-ENOMEM);
328 for (i = 0; i < num_pages; i++) { 328 for (i = 0; i < num_pages; i++) {
329 pages[i] = alloc_page(GFP_NOFS); 329 pages[i] = __page_cache_alloc(flags);
330 if (pages[i] == NULL) { 330 if (pages[i] == NULL) {
331 ceph_release_page_vector(pages, i); 331 ceph_release_page_vector(pages, i);
332 return ERR_PTR(-ENOMEM); 332 return ERR_PTR(-ENOMEM);
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
540 * in sequence. 540 * in sequence.
541 */ 541 */
542 } else { 542 } else {
543 pages = alloc_page_vector(num_pages); 543 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
544 } 544 }
545 if (IS_ERR(pages)) 545 if (IS_ERR(pages))
546 return PTR_ERR(pages); 546 return PTR_ERR(pages);
@@ -649,8 +649,8 @@ more:
649 do_sync, 649 do_sync,
650 ci->i_truncate_seq, ci->i_truncate_size, 650 ci->i_truncate_seq, ci->i_truncate_size,
651 &mtime, false, 2); 651 &mtime, false, 2);
652 if (IS_ERR(req)) 652 if (!req)
653 return PTR_ERR(req); 653 return -ENOMEM;
654 654
655 num_pages = calc_pages_for(pos, len); 655 num_pages = calc_pages_for(pos, len);
656 656
@@ -668,7 +668,7 @@ more:
668 truncate_inode_pages_range(inode->i_mapping, pos, 668 truncate_inode_pages_range(inode->i_mapping, pos,
669 (pos+len) | (PAGE_CACHE_SIZE-1)); 669 (pos+len) | (PAGE_CACHE_SIZE-1));
670 } else { 670 } else {
671 pages = alloc_page_vector(num_pages); 671 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
672 if (IS_ERR(pages)) { 672 if (IS_ERR(pages)) {
673 ret = PTR_ERR(pages); 673 ret = PTR_ERR(pages);
674 goto out; 674 goto out;
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
809 struct file *file = iocb->ki_filp; 809 struct file *file = iocb->ki_filp;
810 struct inode *inode = file->f_dentry->d_inode; 810 struct inode *inode = file->f_dentry->d_inode;
811 struct ceph_inode_info *ci = ceph_inode(inode); 811 struct ceph_inode_info *ci = ceph_inode(inode);
812 struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; 812 struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
813 loff_t endoff = pos + iov->iov_len; 813 loff_t endoff = pos + iov->iov_len;
814 int got = 0; 814 int got = 0;
815 int ret, err; 815 int ret, err;
@@ -844,8 +844,7 @@ retry_snap:
844 if ((ret >= 0 || ret == -EIOCBQUEUED) && 844 if ((ret >= 0 || ret == -EIOCBQUEUED) &&
845 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) 845 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
846 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { 846 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
847 err = vfs_fsync_range(file, file->f_path.dentry, 847 err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
848 pos, pos + ret - 1, 1);
849 if (err < 0) 848 if (err < 0)
850 ret = err; 849 ret = err;
851 } 850 }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 85b4d2ffdeba..a81b8b662c7b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode)
384 */ 384 */
385 if (ci->i_snap_realm) { 385 if (ci->i_snap_realm) {
386 struct ceph_mds_client *mdsc = 386 struct ceph_mds_client *mdsc =
387 &ceph_client(ci->vfs_inode.i_sb)->mdsc; 387 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
388 struct ceph_snap_realm *realm = ci->i_snap_realm; 388 struct ceph_snap_realm *realm = ci->i_snap_realm;
389 389
390 dout(" dropping residual ref to snap realm %p\n", realm); 390 dout(" dropping residual ref to snap realm %p\n", realm);
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode,
619 memcpy(ci->i_xattrs.blob->vec.iov_base, 619 memcpy(ci->i_xattrs.blob->vec.iov_base,
620 iinfo->xattr_data, iinfo->xattr_len); 620 iinfo->xattr_data, iinfo->xattr_len);
621 ci->i_xattrs.version = le64_to_cpu(info->xattr_version); 621 ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
622 xattr_blob = NULL;
622 } 623 }
623 624
624 inode->i_mapping->a_ops = &ceph_aops; 625 inode->i_mapping->a_ops = &ceph_aops;
625 inode->i_mapping->backing_dev_info = 626 inode->i_mapping->backing_dev_info =
626 &ceph_client(inode->i_sb)->backing_dev_info; 627 &ceph_sb_to_client(inode->i_sb)->backing_dev_info;
627 628
628 switch (inode->i_mode & S_IFMT) { 629 switch (inode->i_mode & S_IFMT) {
629 case S_IFIFO: 630 case S_IFIFO:
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode,
674 /* set dir completion flag? */ 675 /* set dir completion flag? */
675 if (ci->i_files == 0 && ci->i_subdirs == 0 && 676 if (ci->i_files == 0 && ci->i_subdirs == 0 &&
676 ceph_snap(inode) == CEPH_NOSNAP && 677 ceph_snap(inode) == CEPH_NOSNAP &&
677 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) { 678 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
679 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
678 dout(" marking %p complete (empty)\n", inode); 680 dout(" marking %p complete (empty)\n", inode);
679 ci->i_ceph_flags |= CEPH_I_COMPLETE; 681 ci->i_ceph_flags |= CEPH_I_COMPLETE;
680 ci->i_max_offset = 2; 682 ci->i_max_offset = 2;
681 } 683 }
682 684
683 /* it may be better to set st_size in getattr instead? */ 685 /* it may be better to set st_size in getattr instead? */
684 if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES)) 686 if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
685 inode->i_size = ci->i_rbytes; 687 inode->i_size = ci->i_rbytes;
686 break; 688 break;
687 default: 689 default:
@@ -802,6 +804,37 @@ out_unlock:
802} 804}
803 805
804/* 806/*
807 * Set dentry's directory position based on the current dir's max, and
808 * order it in d_subdirs, so that dcache_readdir behaves.
809 */
810static void ceph_set_dentry_offset(struct dentry *dn)
811{
812 struct dentry *dir = dn->d_parent;
813 struct inode *inode = dn->d_parent->d_inode;
814 struct ceph_dentry_info *di;
815
816 BUG_ON(!inode);
817
818 di = ceph_dentry(dn);
819
820 spin_lock(&inode->i_lock);
821 if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
822 spin_unlock(&inode->i_lock);
823 return;
824 }
825 di->offset = ceph_inode(inode)->i_max_offset++;
826 spin_unlock(&inode->i_lock);
827
828 spin_lock(&dcache_lock);
829 spin_lock(&dn->d_lock);
830 list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
831 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
832 dn->d_u.d_child.prev, dn->d_u.d_child.next);
833 spin_unlock(&dn->d_lock);
834 spin_unlock(&dcache_lock);
835}
836
837/*
805 * splice a dentry to an inode. 838 * splice a dentry to an inode.
806 * caller must hold directory i_mutex for this to be safe. 839 * caller must hold directory i_mutex for this to be safe.
807 * 840 *
@@ -814,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
814{ 847{
815 struct dentry *realdn; 848 struct dentry *realdn;
816 849
850 BUG_ON(dn->d_inode);
851
817 /* dn must be unhashed */ 852 /* dn must be unhashed */
818 if (!d_unhashed(dn)) 853 if (!d_unhashed(dn))
819 d_drop(dn); 854 d_drop(dn);
@@ -835,44 +870,17 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
835 dn = realdn; 870 dn = realdn;
836 } else { 871 } else {
837 BUG_ON(!ceph_dentry(dn)); 872 BUG_ON(!ceph_dentry(dn));
838
839 dout("dn %p attached to %p ino %llx.%llx\n", 873 dout("dn %p attached to %p ino %llx.%llx\n",
840 dn, dn->d_inode, ceph_vinop(dn->d_inode)); 874 dn, dn->d_inode, ceph_vinop(dn->d_inode));
841 } 875 }
842 if ((!prehash || *prehash) && d_unhashed(dn)) 876 if ((!prehash || *prehash) && d_unhashed(dn))
843 d_rehash(dn); 877 d_rehash(dn);
878 ceph_set_dentry_offset(dn);
844out: 879out:
845 return dn; 880 return dn;
846} 881}
847 882
848/* 883/*
849 * Set dentry's directory position based on the current dir's max, and
850 * order it in d_subdirs, so that dcache_readdir behaves.
851 */
852static void ceph_set_dentry_offset(struct dentry *dn)
853{
854 struct dentry *dir = dn->d_parent;
855 struct inode *inode = dn->d_parent->d_inode;
856 struct ceph_dentry_info *di;
857
858 BUG_ON(!inode);
859
860 di = ceph_dentry(dn);
861
862 spin_lock(&inode->i_lock);
863 di->offset = ceph_inode(inode)->i_max_offset++;
864 spin_unlock(&inode->i_lock);
865
866 spin_lock(&dcache_lock);
867 spin_lock(&dn->d_lock);
868 list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
869 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
870 dn->d_u.d_child.prev, dn->d_u.d_child.next);
871 spin_unlock(&dn->d_lock);
872 spin_unlock(&dcache_lock);
873}
874
875/*
876 * Incorporate results into the local cache. This is either just 884 * Incorporate results into the local cache. This is either just
877 * one inode, or a directory, dentry, and possibly linked-to inode (e.g., 885 * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
878 * after a lookup). 886 * after a lookup).
@@ -933,14 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
933 941
934 if (!rinfo->head->is_target && !rinfo->head->is_dentry) { 942 if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
935 dout("fill_trace reply is empty!\n"); 943 dout("fill_trace reply is empty!\n");
936 if (rinfo->head->result == 0 && req->r_locked_dir) { 944 if (rinfo->head->result == 0 && req->r_locked_dir)
937 struct ceph_inode_info *ci = 945 ceph_invalidate_dir_request(req);
938 ceph_inode(req->r_locked_dir);
939 dout(" clearing %p complete (empty trace)\n",
940 req->r_locked_dir);
941 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
942 ci->i_release_count++;
943 }
944 return 0; 946 return 0;
945 } 947 }
946 948
@@ -1011,13 +1013,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1011 req->r_old_dentry->d_name.len, 1013 req->r_old_dentry->d_name.len,
1012 req->r_old_dentry->d_name.name, 1014 req->r_old_dentry->d_name.name,
1013 dn, dn->d_name.len, dn->d_name.name); 1015 dn, dn->d_name.len, dn->d_name.name);
1016
1014 /* ensure target dentry is invalidated, despite 1017 /* ensure target dentry is invalidated, despite
1015 rehashing bug in vfs_rename_dir */ 1018 rehashing bug in vfs_rename_dir */
1016 dn->d_time = jiffies; 1019 ceph_invalidate_dentry_lease(dn);
1017 ceph_dentry(dn)->lease_shared_gen = 0; 1020
1018 /* take overwritten dentry's readdir offset */ 1021 /* take overwritten dentry's readdir offset */
1022 dout("dn %p gets %p offset %lld (old offset %lld)\n",
1023 req->r_old_dentry, dn, ceph_dentry(dn)->offset,
1024 ceph_dentry(req->r_old_dentry)->offset);
1019 ceph_dentry(req->r_old_dentry)->offset = 1025 ceph_dentry(req->r_old_dentry)->offset =
1020 ceph_dentry(dn)->offset; 1026 ceph_dentry(dn)->offset;
1027
1021 dn = req->r_old_dentry; /* use old_dentry */ 1028 dn = req->r_old_dentry; /* use old_dentry */
1022 in = dn->d_inode; 1029 in = dn->d_inode;
1023 } 1030 }
@@ -1059,7 +1066,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1059 goto done; 1066 goto done;
1060 } 1067 }
1061 req->r_dentry = dn; /* may have spliced */ 1068 req->r_dentry = dn; /* may have spliced */
1062 ceph_set_dentry_offset(dn);
1063 igrab(in); 1069 igrab(in);
1064 } else if (ceph_ino(in) == vino.ino && 1070 } else if (ceph_ino(in) == vino.ino &&
1065 ceph_snap(in) == vino.snap) { 1071 ceph_snap(in) == vino.snap) {
@@ -1102,7 +1108,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1102 err = PTR_ERR(dn); 1108 err = PTR_ERR(dn);
1103 goto done; 1109 goto done;
1104 } 1110 }
1105 ceph_set_dentry_offset(dn);
1106 req->r_dentry = dn; /* may have spliced */ 1111 req->r_dentry = dn; /* may have spliced */
1107 igrab(in); 1112 igrab(in);
1108 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1113 rinfo->head->is_dentry = 1; /* fool notrace handlers */
@@ -1429,7 +1434,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1429{ 1434{
1430 struct ceph_inode_info *ci = ceph_inode(inode); 1435 struct ceph_inode_info *ci = ceph_inode(inode);
1431 1436
1432 if (queue_work(ceph_client(inode->i_sb)->trunc_wq, 1437 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1433 &ci->i_vmtruncate_work)) { 1438 &ci->i_vmtruncate_work)) {
1434 dout("ceph_queue_vmtruncate %p\n", inode); 1439 dout("ceph_queue_vmtruncate %p\n", inode);
1435 igrab(inode); 1440 igrab(inode);
@@ -1518,7 +1523,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1518 struct inode *parent_inode = dentry->d_parent->d_inode; 1523 struct inode *parent_inode = dentry->d_parent->d_inode;
1519 const unsigned int ia_valid = attr->ia_valid; 1524 const unsigned int ia_valid = attr->ia_valid;
1520 struct ceph_mds_request *req; 1525 struct ceph_mds_request *req;
1521 struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc; 1526 struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
1522 int issued; 1527 int issued;
1523 int release = 0, dirtied = 0; 1528 int release = 0, dirtied = 0;
1524 int mask = 0; 1529 int mask = 0;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8a5bcae62846..d085f07756b4 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
98 struct ceph_ioctl_dataloc dl; 98 struct ceph_ioctl_dataloc dl;
99 struct inode *inode = file->f_dentry->d_inode; 99 struct inode *inode = file->f_dentry->d_inode;
100 struct ceph_inode_info *ci = ceph_inode(inode); 100 struct ceph_inode_info *ci = ceph_inode(inode);
101 struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; 101 struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
102 u64 len = 1, olen; 102 u64 len = 1, olen;
103 u64 tmp; 103 u64 tmp;
104 struct ceph_object_layout ol; 104 struct ceph_object_layout ol;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 24561a557e01..885aa5710cfd 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -40,7 +40,7 @@
40static void __wake_requests(struct ceph_mds_client *mdsc, 40static void __wake_requests(struct ceph_mds_client *mdsc,
41 struct list_head *head); 41 struct list_head *head);
42 42
43const static struct ceph_connection_operations mds_con_ops; 43static const struct ceph_connection_operations mds_con_ops;
44 44
45 45
46/* 46/*
@@ -665,10 +665,10 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
665 struct ceph_msg *msg; 665 struct ceph_msg *msg;
666 struct ceph_mds_session_head *h; 666 struct ceph_mds_session_head *h;
667 667
668 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); 668 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS);
669 if (IS_ERR(msg)) { 669 if (!msg) {
670 pr_err("create_session_msg ENOMEM creating msg\n"); 670 pr_err("create_session_msg ENOMEM creating msg\n");
671 return ERR_PTR(PTR_ERR(msg)); 671 return NULL;
672 } 672 }
673 h = msg->front.iov_base; 673 h = msg->front.iov_base;
674 h->op = cpu_to_le32(op); 674 h->op = cpu_to_le32(op);
@@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc,
687 struct ceph_msg *msg; 687 struct ceph_msg *msg;
688 int mstate; 688 int mstate;
689 int mds = session->s_mds; 689 int mds = session->s_mds;
690 int err = 0;
691 690
692 /* wait for mds to go active? */ 691 /* wait for mds to go active? */
693 mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); 692 mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
@@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc,
698 697
699 /* send connect message */ 698 /* send connect message */
700 msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); 699 msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
701 if (IS_ERR(msg)) { 700 if (!msg)
702 err = PTR_ERR(msg); 701 return -ENOMEM;
703 goto out;
704 }
705 ceph_con_send(&session->s_con, msg); 702 ceph_con_send(&session->s_con, msg);
706
707out:
708 return 0; 703 return 0;
709} 704}
710 705
@@ -804,12 +799,49 @@ out:
804} 799}
805 800
806static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, 801static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
807 void *arg) 802 void *arg)
808{ 803{
809 struct ceph_inode_info *ci = ceph_inode(inode); 804 struct ceph_inode_info *ci = ceph_inode(inode);
805 int drop = 0;
806
810 dout("removing cap %p, ci is %p, inode is %p\n", 807 dout("removing cap %p, ci is %p, inode is %p\n",
811 cap, ci, &ci->vfs_inode); 808 cap, ci, &ci->vfs_inode);
812 ceph_remove_cap(cap); 809 spin_lock(&inode->i_lock);
810 __ceph_remove_cap(cap);
811 if (!__ceph_is_any_real_caps(ci)) {
812 struct ceph_mds_client *mdsc =
813 &ceph_sb_to_client(inode->i_sb)->mdsc;
814
815 spin_lock(&mdsc->cap_dirty_lock);
816 if (!list_empty(&ci->i_dirty_item)) {
817 pr_info(" dropping dirty %s state for %p %lld\n",
818 ceph_cap_string(ci->i_dirty_caps),
819 inode, ceph_ino(inode));
820 ci->i_dirty_caps = 0;
821 list_del_init(&ci->i_dirty_item);
822 drop = 1;
823 }
824 if (!list_empty(&ci->i_flushing_item)) {
825 pr_info(" dropping dirty+flushing %s state for %p %lld\n",
826 ceph_cap_string(ci->i_flushing_caps),
827 inode, ceph_ino(inode));
828 ci->i_flushing_caps = 0;
829 list_del_init(&ci->i_flushing_item);
830 mdsc->num_cap_flushing--;
831 drop = 1;
832 }
833 if (drop && ci->i_wrbuffer_ref) {
834 pr_info(" dropping dirty data for %p %lld\n",
835 inode, ceph_ino(inode));
836 ci->i_wrbuffer_ref = 0;
837 ci->i_wrbuffer_ref_head = 0;
838 drop++;
839 }
840 spin_unlock(&mdsc->cap_dirty_lock);
841 }
842 spin_unlock(&inode->i_lock);
843 while (drop--)
844 iput(inode);
813 return 0; 845 return 0;
814} 846}
815 847
@@ -821,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
821 dout("remove_session_caps on %p\n", session); 853 dout("remove_session_caps on %p\n", session);
822 iterate_session_caps(session, remove_session_caps_cb, NULL); 854 iterate_session_caps(session, remove_session_caps_cb, NULL);
823 BUG_ON(session->s_nr_caps > 0); 855 BUG_ON(session->s_nr_caps > 0);
856 BUG_ON(!list_empty(&session->s_cap_flushing));
824 cleanup_cap_releases(session); 857 cleanup_cap_releases(session);
825} 858}
826 859
@@ -883,8 +916,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
883 ceph_mds_state_name(state)); 916 ceph_mds_state_name(state));
884 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 917 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
885 ++session->s_renew_seq); 918 ++session->s_renew_seq);
886 if (IS_ERR(msg)) 919 if (!msg)
887 return PTR_ERR(msg); 920 return -ENOMEM;
888 ceph_con_send(&session->s_con, msg); 921 ceph_con_send(&session->s_con, msg);
889 return 0; 922 return 0;
890} 923}
@@ -931,17 +964,15 @@ static int request_close_session(struct ceph_mds_client *mdsc,
931 struct ceph_mds_session *session) 964 struct ceph_mds_session *session)
932{ 965{
933 struct ceph_msg *msg; 966 struct ceph_msg *msg;
934 int err = 0;
935 967
936 dout("request_close_session mds%d state %s seq %lld\n", 968 dout("request_close_session mds%d state %s seq %lld\n",
937 session->s_mds, session_state_name(session->s_state), 969 session->s_mds, session_state_name(session->s_state),
938 session->s_seq); 970 session->s_seq);
939 msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); 971 msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
940 if (IS_ERR(msg)) 972 if (!msg)
941 err = PTR_ERR(msg); 973 return -ENOMEM;
942 else 974 ceph_con_send(&session->s_con, msg);
943 ceph_con_send(&session->s_con, msg); 975 return 0;
944 return err;
945} 976}
946 977
947/* 978/*
@@ -1059,7 +1090,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc,
1059 while (session->s_num_cap_releases < session->s_nr_caps + extra) { 1090 while (session->s_num_cap_releases < session->s_nr_caps + extra) {
1060 spin_unlock(&session->s_cap_lock); 1091 spin_unlock(&session->s_cap_lock);
1061 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, 1092 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
1062 0, 0, NULL); 1093 GFP_NOFS);
1063 if (!msg) 1094 if (!msg)
1064 goto out_unlocked; 1095 goto out_unlocked;
1065 dout("add_cap_releases %p msg %p now %d\n", session, msg, 1096 dout("add_cap_releases %p msg %p now %d\n", session, msg,
@@ -1151,10 +1182,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc,
1151 struct ceph_msg *msg; 1182 struct ceph_msg *msg;
1152 1183
1153 dout("send_cap_releases mds%d\n", session->s_mds); 1184 dout("send_cap_releases mds%d\n", session->s_mds);
1154 while (1) { 1185 spin_lock(&session->s_cap_lock);
1155 spin_lock(&session->s_cap_lock); 1186 while (!list_empty(&session->s_cap_releases_done)) {
1156 if (list_empty(&session->s_cap_releases_done))
1157 break;
1158 msg = list_first_entry(&session->s_cap_releases_done, 1187 msg = list_first_entry(&session->s_cap_releases_done,
1159 struct ceph_msg, list_head); 1188 struct ceph_msg, list_head);
1160 list_del_init(&msg->list_head); 1189 list_del_init(&msg->list_head);
@@ -1162,10 +1191,49 @@ static void send_cap_releases(struct ceph_mds_client *mdsc,
1162 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1191 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
1163 dout("send_cap_releases mds%d %p\n", session->s_mds, msg); 1192 dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
1164 ceph_con_send(&session->s_con, msg); 1193 ceph_con_send(&session->s_con, msg);
1194 spin_lock(&session->s_cap_lock);
1165 } 1195 }
1166 spin_unlock(&session->s_cap_lock); 1196 spin_unlock(&session->s_cap_lock);
1167} 1197}
1168 1198
1199static void discard_cap_releases(struct ceph_mds_client *mdsc,
1200 struct ceph_mds_session *session)
1201{
1202 struct ceph_msg *msg;
1203 struct ceph_mds_cap_release *head;
1204 unsigned num;
1205
1206 dout("discard_cap_releases mds%d\n", session->s_mds);
1207 spin_lock(&session->s_cap_lock);
1208
1209 /* zero out the in-progress message */
1210 msg = list_first_entry(&session->s_cap_releases,
1211 struct ceph_msg, list_head);
1212 head = msg->front.iov_base;
1213 num = le32_to_cpu(head->num);
1214 dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num);
1215 head->num = cpu_to_le32(0);
1216 session->s_num_cap_releases += num;
1217
1218 /* requeue completed messages */
1219 while (!list_empty(&session->s_cap_releases_done)) {
1220 msg = list_first_entry(&session->s_cap_releases_done,
1221 struct ceph_msg, list_head);
1222 list_del_init(&msg->list_head);
1223
1224 head = msg->front.iov_base;
1225 num = le32_to_cpu(head->num);
1226 dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg,
1227 num);
1228 session->s_num_cap_releases += num;
1229 head->num = cpu_to_le32(0);
1230 msg->front.iov_len = sizeof(*head);
1231 list_add(&msg->list_head, &session->s_cap_releases);
1232 }
1233
1234 spin_unlock(&session->s_cap_lock);
1235}
1236
1169/* 1237/*
1170 * requests 1238 * requests
1171 */ 1239 */
@@ -1181,6 +1249,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
1181 if (!req) 1249 if (!req)
1182 return ERR_PTR(-ENOMEM); 1250 return ERR_PTR(-ENOMEM);
1183 1251
1252 mutex_init(&req->r_fill_mutex);
1184 req->r_started = jiffies; 1253 req->r_started = jiffies;
1185 req->r_resend_mds = -1; 1254 req->r_resend_mds = -1;
1186 INIT_LIST_HEAD(&req->r_unsafe_dir_item); 1255 INIT_LIST_HEAD(&req->r_unsafe_dir_item);
@@ -1251,7 +1320,7 @@ retry:
1251 len += 1 + temp->d_name.len; 1320 len += 1 + temp->d_name.len;
1252 temp = temp->d_parent; 1321 temp = temp->d_parent;
1253 if (temp == NULL) { 1322 if (temp == NULL) {
1254 pr_err("build_path_dentry corrupt dentry %p\n", dentry); 1323 pr_err("build_path corrupt dentry %p\n", dentry);
1255 return ERR_PTR(-EINVAL); 1324 return ERR_PTR(-EINVAL);
1256 } 1325 }
1257 } 1326 }
@@ -1267,7 +1336,7 @@ retry:
1267 struct inode *inode = temp->d_inode; 1336 struct inode *inode = temp->d_inode;
1268 1337
1269 if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { 1338 if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
1270 dout("build_path_dentry path+%d: %p SNAPDIR\n", 1339 dout("build_path path+%d: %p SNAPDIR\n",
1271 pos, temp); 1340 pos, temp);
1272 } else if (stop_on_nosnap && inode && 1341 } else if (stop_on_nosnap && inode &&
1273 ceph_snap(inode) == CEPH_NOSNAP) { 1342 ceph_snap(inode) == CEPH_NOSNAP) {
@@ -1278,20 +1347,18 @@ retry:
1278 break; 1347 break;
1279 strncpy(path + pos, temp->d_name.name, 1348 strncpy(path + pos, temp->d_name.name,
1280 temp->d_name.len); 1349 temp->d_name.len);
1281 dout("build_path_dentry path+%d: %p '%.*s'\n",
1282 pos, temp, temp->d_name.len, path + pos);
1283 } 1350 }
1284 if (pos) 1351 if (pos)
1285 path[--pos] = '/'; 1352 path[--pos] = '/';
1286 temp = temp->d_parent; 1353 temp = temp->d_parent;
1287 if (temp == NULL) { 1354 if (temp == NULL) {
1288 pr_err("build_path_dentry corrupt dentry\n"); 1355 pr_err("build_path corrupt dentry\n");
1289 kfree(path); 1356 kfree(path);
1290 return ERR_PTR(-EINVAL); 1357 return ERR_PTR(-EINVAL);
1291 } 1358 }
1292 } 1359 }
1293 if (pos != 0) { 1360 if (pos != 0) {
1294 pr_err("build_path_dentry did not end path lookup where " 1361 pr_err("build_path did not end path lookup where "
1295 "expected, namelen is %d, pos is %d\n", len, pos); 1362 "expected, namelen is %d, pos is %d\n", len, pos);
1296 /* presumably this is only possible if racing with a 1363 /* presumably this is only possible if racing with a
1297 rename of one of the parent directories (we can not 1364 rename of one of the parent directories (we can not
@@ -1303,7 +1370,7 @@ retry:
1303 1370
1304 *base = ceph_ino(temp->d_inode); 1371 *base = ceph_ino(temp->d_inode);
1305 *plen = len; 1372 *plen = len;
1306 dout("build_path_dentry on %p %d built %llx '%.*s'\n", 1373 dout("build_path on %p %d built %llx '%.*s'\n",
1307 dentry, atomic_read(&dentry->d_count), *base, len, path); 1374 dentry, atomic_read(&dentry->d_count), *base, len, path);
1308 return path; 1375 return path;
1309} 1376}
@@ -1426,9 +1493,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1426 if (req->r_old_dentry_drop) 1493 if (req->r_old_dentry_drop)
1427 len += req->r_old_dentry->d_name.len; 1494 len += req->r_old_dentry->d_name.len;
1428 1495
1429 msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); 1496 msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS);
1430 if (IS_ERR(msg)) 1497 if (!msg) {
1498 msg = ERR_PTR(-ENOMEM);
1431 goto out_free2; 1499 goto out_free2;
1500 }
1432 1501
1433 msg->hdr.tid = cpu_to_le64(req->r_tid); 1502 msg->hdr.tid = cpu_to_le64(req->r_tid);
1434 1503
@@ -1517,9 +1586,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
1517 } 1586 }
1518 msg = create_request_message(mdsc, req, mds); 1587 msg = create_request_message(mdsc, req, mds);
1519 if (IS_ERR(msg)) { 1588 if (IS_ERR(msg)) {
1520 req->r_reply = ERR_PTR(PTR_ERR(msg)); 1589 req->r_err = PTR_ERR(msg);
1521 complete_request(mdsc, req); 1590 complete_request(mdsc, req);
1522 return -PTR_ERR(msg); 1591 return PTR_ERR(msg);
1523 } 1592 }
1524 req->r_request = msg; 1593 req->r_request = msg;
1525 1594
@@ -1552,7 +1621,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
1552 int mds = -1; 1621 int mds = -1;
1553 int err = -EAGAIN; 1622 int err = -EAGAIN;
1554 1623
1555 if (req->r_reply) 1624 if (req->r_err || req->r_got_result)
1556 goto out; 1625 goto out;
1557 1626
1558 if (req->r_timeout && 1627 if (req->r_timeout &&
@@ -1609,7 +1678,7 @@ out:
1609 return err; 1678 return err;
1610 1679
1611finish: 1680finish:
1612 req->r_reply = ERR_PTR(err); 1681 req->r_err = err;
1613 complete_request(mdsc, req); 1682 complete_request(mdsc, req);
1614 goto out; 1683 goto out;
1615} 1684}
@@ -1630,10 +1699,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
1630 1699
1631/* 1700/*
1632 * Wake up threads with requests pending for @mds, so that they can 1701 * Wake up threads with requests pending for @mds, so that they can
1633 * resubmit their requests to a possibly different mds. If @all is set, 1702 * resubmit their requests to a possibly different mds.
1634 * wake up if their requests has been forwarded to @mds, too.
1635 */ 1703 */
1636static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) 1704static void kick_requests(struct ceph_mds_client *mdsc, int mds)
1637{ 1705{
1638 struct ceph_mds_request *req; 1706 struct ceph_mds_request *req;
1639 struct rb_node *p; 1707 struct rb_node *p;
@@ -1689,64 +1757,78 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
1689 __register_request(mdsc, req, dir); 1757 __register_request(mdsc, req, dir);
1690 __do_request(mdsc, req); 1758 __do_request(mdsc, req);
1691 1759
1692 /* wait */ 1760 if (req->r_err) {
1693 if (!req->r_reply) { 1761 err = req->r_err;
1694 mutex_unlock(&mdsc->mutex); 1762 __unregister_request(mdsc, req);
1695 if (req->r_timeout) { 1763 dout("do_request early error %d\n", err);
1696 err = (long)wait_for_completion_interruptible_timeout( 1764 goto out;
1697 &req->r_completion, req->r_timeout);
1698 if (err == 0)
1699 req->r_reply = ERR_PTR(-EIO);
1700 else if (err < 0)
1701 req->r_reply = ERR_PTR(err);
1702 } else {
1703 err = wait_for_completion_interruptible(
1704 &req->r_completion);
1705 if (err)
1706 req->r_reply = ERR_PTR(err);
1707 }
1708 mutex_lock(&mdsc->mutex);
1709 } 1765 }
1710 1766
1711 if (IS_ERR(req->r_reply)) { 1767 /* wait */
1712 err = PTR_ERR(req->r_reply); 1768 mutex_unlock(&mdsc->mutex);
1713 req->r_reply = NULL; 1769 dout("do_request waiting\n");
1770 if (req->r_timeout) {
1771 err = (long)wait_for_completion_interruptible_timeout(
1772 &req->r_completion, req->r_timeout);
1773 if (err == 0)
1774 err = -EIO;
1775 } else {
1776 err = wait_for_completion_interruptible(&req->r_completion);
1777 }
1778 dout("do_request waited, got %d\n", err);
1779 mutex_lock(&mdsc->mutex);
1714 1780
1715 if (err == -ERESTARTSYS) { 1781 /* only abort if we didn't race with a real reply */
1716 /* aborted */ 1782 if (req->r_got_result) {
1717 req->r_aborted = true; 1783 err = le32_to_cpu(req->r_reply_info.head->result);
1784 } else if (err < 0) {
1785 dout("aborted request %lld with %d\n", req->r_tid, err);
1718 1786
1719 if (req->r_locked_dir && 1787 /*
1720 (req->r_op & CEPH_MDS_OP_WRITE)) { 1788 * ensure we aren't running concurrently with
1721 struct ceph_inode_info *ci = 1789 * ceph_fill_trace or ceph_readdir_prepopulate, which
1722 ceph_inode(req->r_locked_dir); 1790 * rely on locks (dir mutex) held by our caller.
1791 */
1792 mutex_lock(&req->r_fill_mutex);
1793 req->r_err = err;
1794 req->r_aborted = true;
1795 mutex_unlock(&req->r_fill_mutex);
1723 1796
1724 dout("aborted, clearing I_COMPLETE on %p\n", 1797 if (req->r_locked_dir &&
1725 req->r_locked_dir); 1798 (req->r_op & CEPH_MDS_OP_WRITE))
1726 spin_lock(&req->r_locked_dir->i_lock); 1799 ceph_invalidate_dir_request(req);
1727 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1728 ci->i_release_count++;
1729 spin_unlock(&req->r_locked_dir->i_lock);
1730 }
1731 } else {
1732 /* clean up this request */
1733 __unregister_request(mdsc, req);
1734 if (!list_empty(&req->r_unsafe_item))
1735 list_del_init(&req->r_unsafe_item);
1736 complete(&req->r_safe_completion);
1737 }
1738 } else if (req->r_err) {
1739 err = req->r_err;
1740 } else { 1800 } else {
1741 err = le32_to_cpu(req->r_reply_info.head->result); 1801 err = req->r_err;
1742 } 1802 }
1743 mutex_unlock(&mdsc->mutex);
1744 1803
1804out:
1805 mutex_unlock(&mdsc->mutex);
1745 dout("do_request %p done, result %d\n", req, err); 1806 dout("do_request %p done, result %d\n", req, err);
1746 return err; 1807 return err;
1747} 1808}
1748 1809
1749/* 1810/*
1811 * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
1812 * namespace request.
1813 */
1814void ceph_invalidate_dir_request(struct ceph_mds_request *req)
1815{
1816 struct inode *inode = req->r_locked_dir;
1817 struct ceph_inode_info *ci = ceph_inode(inode);
1818
1819 dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
1820 spin_lock(&inode->i_lock);
1821 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1822 ci->i_release_count++;
1823 spin_unlock(&inode->i_lock);
1824
1825 if (req->r_dentry)
1826 ceph_invalidate_dentry_lease(req->r_dentry);
1827 if (req->r_old_dentry)
1828 ceph_invalidate_dentry_lease(req->r_old_dentry);
1829}
1830
1831/*
1750 * Handle mds reply. 1832 * Handle mds reply.
1751 * 1833 *
1752 * We take the session mutex and parse and process the reply immediately. 1834 * We take the session mutex and parse and process the reply immediately.
@@ -1797,6 +1879,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1797 mutex_unlock(&mdsc->mutex); 1879 mutex_unlock(&mdsc->mutex);
1798 goto out; 1880 goto out;
1799 } 1881 }
1882 if (req->r_got_safe && !head->safe) {
1883 pr_warning("got unsafe after safe on %llu from mds%d\n",
1884 tid, mds);
1885 mutex_unlock(&mdsc->mutex);
1886 goto out;
1887 }
1800 1888
1801 result = le32_to_cpu(head->result); 1889 result = le32_to_cpu(head->result);
1802 1890
@@ -1838,11 +1926,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1838 mutex_unlock(&mdsc->mutex); 1926 mutex_unlock(&mdsc->mutex);
1839 goto out; 1927 goto out;
1840 } 1928 }
1841 } 1929 } else {
1842
1843 BUG_ON(req->r_reply);
1844
1845 if (!head->safe) {
1846 req->r_got_unsafe = true; 1930 req->r_got_unsafe = true;
1847 list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); 1931 list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
1848 } 1932 }
@@ -1871,21 +1955,30 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1871 } 1955 }
1872 1956
1873 /* insert trace into our cache */ 1957 /* insert trace into our cache */
1958 mutex_lock(&req->r_fill_mutex);
1874 err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); 1959 err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
1875 if (err == 0) { 1960 if (err == 0) {
1876 if (result == 0 && rinfo->dir_nr) 1961 if (result == 0 && rinfo->dir_nr)
1877 ceph_readdir_prepopulate(req, req->r_session); 1962 ceph_readdir_prepopulate(req, req->r_session);
1878 ceph_unreserve_caps(&req->r_caps_reservation); 1963 ceph_unreserve_caps(&req->r_caps_reservation);
1879 } 1964 }
1965 mutex_unlock(&req->r_fill_mutex);
1880 1966
1881 up_read(&mdsc->snap_rwsem); 1967 up_read(&mdsc->snap_rwsem);
1882out_err: 1968out_err:
1883 if (err) { 1969 mutex_lock(&mdsc->mutex);
1884 req->r_err = err; 1970 if (!req->r_aborted) {
1971 if (err) {
1972 req->r_err = err;
1973 } else {
1974 req->r_reply = msg;
1975 ceph_msg_get(msg);
1976 req->r_got_result = true;
1977 }
1885 } else { 1978 } else {
1886 req->r_reply = msg; 1979 dout("reply arrived after request %lld was aborted\n", tid);
1887 ceph_msg_get(msg);
1888 } 1980 }
1981 mutex_unlock(&mdsc->mutex);
1889 1982
1890 add_cap_releases(mdsc, req->r_session, -1); 1983 add_cap_releases(mdsc, req->r_session, -1);
1891 mutex_unlock(&session->s_mutex); 1984 mutex_unlock(&session->s_mutex);
@@ -1984,6 +2077,8 @@ static void handle_session(struct ceph_mds_session *session,
1984 2077
1985 switch (op) { 2078 switch (op) {
1986 case CEPH_SESSION_OPEN: 2079 case CEPH_SESSION_OPEN:
2080 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
2081 pr_info("mds%d reconnect success\n", session->s_mds);
1987 session->s_state = CEPH_MDS_SESSION_OPEN; 2082 session->s_state = CEPH_MDS_SESSION_OPEN;
1988 renewed_caps(mdsc, session, 0); 2083 renewed_caps(mdsc, session, 0);
1989 wake = 1; 2084 wake = 1;
@@ -1997,10 +2092,12 @@ static void handle_session(struct ceph_mds_session *session,
1997 break; 2092 break;
1998 2093
1999 case CEPH_SESSION_CLOSE: 2094 case CEPH_SESSION_CLOSE:
2095 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
2096 pr_info("mds%d reconnect denied\n", session->s_mds);
2000 remove_session_caps(session); 2097 remove_session_caps(session);
2001 wake = 1; /* for good measure */ 2098 wake = 1; /* for good measure */
2002 complete(&mdsc->session_close_waiters); 2099 complete(&mdsc->session_close_waiters);
2003 kick_requests(mdsc, mds, 0); /* cur only */ 2100 kick_requests(mdsc, mds);
2004 break; 2101 break;
2005 2102
2006 case CEPH_SESSION_STALE: 2103 case CEPH_SESSION_STALE:
@@ -2132,54 +2229,44 @@ out:
2132 * 2229 *
2133 * called with mdsc->mutex held. 2230 * called with mdsc->mutex held.
2134 */ 2231 */
2135static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) 2232static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2233 struct ceph_mds_session *session)
2136{ 2234{
2137 struct ceph_mds_session *session = NULL;
2138 struct ceph_msg *reply; 2235 struct ceph_msg *reply;
2139 struct rb_node *p; 2236 struct rb_node *p;
2237 int mds = session->s_mds;
2140 int err = -ENOMEM; 2238 int err = -ENOMEM;
2141 struct ceph_pagelist *pagelist; 2239 struct ceph_pagelist *pagelist;
2142 2240
2143 pr_info("reconnect to recovering mds%d\n", mds); 2241 pr_info("mds%d reconnect start\n", mds);
2144 2242
2145 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); 2243 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
2146 if (!pagelist) 2244 if (!pagelist)
2147 goto fail_nopagelist; 2245 goto fail_nopagelist;
2148 ceph_pagelist_init(pagelist); 2246 ceph_pagelist_init(pagelist);
2149 2247
2150 reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); 2248 reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS);
2151 if (IS_ERR(reply)) { 2249 if (!reply)
2152 err = PTR_ERR(reply);
2153 goto fail_nomsg; 2250 goto fail_nomsg;
2154 }
2155
2156 /* find session */
2157 session = __ceph_lookup_mds_session(mdsc, mds);
2158 mutex_unlock(&mdsc->mutex); /* drop lock for duration */
2159 2251
2160 if (session) { 2252 mutex_lock(&session->s_mutex);
2161 mutex_lock(&session->s_mutex); 2253 session->s_state = CEPH_MDS_SESSION_RECONNECTING;
2254 session->s_seq = 0;
2162 2255
2163 session->s_state = CEPH_MDS_SESSION_RECONNECTING; 2256 ceph_con_open(&session->s_con,
2164 session->s_seq = 0; 2257 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
2165 2258
2166 ceph_con_open(&session->s_con, 2259 /* replay unsafe requests */
2167 ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); 2260 replay_unsafe_requests(mdsc, session);
2168
2169 /* replay unsafe requests */
2170 replay_unsafe_requests(mdsc, session);
2171 } else {
2172 dout("no session for mds%d, will send short reconnect\n",
2173 mds);
2174 }
2175 2261
2176 down_read(&mdsc->snap_rwsem); 2262 down_read(&mdsc->snap_rwsem);
2177 2263
2178 if (!session)
2179 goto send;
2180 dout("session %p state %s\n", session, 2264 dout("session %p state %s\n", session,
2181 session_state_name(session->s_state)); 2265 session_state_name(session->s_state));
2182 2266
2267 /* drop old cap expires; we're about to reestablish that state */
2268 discard_cap_releases(mdsc, session);
2269
2183 /* traverse this session's caps */ 2270 /* traverse this session's caps */
2184 err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); 2271 err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
2185 if (err) 2272 if (err)
@@ -2208,36 +2295,29 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2208 goto fail; 2295 goto fail;
2209 } 2296 }
2210 2297
2211send:
2212 reply->pagelist = pagelist; 2298 reply->pagelist = pagelist;
2213 reply->hdr.data_len = cpu_to_le32(pagelist->length); 2299 reply->hdr.data_len = cpu_to_le32(pagelist->length);
2214 reply->nr_pages = calc_pages_for(0, pagelist->length); 2300 reply->nr_pages = calc_pages_for(0, pagelist->length);
2215 ceph_con_send(&session->s_con, reply); 2301 ceph_con_send(&session->s_con, reply);
2216 2302
2217 session->s_state = CEPH_MDS_SESSION_OPEN;
2218 mutex_unlock(&session->s_mutex); 2303 mutex_unlock(&session->s_mutex);
2219 2304
2220 mutex_lock(&mdsc->mutex); 2305 mutex_lock(&mdsc->mutex);
2221 __wake_requests(mdsc, &session->s_waiting); 2306 __wake_requests(mdsc, &session->s_waiting);
2222 mutex_unlock(&mdsc->mutex); 2307 mutex_unlock(&mdsc->mutex);
2223 2308
2224 ceph_put_mds_session(session);
2225
2226 up_read(&mdsc->snap_rwsem); 2309 up_read(&mdsc->snap_rwsem);
2227 mutex_lock(&mdsc->mutex);
2228 return; 2310 return;
2229 2311
2230fail: 2312fail:
2231 ceph_msg_put(reply); 2313 ceph_msg_put(reply);
2232 up_read(&mdsc->snap_rwsem); 2314 up_read(&mdsc->snap_rwsem);
2233 mutex_unlock(&session->s_mutex); 2315 mutex_unlock(&session->s_mutex);
2234 ceph_put_mds_session(session);
2235fail_nomsg: 2316fail_nomsg:
2236 ceph_pagelist_release(pagelist); 2317 ceph_pagelist_release(pagelist);
2237 kfree(pagelist); 2318 kfree(pagelist);
2238fail_nopagelist: 2319fail_nopagelist:
2239 pr_err("error %d preparing reconnect for mds%d\n", err, mds); 2320 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2240 mutex_lock(&mdsc->mutex);
2241 return; 2321 return;
2242} 2322}
2243 2323
@@ -2290,7 +2370,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
2290 } 2370 }
2291 2371
2292 /* kick any requests waiting on the recovering mds */ 2372 /* kick any requests waiting on the recovering mds */
2293 kick_requests(mdsc, i, 1); 2373 kick_requests(mdsc, i);
2294 } else if (oldstate == newstate) { 2374 } else if (oldstate == newstate) {
2295 continue; /* nothing new with this mds */ 2375 continue; /* nothing new with this mds */
2296 } 2376 }
@@ -2299,22 +2379,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
2299 * send reconnect? 2379 * send reconnect?
2300 */ 2380 */
2301 if (s->s_state == CEPH_MDS_SESSION_RESTARTING && 2381 if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
2302 newstate >= CEPH_MDS_STATE_RECONNECT) 2382 newstate >= CEPH_MDS_STATE_RECONNECT) {
2303 send_mds_reconnect(mdsc, i); 2383 mutex_unlock(&mdsc->mutex);
2384 send_mds_reconnect(mdsc, s);
2385 mutex_lock(&mdsc->mutex);
2386 }
2304 2387
2305 /* 2388 /*
2306 * kick requests on any mds that has gone active. 2389 * kick request on any mds that has gone active.
2307 *
2308 * kick requests on cur or forwarder: we may have sent
2309 * the request to mds1, mds1 told us it forwarded it
2310 * to mds2, but then we learn mds1 failed and can't be
2311 * sure it successfully forwarded our request before
2312 * it died.
2313 */ 2390 */
2314 if (oldstate < CEPH_MDS_STATE_ACTIVE && 2391 if (oldstate < CEPH_MDS_STATE_ACTIVE &&
2315 newstate >= CEPH_MDS_STATE_ACTIVE) { 2392 newstate >= CEPH_MDS_STATE_ACTIVE) {
2316 pr_info("mds%d reconnect completed\n", s->s_mds); 2393 if (oldstate != CEPH_MDS_STATE_CREATING &&
2317 kick_requests(mdsc, i, 1); 2394 oldstate != CEPH_MDS_STATE_STARTING)
2395 pr_info("mds%d recovery completed\n", s->s_mds);
2396 kick_requests(mdsc, i);
2318 ceph_kick_flushing_caps(mdsc, s); 2397 ceph_kick_flushing_caps(mdsc, s);
2319 wake_up_session_caps(s, 1); 2398 wake_up_session_caps(s, 1);
2320 } 2399 }
@@ -2457,8 +2536,8 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
2457 dnamelen = dentry->d_name.len; 2536 dnamelen = dentry->d_name.len;
2458 len += dnamelen; 2537 len += dnamelen;
2459 2538
2460 msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); 2539 msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS);
2461 if (IS_ERR(msg)) 2540 if (!msg)
2462 return; 2541 return;
2463 lease = msg->front.iov_base; 2542 lease = msg->front.iov_base;
2464 lease->action = action; 2543 lease->action = action;
@@ -2603,7 +2682,9 @@ static void delayed_work(struct work_struct *work)
2603 else 2682 else
2604 ceph_con_keepalive(&s->s_con); 2683 ceph_con_keepalive(&s->s_con);
2605 add_cap_releases(mdsc, s, -1); 2684 add_cap_releases(mdsc, s, -1);
2606 send_cap_releases(mdsc, s); 2685 if (s->s_state == CEPH_MDS_SESSION_OPEN ||
2686 s->s_state == CEPH_MDS_SESSION_HUNG)
2687 send_cap_releases(mdsc, s);
2607 mutex_unlock(&s->s_mutex); 2688 mutex_unlock(&s->s_mutex);
2608 ceph_put_mds_session(s); 2689 ceph_put_mds_session(s);
2609 2690
@@ -2620,6 +2701,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
2620 mdsc->client = client; 2701 mdsc->client = client;
2621 mutex_init(&mdsc->mutex); 2702 mutex_init(&mdsc->mutex);
2622 mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); 2703 mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
2704 if (mdsc->mdsmap == NULL)
2705 return -ENOMEM;
2706
2623 init_completion(&mdsc->safe_umount_waiters); 2707 init_completion(&mdsc->safe_umount_waiters);
2624 init_completion(&mdsc->session_close_waiters); 2708 init_completion(&mdsc->session_close_waiters);
2625 INIT_LIST_HEAD(&mdsc->waiting_for_map); 2709 INIT_LIST_HEAD(&mdsc->waiting_for_map);
@@ -2645,6 +2729,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
2645 init_waitqueue_head(&mdsc->cap_flushing_wq); 2729 init_waitqueue_head(&mdsc->cap_flushing_wq);
2646 spin_lock_init(&mdsc->dentry_lru_lock); 2730 spin_lock_init(&mdsc->dentry_lru_lock);
2647 INIT_LIST_HEAD(&mdsc->dentry_lru); 2731 INIT_LIST_HEAD(&mdsc->dentry_lru);
2732
2648 return 0; 2733 return 0;
2649} 2734}
2650 2735
@@ -2740,6 +2825,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
2740{ 2825{
2741 u64 want_tid, want_flush; 2826 u64 want_tid, want_flush;
2742 2827
2828 if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
2829 return;
2830
2743 dout("sync\n"); 2831 dout("sync\n");
2744 mutex_lock(&mdsc->mutex); 2832 mutex_lock(&mdsc->mutex);
2745 want_tid = mdsc->last_tid; 2833 want_tid = mdsc->last_tid;
@@ -2922,9 +3010,10 @@ static void con_put(struct ceph_connection *con)
2922static void peer_reset(struct ceph_connection *con) 3010static void peer_reset(struct ceph_connection *con)
2923{ 3011{
2924 struct ceph_mds_session *s = con->private; 3012 struct ceph_mds_session *s = con->private;
3013 struct ceph_mds_client *mdsc = s->s_mdsc;
2925 3014
2926 pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", 3015 pr_warning("mds%d closed our session\n", s->s_mds);
2927 s->s_mds); 3016 send_mds_reconnect(mdsc, s);
2928} 3017}
2929 3018
2930static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) 3019static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
@@ -3031,7 +3120,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
3031 return ceph_monc_validate_auth(&mdsc->client->monc); 3120 return ceph_monc_validate_auth(&mdsc->client->monc);
3032} 3121}
3033 3122
3034const static struct ceph_connection_operations mds_con_ops = { 3123static const struct ceph_connection_operations mds_con_ops = {
3035 .get = con_get, 3124 .get = con_get,
3036 .put = con_put, 3125 .put = con_put,
3037 .dispatch = dispatch, 3126 .dispatch = dispatch,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 961cc6f65878..d9936c4f1212 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -165,6 +165,8 @@ struct ceph_mds_request {
165 struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ 165 struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
166 struct inode *r_target_inode; /* resulting inode */ 166 struct inode *r_target_inode; /* resulting inode */
167 167
168 struct mutex r_fill_mutex;
169
168 union ceph_mds_request_args r_args; 170 union ceph_mds_request_args r_args;
169 int r_fmode; /* file mode, if expecting cap */ 171 int r_fmode; /* file mode, if expecting cap */
170 172
@@ -213,7 +215,7 @@ struct ceph_mds_request {
213 struct completion r_safe_completion; 215 struct completion r_safe_completion;
214 ceph_mds_request_callback_t r_callback; 216 ceph_mds_request_callback_t r_callback;
215 struct list_head r_unsafe_item; /* per-session unsafe list item */ 217 struct list_head r_unsafe_item; /* per-session unsafe list item */
216 bool r_got_unsafe, r_got_safe; 218 bool r_got_unsafe, r_got_safe, r_got_result;
217 219
218 bool r_did_prepopulate; 220 bool r_did_prepopulate;
219 u32 r_readdir_offset; 221 u32 r_readdir_offset;
@@ -301,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
301 struct inode *inode, 303 struct inode *inode,
302 struct dentry *dn, int mask); 304 struct dentry *dn, int mask);
303 305
306extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
307
304extern struct ceph_mds_request * 308extern struct ceph_mds_request *
305ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); 309ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
306extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, 310extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cd4fadb6491a..60b74839ebec 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con);
39static void con_work(struct work_struct *); 39static void con_work(struct work_struct *);
40static void ceph_fault(struct ceph_connection *con); 40static void ceph_fault(struct ceph_connection *con);
41 41
42const char *ceph_name_type_str(int t)
43{
44 switch (t) {
45 case CEPH_ENTITY_TYPE_MON: return "mon";
46 case CEPH_ENTITY_TYPE_MDS: return "mds";
47 case CEPH_ENTITY_TYPE_OSD: return "osd";
48 case CEPH_ENTITY_TYPE_CLIENT: return "client";
49 case CEPH_ENTITY_TYPE_ADMIN: return "admin";
50 default: return "???";
51 }
52}
53
54/* 42/*
55 * nicely render a sockaddr as a string. 43 * nicely render a sockaddr as a string.
56 */ 44 */
@@ -340,6 +328,7 @@ static void reset_connection(struct ceph_connection *con)
340 ceph_msg_put(con->out_msg); 328 ceph_msg_put(con->out_msg);
341 con->out_msg = NULL; 329 con->out_msg = NULL;
342 } 330 }
331 con->out_keepalive_pending = false;
343 con->in_seq = 0; 332 con->in_seq = 0;
344 con->in_seq_acked = 0; 333 con->in_seq_acked = 0;
345} 334}
@@ -357,6 +346,7 @@ void ceph_con_close(struct ceph_connection *con)
357 clear_bit(WRITE_PENDING, &con->state); 346 clear_bit(WRITE_PENDING, &con->state);
358 mutex_lock(&con->mutex); 347 mutex_lock(&con->mutex);
359 reset_connection(con); 348 reset_connection(con);
349 con->peer_global_seq = 0;
360 cancel_delayed_work(&con->work); 350 cancel_delayed_work(&con->work);
361 mutex_unlock(&con->mutex); 351 mutex_unlock(&con->mutex);
362 queue_con(con); 352 queue_con(con);
@@ -661,7 +651,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
661 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 651 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
662 con->connect_seq, global_seq, proto); 652 con->connect_seq, global_seq, proto);
663 653
664 con->out_connect.features = CEPH_FEATURE_SUPPORTED; 654 con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
665 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 655 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
666 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 656 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
667 con->out_connect.global_seq = cpu_to_le32(global_seq); 657 con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1124,8 +1114,8 @@ static void fail_protocol(struct ceph_connection *con)
1124 1114
1125static int process_connect(struct ceph_connection *con) 1115static int process_connect(struct ceph_connection *con)
1126{ 1116{
1127 u64 sup_feat = CEPH_FEATURE_SUPPORTED; 1117 u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT;
1128 u64 req_feat = CEPH_FEATURE_REQUIRED; 1118 u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT;
1129 u64 server_feat = le64_to_cpu(con->in_reply.features); 1119 u64 server_feat = le64_to_cpu(con->in_reply.features);
1130 1120
1131 dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 1121 dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1233,6 +1223,7 @@ static int process_connect(struct ceph_connection *con)
1233 clear_bit(CONNECTING, &con->state); 1223 clear_bit(CONNECTING, &con->state);
1234 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); 1224 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
1235 con->connect_seq++; 1225 con->connect_seq++;
1226 con->peer_features = server_feat;
1236 dout("process_connect got READY gseq %d cseq %d (%d)\n", 1227 dout("process_connect got READY gseq %d cseq %d (%d)\n",
1237 con->peer_global_seq, 1228 con->peer_global_seq,
1238 le32_to_cpu(con->in_reply.connect_seq), 1229 le32_to_cpu(con->in_reply.connect_seq),
@@ -1402,19 +1393,17 @@ static int read_partial_message(struct ceph_connection *con)
1402 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); 1393 con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
1403 if (skip) { 1394 if (skip) {
1404 /* skip this message */ 1395 /* skip this message */
1405 dout("alloc_msg returned NULL, skipping message\n"); 1396 dout("alloc_msg said skip message\n");
1406 con->in_base_pos = -front_len - middle_len - data_len - 1397 con->in_base_pos = -front_len - middle_len - data_len -
1407 sizeof(m->footer); 1398 sizeof(m->footer);
1408 con->in_tag = CEPH_MSGR_TAG_READY; 1399 con->in_tag = CEPH_MSGR_TAG_READY;
1409 con->in_seq++; 1400 con->in_seq++;
1410 return 0; 1401 return 0;
1411 } 1402 }
1412 if (IS_ERR(con->in_msg)) { 1403 if (!con->in_msg) {
1413 ret = PTR_ERR(con->in_msg);
1414 con->in_msg = NULL;
1415 con->error_msg = 1404 con->error_msg =
1416 "error allocating memory for incoming message"; 1405 "error allocating memory for incoming message";
1417 return ret; 1406 return -ENOMEM;
1418 } 1407 }
1419 m = con->in_msg; 1408 m = con->in_msg;
1420 m->front.iov_len = 0; /* haven't read it yet */ 1409 m->front.iov_len = 0; /* haven't read it yet */
@@ -1514,14 +1503,14 @@ static void process_message(struct ceph_connection *con)
1514 1503
1515 /* if first message, set peer_name */ 1504 /* if first message, set peer_name */
1516 if (con->peer_name.type == 0) 1505 if (con->peer_name.type == 0)
1517 con->peer_name = msg->hdr.src.name; 1506 con->peer_name = msg->hdr.src;
1518 1507
1519 con->in_seq++; 1508 con->in_seq++;
1520 mutex_unlock(&con->mutex); 1509 mutex_unlock(&con->mutex);
1521 1510
1522 dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", 1511 dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
1523 msg, le64_to_cpu(msg->hdr.seq), 1512 msg, le64_to_cpu(msg->hdr.seq),
1524 ENTITY_NAME(msg->hdr.src.name), 1513 ENTITY_NAME(msg->hdr.src),
1525 le16_to_cpu(msg->hdr.type), 1514 le16_to_cpu(msg->hdr.type),
1526 ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), 1515 ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
1527 le32_to_cpu(msg->hdr.front_len), 1516 le32_to_cpu(msg->hdr.front_len),
@@ -1546,7 +1535,6 @@ static int try_write(struct ceph_connection *con)
1546 dout("try_write start %p state %lu nref %d\n", con, con->state, 1535 dout("try_write start %p state %lu nref %d\n", con, con->state,
1547 atomic_read(&con->nref)); 1536 atomic_read(&con->nref));
1548 1537
1549 mutex_lock(&con->mutex);
1550more: 1538more:
1551 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); 1539 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
1552 1540
@@ -1639,7 +1627,6 @@ do_next:
1639done: 1627done:
1640 ret = 0; 1628 ret = 0;
1641out: 1629out:
1642 mutex_unlock(&con->mutex);
1643 dout("try_write done on %p\n", con); 1630 dout("try_write done on %p\n", con);
1644 return ret; 1631 return ret;
1645} 1632}
@@ -1651,7 +1638,6 @@ out:
1651 */ 1638 */
1652static int try_read(struct ceph_connection *con) 1639static int try_read(struct ceph_connection *con)
1653{ 1640{
1654 struct ceph_messenger *msgr;
1655 int ret = -1; 1641 int ret = -1;
1656 1642
1657 if (!con->sock) 1643 if (!con->sock)
@@ -1661,9 +1647,6 @@ static int try_read(struct ceph_connection *con)
1661 return 0; 1647 return 0;
1662 1648
1663 dout("try_read start on %p\n", con); 1649 dout("try_read start on %p\n", con);
1664 msgr = con->msgr;
1665
1666 mutex_lock(&con->mutex);
1667 1650
1668more: 1651more:
1669 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, 1652 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
@@ -1758,7 +1741,6 @@ more:
1758done: 1741done:
1759 ret = 0; 1742 ret = 0;
1760out: 1743out:
1761 mutex_unlock(&con->mutex);
1762 dout("try_read done on %p\n", con); 1744 dout("try_read done on %p\n", con);
1763 return ret; 1745 return ret;
1764 1746
@@ -1830,6 +1812,8 @@ more:
1830 dout("con_work %p start, clearing QUEUED\n", con); 1812 dout("con_work %p start, clearing QUEUED\n", con);
1831 clear_bit(QUEUED, &con->state); 1813 clear_bit(QUEUED, &con->state);
1832 1814
1815 mutex_lock(&con->mutex);
1816
1833 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1817 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
1834 dout("con_work CLOSED\n"); 1818 dout("con_work CLOSED\n");
1835 con_close_socket(con); 1819 con_close_socket(con);
@@ -1844,11 +1828,16 @@ more:
1844 if (test_and_clear_bit(SOCK_CLOSED, &con->state) || 1828 if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
1845 try_read(con) < 0 || 1829 try_read(con) < 0 ||
1846 try_write(con) < 0) { 1830 try_write(con) < 0) {
1831 mutex_unlock(&con->mutex);
1847 backoff = 1; 1832 backoff = 1;
1848 ceph_fault(con); /* error/fault path */ 1833 ceph_fault(con); /* error/fault path */
1834 goto done_unlocked;
1849 } 1835 }
1850 1836
1851done: 1837done:
1838 mutex_unlock(&con->mutex);
1839
1840done_unlocked:
1852 clear_bit(BUSY, &con->state); 1841 clear_bit(BUSY, &con->state);
1853 dout("con->state=%lu\n", con->state); 1842 dout("con->state=%lu\n", con->state);
1854 if (test_bit(QUEUED, &con->state)) { 1843 if (test_bit(QUEUED, &con->state)) {
@@ -1947,7 +1936,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
1947 1936
1948 /* the zero page is needed if a request is "canceled" while the message 1937 /* the zero page is needed if a request is "canceled" while the message
1949 * is being written over the socket */ 1938 * is being written over the socket */
1950 msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1939 msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO);
1951 if (!msgr->zero_page) { 1940 if (!msgr->zero_page) {
1952 kfree(msgr); 1941 kfree(msgr);
1953 return ERR_PTR(-ENOMEM); 1942 return ERR_PTR(-ENOMEM);
@@ -1987,9 +1976,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
1987 } 1976 }
1988 1977
1989 /* set src+dst */ 1978 /* set src+dst */
1990 msg->hdr.src.name = con->msgr->inst.name; 1979 msg->hdr.src = con->msgr->inst.name;
1991 msg->hdr.src.addr = con->msgr->my_enc_addr;
1992 msg->hdr.orig_src = msg->hdr.src;
1993 1980
1994 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 1981 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
1995 1982
@@ -2083,12 +2070,11 @@ void ceph_con_keepalive(struct ceph_connection *con)
2083 * construct a new message with given type, size 2070 * construct a new message with given type, size
2084 * the new msg has a ref count of 1. 2071 * the new msg has a ref count of 1.
2085 */ 2072 */
2086struct ceph_msg *ceph_msg_new(int type, int front_len, 2073struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2087 int page_len, int page_off, struct page **pages)
2088{ 2074{
2089 struct ceph_msg *m; 2075 struct ceph_msg *m;
2090 2076
2091 m = kmalloc(sizeof(*m), GFP_NOFS); 2077 m = kmalloc(sizeof(*m), flags);
2092 if (m == NULL) 2078 if (m == NULL)
2093 goto out; 2079 goto out;
2094 kref_init(&m->kref); 2080 kref_init(&m->kref);
@@ -2100,8 +2086,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2100 m->hdr.version = 0; 2086 m->hdr.version = 0;
2101 m->hdr.front_len = cpu_to_le32(front_len); 2087 m->hdr.front_len = cpu_to_le32(front_len);
2102 m->hdr.middle_len = 0; 2088 m->hdr.middle_len = 0;
2103 m->hdr.data_len = cpu_to_le32(page_len); 2089 m->hdr.data_len = 0;
2104 m->hdr.data_off = cpu_to_le16(page_off); 2090 m->hdr.data_off = 0;
2105 m->hdr.reserved = 0; 2091 m->hdr.reserved = 0;
2106 m->footer.front_crc = 0; 2092 m->footer.front_crc = 0;
2107 m->footer.middle_crc = 0; 2093 m->footer.middle_crc = 0;
@@ -2115,11 +2101,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2115 /* front */ 2101 /* front */
2116 if (front_len) { 2102 if (front_len) {
2117 if (front_len > PAGE_CACHE_SIZE) { 2103 if (front_len > PAGE_CACHE_SIZE) {
2118 m->front.iov_base = __vmalloc(front_len, GFP_NOFS, 2104 m->front.iov_base = __vmalloc(front_len, flags,
2119 PAGE_KERNEL); 2105 PAGE_KERNEL);
2120 m->front_is_vmalloc = true; 2106 m->front_is_vmalloc = true;
2121 } else { 2107 } else {
2122 m->front.iov_base = kmalloc(front_len, GFP_NOFS); 2108 m->front.iov_base = kmalloc(front_len, flags);
2123 } 2109 }
2124 if (m->front.iov_base == NULL) { 2110 if (m->front.iov_base == NULL) {
2125 pr_err("msg_new can't allocate %d bytes\n", 2111 pr_err("msg_new can't allocate %d bytes\n",
@@ -2135,19 +2121,18 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2135 m->middle = NULL; 2121 m->middle = NULL;
2136 2122
2137 /* data */ 2123 /* data */
2138 m->nr_pages = calc_pages_for(page_off, page_len); 2124 m->nr_pages = 0;
2139 m->pages = pages; 2125 m->pages = NULL;
2140 m->pagelist = NULL; 2126 m->pagelist = NULL;
2141 2127
2142 dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, 2128 dout("ceph_msg_new %p front %d\n", m, front_len);
2143 m->nr_pages);
2144 return m; 2129 return m;
2145 2130
2146out2: 2131out2:
2147 ceph_msg_put(m); 2132 ceph_msg_put(m);
2148out: 2133out:
2149 pr_err("msg_new can't create type %d len %d\n", type, front_len); 2134 pr_err("msg_new can't create type %d front %d\n", type, front_len);
2150 return ERR_PTR(-ENOMEM); 2135 return NULL;
2151} 2136}
2152 2137
2153/* 2138/*
@@ -2190,29 +2175,25 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
2190 mutex_unlock(&con->mutex); 2175 mutex_unlock(&con->mutex);
2191 msg = con->ops->alloc_msg(con, hdr, skip); 2176 msg = con->ops->alloc_msg(con, hdr, skip);
2192 mutex_lock(&con->mutex); 2177 mutex_lock(&con->mutex);
2193 if (IS_ERR(msg)) 2178 if (!msg || *skip)
2194 return msg;
2195
2196 if (*skip)
2197 return NULL; 2179 return NULL;
2198 } 2180 }
2199 if (!msg) { 2181 if (!msg) {
2200 *skip = 0; 2182 *skip = 0;
2201 msg = ceph_msg_new(type, front_len, 0, 0, NULL); 2183 msg = ceph_msg_new(type, front_len, GFP_NOFS);
2202 if (!msg) { 2184 if (!msg) {
2203 pr_err("unable to allocate msg type %d len %d\n", 2185 pr_err("unable to allocate msg type %d len %d\n",
2204 type, front_len); 2186 type, front_len);
2205 return ERR_PTR(-ENOMEM); 2187 return NULL;
2206 } 2188 }
2207 } 2189 }
2208 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 2190 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2209 2191
2210 if (middle_len) { 2192 if (middle_len && !msg->middle) {
2211 ret = ceph_alloc_middle(con, msg); 2193 ret = ceph_alloc_middle(con, msg);
2212
2213 if (ret < 0) { 2194 if (ret < 0) {
2214 ceph_msg_put(msg); 2195 ceph_msg_put(msg);
2215 return msg; 2196 return NULL;
2216 } 2197 }
2217 } 2198 }
2218 2199
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a5caf91cc971..00a9430b1ffc 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -49,10 +49,8 @@ struct ceph_connection_operations {
49 int *skip); 49 int *skip);
50}; 50};
51 51
52extern const char *ceph_name_type_str(int t);
53
54/* use format string %s%d */ 52/* use format string %s%d */
55#define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) 53#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
56 54
57struct ceph_messenger { 55struct ceph_messenger {
58 struct ceph_entity_inst inst; /* my name+address */ 56 struct ceph_entity_inst inst; /* my name+address */
@@ -144,6 +142,7 @@ struct ceph_connection {
144 struct ceph_entity_addr peer_addr; /* peer address */ 142 struct ceph_entity_addr peer_addr; /* peer address */
145 struct ceph_entity_name peer_name; /* peer name */ 143 struct ceph_entity_name peer_name; /* peer name */
146 struct ceph_entity_addr peer_addr_for_me; 144 struct ceph_entity_addr peer_addr_for_me;
145 unsigned peer_features;
147 u32 connect_seq; /* identify the most recent connection 146 u32 connect_seq; /* identify the most recent connection
148 attempt for this connection, client */ 147 attempt for this connection, client */
149 u32 peer_global_seq; /* peer's global seq for this connection */ 148 u32 peer_global_seq; /* peer's global seq for this connection */
@@ -158,7 +157,6 @@ struct ceph_connection {
158 struct list_head out_queue; 157 struct list_head out_queue;
159 struct list_head out_sent; /* sending or sent but unacked */ 158 struct list_head out_sent; /* sending or sent but unacked */
160 u64 out_seq; /* last message queued for send */ 159 u64 out_seq; /* last message queued for send */
161 u64 out_seq_sent; /* last message sent */
162 bool out_keepalive_pending; 160 bool out_keepalive_pending;
163 161
164 u64 in_seq, in_seq_acked; /* last message received, acked */ 162 u64 in_seq, in_seq_acked; /* last message received, acked */
@@ -234,9 +232,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con);
234extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); 232extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
235extern void ceph_con_put(struct ceph_connection *con); 233extern void ceph_con_put(struct ceph_connection *con);
236 234
237extern struct ceph_msg *ceph_msg_new(int type, int front_len, 235extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags);
238 int page_len, int page_off,
239 struct page **pages);
240extern void ceph_msg_kfree(struct ceph_msg *m); 236extern void ceph_msg_kfree(struct ceph_msg *m);
241 237
242 238
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 8fdc011ca956..f6510a476e7e 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -28,7 +28,7 @@
28 * resend any outstanding requests. 28 * resend any outstanding requests.
29 */ 29 */
30 30
31const static struct ceph_connection_operations mon_con_ops; 31static const struct ceph_connection_operations mon_con_ops;
32 32
33static int __validate_auth(struct ceph_mon_client *monc); 33static int __validate_auth(struct ceph_mon_client *monc);
34 34
@@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
104 monc->pending_auth = 1; 104 monc->pending_auth = 1;
105 monc->m_auth->front.iov_len = len; 105 monc->m_auth->front.iov_len = len;
106 monc->m_auth->hdr.front_len = cpu_to_le32(len); 106 monc->m_auth->hdr.front_len = cpu_to_le32(len);
107 ceph_con_revoke(monc->con, monc->m_auth);
107 ceph_msg_get(monc->m_auth); /* keep our ref */ 108 ceph_msg_get(monc->m_auth); /* keep our ref */
108 ceph_con_send(monc->con, monc->m_auth); 109 ceph_con_send(monc->con, monc->m_auth);
109} 110}
@@ -187,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc)
187 monc->want_next_osdmap); 188 monc->want_next_osdmap);
188 if ((__sub_expired(monc) && !monc->sub_sent) || 189 if ((__sub_expired(monc) && !monc->sub_sent) ||
189 monc->want_next_osdmap == 1) { 190 monc->want_next_osdmap == 1) {
190 struct ceph_msg *msg; 191 struct ceph_msg *msg = monc->m_subscribe;
191 struct ceph_mon_subscribe_item *i; 192 struct ceph_mon_subscribe_item *i;
192 void *p, *end; 193 void *p, *end;
193 194
194 msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL);
195 if (!msg)
196 return;
197
198 p = msg->front.iov_base; 195 p = msg->front.iov_base;
199 end = p + msg->front.iov_len; 196 end = p + msg->front_max;
200 197
201 dout("__send_subscribe to 'mdsmap' %u+\n", 198 dout("__send_subscribe to 'mdsmap' %u+\n",
202 (unsigned)monc->have_mdsmap); 199 (unsigned)monc->have_mdsmap);
@@ -226,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
226 223
227 msg->front.iov_len = p - msg->front.iov_base; 224 msg->front.iov_len = p - msg->front.iov_base;
228 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 225 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
229 ceph_con_send(monc->con, msg); 226 ceph_con_revoke(monc->con, msg);
227 ceph_con_send(monc->con, ceph_msg_get(msg));
230 228
231 monc->sub_sent = jiffies | 1; /* never 0 */ 229 monc->sub_sent = jiffies | 1; /* never 0 */
232 } 230 }
@@ -353,14 +351,14 @@ out:
353/* 351/*
354 * statfs 352 * statfs
355 */ 353 */
356static struct ceph_mon_statfs_request *__lookup_statfs( 354static struct ceph_mon_generic_request *__lookup_generic_req(
357 struct ceph_mon_client *monc, u64 tid) 355 struct ceph_mon_client *monc, u64 tid)
358{ 356{
359 struct ceph_mon_statfs_request *req; 357 struct ceph_mon_generic_request *req;
360 struct rb_node *n = monc->statfs_request_tree.rb_node; 358 struct rb_node *n = monc->generic_request_tree.rb_node;
361 359
362 while (n) { 360 while (n) {
363 req = rb_entry(n, struct ceph_mon_statfs_request, node); 361 req = rb_entry(n, struct ceph_mon_generic_request, node);
364 if (tid < req->tid) 362 if (tid < req->tid)
365 n = n->rb_left; 363 n = n->rb_left;
366 else if (tid > req->tid) 364 else if (tid > req->tid)
@@ -371,16 +369,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs(
371 return NULL; 369 return NULL;
372} 370}
373 371
374static void __insert_statfs(struct ceph_mon_client *monc, 372static void __insert_generic_request(struct ceph_mon_client *monc,
375 struct ceph_mon_statfs_request *new) 373 struct ceph_mon_generic_request *new)
376{ 374{
377 struct rb_node **p = &monc->statfs_request_tree.rb_node; 375 struct rb_node **p = &monc->generic_request_tree.rb_node;
378 struct rb_node *parent = NULL; 376 struct rb_node *parent = NULL;
379 struct ceph_mon_statfs_request *req = NULL; 377 struct ceph_mon_generic_request *req = NULL;
380 378
381 while (*p) { 379 while (*p) {
382 parent = *p; 380 parent = *p;
383 req = rb_entry(parent, struct ceph_mon_statfs_request, node); 381 req = rb_entry(parent, struct ceph_mon_generic_request, node);
384 if (new->tid < req->tid) 382 if (new->tid < req->tid)
385 p = &(*p)->rb_left; 383 p = &(*p)->rb_left;
386 else if (new->tid > req->tid) 384 else if (new->tid > req->tid)
@@ -390,113 +388,157 @@ static void __insert_statfs(struct ceph_mon_client *monc,
390 } 388 }
391 389
392 rb_link_node(&new->node, parent, p); 390 rb_link_node(&new->node, parent, p);
393 rb_insert_color(&new->node, &monc->statfs_request_tree); 391 rb_insert_color(&new->node, &monc->generic_request_tree);
392}
393
394static void release_generic_request(struct kref *kref)
395{
396 struct ceph_mon_generic_request *req =
397 container_of(kref, struct ceph_mon_generic_request, kref);
398
399 if (req->reply)
400 ceph_msg_put(req->reply);
401 if (req->request)
402 ceph_msg_put(req->request);
403}
404
405static void put_generic_request(struct ceph_mon_generic_request *req)
406{
407 kref_put(&req->kref, release_generic_request);
408}
409
410static void get_generic_request(struct ceph_mon_generic_request *req)
411{
412 kref_get(&req->kref);
413}
414
415static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
416 struct ceph_msg_header *hdr,
417 int *skip)
418{
419 struct ceph_mon_client *monc = con->private;
420 struct ceph_mon_generic_request *req;
421 u64 tid = le64_to_cpu(hdr->tid);
422 struct ceph_msg *m;
423
424 mutex_lock(&monc->mutex);
425 req = __lookup_generic_req(monc, tid);
426 if (!req) {
427 dout("get_generic_reply %lld dne\n", tid);
428 *skip = 1;
429 m = NULL;
430 } else {
431 dout("get_generic_reply %lld got %p\n", tid, req->reply);
432 m = ceph_msg_get(req->reply);
433 /*
434 * we don't need to track the connection reading into
435 * this reply because we only have one open connection
436 * at a time, ever.
437 */
438 }
439 mutex_unlock(&monc->mutex);
440 return m;
394} 441}
395 442
396static void handle_statfs_reply(struct ceph_mon_client *monc, 443static void handle_statfs_reply(struct ceph_mon_client *monc,
397 struct ceph_msg *msg) 444 struct ceph_msg *msg)
398{ 445{
399 struct ceph_mon_statfs_request *req; 446 struct ceph_mon_generic_request *req;
400 struct ceph_mon_statfs_reply *reply = msg->front.iov_base; 447 struct ceph_mon_statfs_reply *reply = msg->front.iov_base;
401 u64 tid; 448 u64 tid = le64_to_cpu(msg->hdr.tid);
402 449
403 if (msg->front.iov_len != sizeof(*reply)) 450 if (msg->front.iov_len != sizeof(*reply))
404 goto bad; 451 goto bad;
405 tid = le64_to_cpu(msg->hdr.tid);
406 dout("handle_statfs_reply %p tid %llu\n", msg, tid); 452 dout("handle_statfs_reply %p tid %llu\n", msg, tid);
407 453
408 mutex_lock(&monc->mutex); 454 mutex_lock(&monc->mutex);
409 req = __lookup_statfs(monc, tid); 455 req = __lookup_generic_req(monc, tid);
410 if (req) { 456 if (req) {
411 *req->buf = reply->st; 457 *(struct ceph_statfs *)req->buf = reply->st;
412 req->result = 0; 458 req->result = 0;
459 get_generic_request(req);
413 } 460 }
414 mutex_unlock(&monc->mutex); 461 mutex_unlock(&monc->mutex);
415 if (req) 462 if (req) {
416 complete(&req->completion); 463 complete(&req->completion);
464 put_generic_request(req);
465 }
417 return; 466 return;
418 467
419bad: 468bad:
420 pr_err("corrupt statfs reply, no tid\n"); 469 pr_err("corrupt generic reply, no tid\n");
421 ceph_msg_dump(msg); 470 ceph_msg_dump(msg);
422} 471}
423 472
424/* 473/*
425 * (re)send a statfs request 474 * Do a synchronous statfs().
426 */ 475 */
427static int send_statfs(struct ceph_mon_client *monc, 476int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
428 struct ceph_mon_statfs_request *req)
429{ 477{
430 struct ceph_msg *msg; 478 struct ceph_mon_generic_request *req;
431 struct ceph_mon_statfs *h; 479 struct ceph_mon_statfs *h;
480 int err;
432 481
433 dout("send_statfs tid %llu\n", req->tid); 482 req = kzalloc(sizeof(*req), GFP_NOFS);
434 msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); 483 if (!req)
435 if (IS_ERR(msg)) 484 return -ENOMEM;
436 return PTR_ERR(msg); 485
437 req->request = msg; 486 kref_init(&req->kref);
438 msg->hdr.tid = cpu_to_le64(req->tid); 487 req->buf = buf;
439 h = msg->front.iov_base; 488 init_completion(&req->completion);
489
490 err = -ENOMEM;
491 req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS);
492 if (!req->request)
493 goto out;
494 req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS);
495 if (!req->reply)
496 goto out;
497
498 /* fill out request */
499 h = req->request->front.iov_base;
440 h->monhdr.have_version = 0; 500 h->monhdr.have_version = 0;
441 h->monhdr.session_mon = cpu_to_le16(-1); 501 h->monhdr.session_mon = cpu_to_le16(-1);
442 h->monhdr.session_mon_tid = 0; 502 h->monhdr.session_mon_tid = 0;
443 h->fsid = monc->monmap->fsid; 503 h->fsid = monc->monmap->fsid;
444 ceph_con_send(monc->con, msg);
445 return 0;
446}
447
448/*
449 * Do a synchronous statfs().
450 */
451int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
452{
453 struct ceph_mon_statfs_request req;
454 int err;
455
456 req.buf = buf;
457 init_completion(&req.completion);
458
459 /* allocate memory for reply */
460 err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1);
461 if (err)
462 return err;
463 504
464 /* register request */ 505 /* register request */
465 mutex_lock(&monc->mutex); 506 mutex_lock(&monc->mutex);
466 req.tid = ++monc->last_tid; 507 req->tid = ++monc->last_tid;
467 req.last_attempt = jiffies; 508 req->request->hdr.tid = cpu_to_le64(req->tid);
468 req.delay = BASE_DELAY_INTERVAL; 509 __insert_generic_request(monc, req);
469 __insert_statfs(monc, &req); 510 monc->num_generic_requests++;
470 monc->num_statfs_requests++;
471 mutex_unlock(&monc->mutex); 511 mutex_unlock(&monc->mutex);
472 512
473 /* send request and wait */ 513 /* send request and wait */
474 err = send_statfs(monc, &req); 514 ceph_con_send(monc->con, ceph_msg_get(req->request));
475 if (!err) 515 err = wait_for_completion_interruptible(&req->completion);
476 err = wait_for_completion_interruptible(&req.completion);
477 516
478 mutex_lock(&monc->mutex); 517 mutex_lock(&monc->mutex);
479 rb_erase(&req.node, &monc->statfs_request_tree); 518 rb_erase(&req->node, &monc->generic_request_tree);
480 monc->num_statfs_requests--; 519 monc->num_generic_requests--;
481 ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1);
482 mutex_unlock(&monc->mutex); 520 mutex_unlock(&monc->mutex);
483 521
484 if (!err) 522 if (!err)
485 err = req.result; 523 err = req->result;
524
525out:
526 kref_put(&req->kref, release_generic_request);
486 return err; 527 return err;
487} 528}
488 529
489/* 530/*
490 * Resend pending statfs requests. 531 * Resend pending statfs requests.
491 */ 532 */
492static void __resend_statfs(struct ceph_mon_client *monc) 533static void __resend_generic_request(struct ceph_mon_client *monc)
493{ 534{
494 struct ceph_mon_statfs_request *req; 535 struct ceph_mon_generic_request *req;
495 struct rb_node *p; 536 struct rb_node *p;
496 537
497 for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { 538 for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
498 req = rb_entry(p, struct ceph_mon_statfs_request, node); 539 req = rb_entry(p, struct ceph_mon_generic_request, node);
499 send_statfs(monc, req); 540 ceph_con_revoke(monc->con, req->request);
541 ceph_con_send(monc->con, ceph_msg_get(req->request));
500 } 542 }
501} 543}
502 544
@@ -586,26 +628,26 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
586 CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | 628 CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
587 CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; 629 CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS;
588 630
589 /* msg pools */ 631 /* msgs */
590 err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, 632 err = -ENOMEM;
591 sizeof(struct ceph_mon_subscribe_ack), 1, false); 633 monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
592 if (err < 0) 634 sizeof(struct ceph_mon_subscribe_ack),
635 GFP_NOFS);
636 if (!monc->m_subscribe_ack)
593 goto out_monmap; 637 goto out_monmap;
594 err = ceph_msgpool_init(&monc->msgpool_statfs_reply, 638
595 sizeof(struct ceph_mon_statfs_reply), 0, false); 639 monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS);
596 if (err < 0) 640 if (!monc->m_subscribe)
597 goto out_pool1; 641 goto out_subscribe_ack;
598 err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); 642
599 if (err < 0) 643 monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS);
600 goto out_pool2; 644 if (!monc->m_auth_reply)
601 645 goto out_subscribe;
602 monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); 646
647 monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS);
603 monc->pending_auth = 0; 648 monc->pending_auth = 0;
604 if (IS_ERR(monc->m_auth)) { 649 if (!monc->m_auth)
605 err = PTR_ERR(monc->m_auth); 650 goto out_auth_reply;
606 monc->m_auth = NULL;
607 goto out_pool3;
608 }
609 651
610 monc->cur_mon = -1; 652 monc->cur_mon = -1;
611 monc->hunting = true; 653 monc->hunting = true;
@@ -613,8 +655,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
613 monc->sub_sent = 0; 655 monc->sub_sent = 0;
614 656
615 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); 657 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
616 monc->statfs_request_tree = RB_ROOT; 658 monc->generic_request_tree = RB_ROOT;
617 monc->num_statfs_requests = 0; 659 monc->num_generic_requests = 0;
618 monc->last_tid = 0; 660 monc->last_tid = 0;
619 661
620 monc->have_mdsmap = 0; 662 monc->have_mdsmap = 0;
@@ -622,12 +664,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
622 monc->want_next_osdmap = 1; 664 monc->want_next_osdmap = 1;
623 return 0; 665 return 0;
624 666
625out_pool3: 667out_auth_reply:
626 ceph_msgpool_destroy(&monc->msgpool_auth_reply); 668 ceph_msg_put(monc->m_auth_reply);
627out_pool2: 669out_subscribe:
628 ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); 670 ceph_msg_put(monc->m_subscribe);
629out_pool1: 671out_subscribe_ack:
630 ceph_msgpool_destroy(&monc->msgpool_statfs_reply); 672 ceph_msg_put(monc->m_subscribe_ack);
631out_monmap: 673out_monmap:
632 kfree(monc->monmap); 674 kfree(monc->monmap);
633out: 675out:
@@ -651,9 +693,9 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
651 ceph_auth_destroy(monc->auth); 693 ceph_auth_destroy(monc->auth);
652 694
653 ceph_msg_put(monc->m_auth); 695 ceph_msg_put(monc->m_auth);
654 ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); 696 ceph_msg_put(monc->m_auth_reply);
655 ceph_msgpool_destroy(&monc->msgpool_statfs_reply); 697 ceph_msg_put(monc->m_subscribe);
656 ceph_msgpool_destroy(&monc->msgpool_auth_reply); 698 ceph_msg_put(monc->m_subscribe_ack);
657 699
658 kfree(monc->monmap); 700 kfree(monc->monmap);
659} 701}
@@ -681,7 +723,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
681 monc->client->msgr->inst.name.num = monc->auth->global_id; 723 monc->client->msgr->inst.name.num = monc->auth->global_id;
682 724
683 __send_subscribe(monc); 725 __send_subscribe(monc);
684 __resend_statfs(monc); 726 __resend_generic_request(monc);
685 } 727 }
686 mutex_unlock(&monc->mutex); 728 mutex_unlock(&monc->mutex);
687} 729}
@@ -770,18 +812,17 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
770 812
771 switch (type) { 813 switch (type) {
772 case CEPH_MSG_MON_SUBSCRIBE_ACK: 814 case CEPH_MSG_MON_SUBSCRIBE_ACK:
773 m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); 815 m = ceph_msg_get(monc->m_subscribe_ack);
774 break; 816 break;
775 case CEPH_MSG_STATFS_REPLY: 817 case CEPH_MSG_STATFS_REPLY:
776 m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); 818 return get_generic_reply(con, hdr, skip);
777 break;
778 case CEPH_MSG_AUTH_REPLY: 819 case CEPH_MSG_AUTH_REPLY:
779 m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); 820 m = ceph_msg_get(monc->m_auth_reply);
780 break; 821 break;
781 case CEPH_MSG_MON_MAP: 822 case CEPH_MSG_MON_MAP:
782 case CEPH_MSG_MDS_MAP: 823 case CEPH_MSG_MDS_MAP:
783 case CEPH_MSG_OSD_MAP: 824 case CEPH_MSG_OSD_MAP:
784 m = ceph_msg_new(type, front_len, 0, 0, NULL); 825 m = ceph_msg_new(type, front_len, GFP_NOFS);
785 break; 826 break;
786 } 827 }
787 828
@@ -826,7 +867,7 @@ out:
826 mutex_unlock(&monc->mutex); 867 mutex_unlock(&monc->mutex);
827} 868}
828 869
829const static struct ceph_connection_operations mon_con_ops = { 870static const struct ceph_connection_operations mon_con_ops = {
830 .get = ceph_con_get, 871 .get = ceph_con_get,
831 .put = ceph_con_put, 872 .put = ceph_con_put,
832 .dispatch = dispatch, 873 .dispatch = dispatch,
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h
index b958ad5afa06..174d794321d0 100644
--- a/fs/ceph/mon_client.h
+++ b/fs/ceph/mon_client.h
@@ -2,10 +2,10 @@
2#define _FS_CEPH_MON_CLIENT_H 2#define _FS_CEPH_MON_CLIENT_H
3 3
4#include <linux/completion.h> 4#include <linux/completion.h>
5#include <linux/kref.h>
5#include <linux/rbtree.h> 6#include <linux/rbtree.h>
6 7
7#include "messenger.h" 8#include "messenger.h"
8#include "msgpool.h"
9 9
10struct ceph_client; 10struct ceph_client;
11struct ceph_mount_args; 11struct ceph_mount_args;
@@ -22,7 +22,7 @@ struct ceph_monmap {
22}; 22};
23 23
24struct ceph_mon_client; 24struct ceph_mon_client;
25struct ceph_mon_statfs_request; 25struct ceph_mon_generic_request;
26 26
27 27
28/* 28/*
@@ -40,17 +40,19 @@ struct ceph_mon_request {
40}; 40};
41 41
42/* 42/*
43 * statfs() is done a bit differently because we need to get data back 43 * ceph_mon_generic_request is being used for the statfs and poolop requests
44 * which are being done a bit differently because we need to get data back
44 * to the caller 45 * to the caller
45 */ 46 */
46struct ceph_mon_statfs_request { 47struct ceph_mon_generic_request {
48 struct kref kref;
47 u64 tid; 49 u64 tid;
48 struct rb_node node; 50 struct rb_node node;
49 int result; 51 int result;
50 struct ceph_statfs *buf; 52 void *buf;
51 struct completion completion; 53 struct completion completion;
52 unsigned long last_attempt, delay; /* jiffies */
53 struct ceph_msg *request; /* original request */ 54 struct ceph_msg *request; /* original request */
55 struct ceph_msg *reply; /* and reply */
54}; 56};
55 57
56struct ceph_mon_client { 58struct ceph_mon_client {
@@ -61,7 +63,7 @@ struct ceph_mon_client {
61 struct delayed_work delayed_work; 63 struct delayed_work delayed_work;
62 64
63 struct ceph_auth_client *auth; 65 struct ceph_auth_client *auth;
64 struct ceph_msg *m_auth; 66 struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
65 int pending_auth; 67 int pending_auth;
66 68
67 bool hunting; 69 bool hunting;
@@ -70,14 +72,9 @@ struct ceph_mon_client {
70 struct ceph_connection *con; 72 struct ceph_connection *con;
71 bool have_fsid; 73 bool have_fsid;
72 74
73 /* msg pools */ 75 /* pending generic requests */
74 struct ceph_msgpool msgpool_subscribe_ack; 76 struct rb_root generic_request_tree;
75 struct ceph_msgpool msgpool_statfs_reply; 77 int num_generic_requests;
76 struct ceph_msgpool msgpool_auth_reply;
77
78 /* pending statfs requests */
79 struct rb_root statfs_request_tree;
80 int num_statfs_requests;
81 u64 last_tid; 78 u64 last_tid;
82 79
83 /* mds/osd map */ 80 /* mds/osd map */
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c
index ca3b44a89f2d..dd65a6438131 100644
--- a/fs/ceph/msgpool.c
+++ b/fs/ceph/msgpool.c
@@ -7,180 +7,58 @@
7 7
8#include "msgpool.h" 8#include "msgpool.h"
9 9
10/* 10static void *alloc_fn(gfp_t gfp_mask, void *arg)
11 * We use msg pools to preallocate memory for messages we expect to 11{
12 * receive over the wire, to avoid getting ourselves into OOM 12 struct ceph_msgpool *pool = arg;
13 * conditions at unexpected times. We take use a few different 13 void *p;
14 * strategies:
15 *
16 * - for request/response type interactions, we preallocate the
17 * memory needed for the response when we generate the request.
18 *
19 * - for messages we can receive at any time from the MDS, we preallocate
20 * a pool of messages we can re-use.
21 *
22 * - for writeback, we preallocate some number of messages to use for
23 * requests and their replies, so that we always make forward
24 * progress.
25 *
26 * The msgpool behaves like a mempool_t, but keeps preallocated
27 * ceph_msgs strung together on a list_head instead of using a pointer
28 * vector. This avoids vector reallocation when we adjust the number
29 * of preallocated items (which happens frequently).
30 */
31 14
15 p = ceph_msg_new(0, pool->front_len, gfp_mask);
16 if (!p)
17 pr_err("msgpool %s alloc failed\n", pool->name);
18 return p;
19}
32 20
33/* 21static void free_fn(void *element, void *arg)
34 * Allocate or release as necessary to meet our target pool size.
35 */
36static int __fill_msgpool(struct ceph_msgpool *pool)
37{ 22{
38 struct ceph_msg *msg; 23 ceph_msg_put(element);
39
40 while (pool->num < pool->min) {
41 dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num,
42 pool->min);
43 spin_unlock(&pool->lock);
44 msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL);
45 spin_lock(&pool->lock);
46 if (IS_ERR(msg))
47 return PTR_ERR(msg);
48 msg->pool = pool;
49 list_add(&msg->list_head, &pool->msgs);
50 pool->num++;
51 }
52 while (pool->num > pool->min) {
53 msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head);
54 dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num,
55 pool->min, msg);
56 list_del_init(&msg->list_head);
57 pool->num--;
58 ceph_msg_kfree(msg);
59 }
60 return 0;
61} 24}
62 25
63int ceph_msgpool_init(struct ceph_msgpool *pool, 26int ceph_msgpool_init(struct ceph_msgpool *pool,
64 int front_len, int min, bool blocking) 27 int front_len, int size, bool blocking, const char *name)
65{ 28{
66 int ret;
67
68 dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min);
69 spin_lock_init(&pool->lock);
70 pool->front_len = front_len; 29 pool->front_len = front_len;
71 INIT_LIST_HEAD(&pool->msgs); 30 pool->pool = mempool_create(size, alloc_fn, free_fn, pool);
72 pool->num = 0; 31 if (!pool->pool)
73 pool->min = min; 32 return -ENOMEM;
74 pool->blocking = blocking; 33 pool->name = name;
75 init_waitqueue_head(&pool->wait); 34 return 0;
76
77 spin_lock(&pool->lock);
78 ret = __fill_msgpool(pool);
79 spin_unlock(&pool->lock);
80 return ret;
81} 35}
82 36
83void ceph_msgpool_destroy(struct ceph_msgpool *pool) 37void ceph_msgpool_destroy(struct ceph_msgpool *pool)
84{ 38{
85 dout("msgpool_destroy %p\n", pool); 39 mempool_destroy(pool->pool);
86 spin_lock(&pool->lock);
87 pool->min = 0;
88 __fill_msgpool(pool);
89 spin_unlock(&pool->lock);
90} 40}
91 41
92int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) 42struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
43 int front_len)
93{ 44{
94 int ret; 45 if (front_len > pool->front_len) {
95 46 pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
96 spin_lock(&pool->lock); 47 pool->name, front_len, pool->front_len);
97 dout("msgpool_resv %p delta %d\n", pool, delta);
98 pool->min += delta;
99 ret = __fill_msgpool(pool);
100 spin_unlock(&pool->lock);
101 return ret;
102}
103
104struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len)
105{
106 wait_queue_t wait;
107 struct ceph_msg *msg;
108
109 if (front_len && front_len > pool->front_len) {
110 pr_err("msgpool_get pool %p need front %d, pool size is %d\n",
111 pool, front_len, pool->front_len);
112 WARN_ON(1); 48 WARN_ON(1);
113 49
114 /* try to alloc a fresh message */ 50 /* try to alloc a fresh message */
115 msg = ceph_msg_new(0, front_len, 0, 0, NULL); 51 return ceph_msg_new(0, front_len, GFP_NOFS);
116 if (!IS_ERR(msg))
117 return msg;
118 }
119
120 if (!front_len)
121 front_len = pool->front_len;
122
123 if (pool->blocking) {
124 /* mempool_t behavior; first try to alloc */
125 msg = ceph_msg_new(0, front_len, 0, 0, NULL);
126 if (!IS_ERR(msg))
127 return msg;
128 } 52 }
129 53
130 while (1) { 54 return mempool_alloc(pool->pool, GFP_NOFS);
131 spin_lock(&pool->lock);
132 if (likely(pool->num)) {
133 msg = list_entry(pool->msgs.next, struct ceph_msg,
134 list_head);
135 list_del_init(&msg->list_head);
136 pool->num--;
137 dout("msgpool_get %p got %p, now %d/%d\n", pool, msg,
138 pool->num, pool->min);
139 spin_unlock(&pool->lock);
140 return msg;
141 }
142 pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num,
143 pool->min, pool->blocking ? "waiting" : "may fail");
144 spin_unlock(&pool->lock);
145
146 if (!pool->blocking) {
147 WARN_ON(1);
148
149 /* maybe we can allocate it now? */
150 msg = ceph_msg_new(0, front_len, 0, 0, NULL);
151 if (!IS_ERR(msg))
152 return msg;
153
154 pr_err("msgpool_get %p empty + alloc failed\n", pool);
155 return ERR_PTR(-ENOMEM);
156 }
157
158 init_wait(&wait);
159 prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
160 schedule();
161 finish_wait(&pool->wait, &wait);
162 }
163} 55}
164 56
165void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) 57void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
166{ 58{
167 spin_lock(&pool->lock); 59 /* reset msg front_len; user may have changed it */
168 if (pool->num < pool->min) { 60 msg->front.iov_len = pool->front_len;
169 /* reset msg front_len; user may have changed it */ 61 msg->hdr.front_len = cpu_to_le32(pool->front_len);
170 msg->front.iov_len = pool->front_len;
171 msg->hdr.front_len = cpu_to_le32(pool->front_len);
172 62
173 kref_set(&msg->kref, 1); /* retake a single ref */ 63 kref_init(&msg->kref); /* retake single ref */
174 list_add(&msg->list_head, &pool->msgs);
175 pool->num++;
176 dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg,
177 pool->num, pool->min);
178 spin_unlock(&pool->lock);
179 wake_up(&pool->wait);
180 } else {
181 dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg,
182 pool->num, pool->min);
183 spin_unlock(&pool->lock);
184 ceph_msg_kfree(msg);
185 }
186} 64}
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h
index bc834bfcd720..a362605f9368 100644
--- a/fs/ceph/msgpool.h
+++ b/fs/ceph/msgpool.h
@@ -1,6 +1,7 @@
1#ifndef _FS_CEPH_MSGPOOL 1#ifndef _FS_CEPH_MSGPOOL
2#define _FS_CEPH_MSGPOOL 2#define _FS_CEPH_MSGPOOL
3 3
4#include <linux/mempool.h>
4#include "messenger.h" 5#include "messenger.h"
5 6
6/* 7/*
@@ -8,18 +9,15 @@
8 * avoid unexpected OOM conditions. 9 * avoid unexpected OOM conditions.
9 */ 10 */
10struct ceph_msgpool { 11struct ceph_msgpool {
11 spinlock_t lock; 12 const char *name;
13 mempool_t *pool;
12 int front_len; /* preallocated payload size */ 14 int front_len; /* preallocated payload size */
13 struct list_head msgs; /* msgs in the pool; each has 1 ref */
14 int num, min; /* cur, min # msgs in the pool */
15 bool blocking;
16 wait_queue_head_t wait;
17}; 15};
18 16
19extern int ceph_msgpool_init(struct ceph_msgpool *pool, 17extern int ceph_msgpool_init(struct ceph_msgpool *pool,
20 int front_len, int size, bool blocking); 18 int front_len, int size, bool blocking,
19 const char *name);
21extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); 20extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
22extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta);
23extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, 21extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
24 int front_len); 22 int front_len);
25extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); 23extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h
index 8aaab414f3f8..892a0298dfdf 100644
--- a/fs/ceph/msgr.h
+++ b/fs/ceph/msgr.h
@@ -50,7 +50,6 @@ struct ceph_entity_name {
50#define CEPH_ENTITY_TYPE_MDS 0x02 50#define CEPH_ENTITY_TYPE_MDS 0x02
51#define CEPH_ENTITY_TYPE_OSD 0x04 51#define CEPH_ENTITY_TYPE_OSD 0x04
52#define CEPH_ENTITY_TYPE_CLIENT 0x08 52#define CEPH_ENTITY_TYPE_CLIENT 0x08
53#define CEPH_ENTITY_TYPE_ADMIN 0x10
54#define CEPH_ENTITY_TYPE_AUTH 0x20 53#define CEPH_ENTITY_TYPE_AUTH 0x20
55 54
56#define CEPH_ENTITY_TYPE_ANY 0xFF 55#define CEPH_ENTITY_TYPE_ANY 0xFF
@@ -120,7 +119,7 @@ struct ceph_msg_connect_reply {
120/* 119/*
121 * message header 120 * message header
122 */ 121 */
123struct ceph_msg_header { 122struct ceph_msg_header_old {
124 __le64 seq; /* message seq# for this session */ 123 __le64 seq; /* message seq# for this session */
125 __le64 tid; /* transaction id */ 124 __le64 tid; /* transaction id */
126 __le16 type; /* message type */ 125 __le16 type; /* message type */
@@ -138,6 +137,24 @@ struct ceph_msg_header {
138 __le32 crc; /* header crc32c */ 137 __le32 crc; /* header crc32c */
139} __attribute__ ((packed)); 138} __attribute__ ((packed));
140 139
140struct ceph_msg_header {
141 __le64 seq; /* message seq# for this session */
142 __le64 tid; /* transaction id */
143 __le16 type; /* message type */
144 __le16 priority; /* priority. higher value == higher priority */
145 __le16 version; /* version of message encoding */
146
147 __le32 front_len; /* bytes in main payload */
148 __le32 middle_len;/* bytes in middle payload */
149 __le32 data_len; /* bytes of data payload */
150 __le16 data_off; /* sender: include full offset;
151 receiver: mask against ~PAGE_MASK */
152
153 struct ceph_entity_name src;
154 __le32 reserved;
155 __le32 crc; /* header crc32c */
156} __attribute__ ((packed));
157
141#define CEPH_MSG_PRIO_LOW 64 158#define CEPH_MSG_PRIO_LOW 64
142#define CEPH_MSG_PRIO_DEFAULT 127 159#define CEPH_MSG_PRIO_DEFAULT 127
143#define CEPH_MSG_PRIO_HIGH 196 160#define CEPH_MSG_PRIO_HIGH 196
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 3514f71ff85f..afa7bb3895c4 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -16,7 +16,7 @@
16#define OSD_OP_FRONT_LEN 4096 16#define OSD_OP_FRONT_LEN 4096
17#define OSD_OPREPLY_FRONT_LEN 512 17#define OSD_OPREPLY_FRONT_LEN 512
18 18
19const static struct ceph_connection_operations osd_con_ops; 19static const struct ceph_connection_operations osd_con_ops;
20static int __kick_requests(struct ceph_osd_client *osdc, 20static int __kick_requests(struct ceph_osd_client *osdc,
21 struct ceph_osd *kickosd); 21 struct ceph_osd *kickosd);
22 22
@@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
147 req = kzalloc(sizeof(*req), GFP_NOFS); 147 req = kzalloc(sizeof(*req), GFP_NOFS);
148 } 148 }
149 if (req == NULL) 149 if (req == NULL)
150 return ERR_PTR(-ENOMEM); 150 return NULL;
151 151
152 req->r_osdc = osdc; 152 req->r_osdc = osdc;
153 req->r_mempool = use_mempool; 153 req->r_mempool = use_mempool;
@@ -164,10 +164,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
164 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); 164 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
165 else 165 else
166 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, 166 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
167 OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); 167 OSD_OPREPLY_FRONT_LEN, GFP_NOFS);
168 if (IS_ERR(msg)) { 168 if (!msg) {
169 ceph_osdc_put_request(req); 169 ceph_osdc_put_request(req);
170 return ERR_PTR(PTR_ERR(msg)); 170 return NULL;
171 } 171 }
172 req->r_reply = msg; 172 req->r_reply = msg;
173 173
@@ -178,10 +178,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
178 if (use_mempool) 178 if (use_mempool)
179 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 179 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
180 else 180 else
181 msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); 181 msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS);
182 if (IS_ERR(msg)) { 182 if (!msg) {
183 ceph_osdc_put_request(req); 183 ceph_osdc_put_request(req);
184 return ERR_PTR(PTR_ERR(msg)); 184 return NULL;
185 } 185 }
186 msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); 186 msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
187 memset(msg->front.iov_base, 0, msg->front.iov_len); 187 memset(msg->front.iov_base, 0, msg->front.iov_len);
@@ -715,7 +715,7 @@ static void handle_timeout(struct work_struct *work)
715 * should mark the osd as failed and we should find out about 715 * should mark the osd as failed and we should find out about
716 * it from an updated osd map. 716 * it from an updated osd map.
717 */ 717 */
718 while (!list_empty(&osdc->req_lru)) { 718 while (timeout && !list_empty(&osdc->req_lru)) {
719 req = list_entry(osdc->req_lru.next, struct ceph_osd_request, 719 req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
720 r_req_lru_item); 720 r_req_lru_item);
721 721
@@ -1078,6 +1078,7 @@ done:
1078 if (newmap) 1078 if (newmap)
1079 kick_requests(osdc, NULL); 1079 kick_requests(osdc, NULL);
1080 up_read(&osdc->map_sem); 1080 up_read(&osdc->map_sem);
1081 wake_up(&osdc->client->auth_wq);
1081 return; 1082 return;
1082 1083
1083bad: 1084bad:
@@ -1087,45 +1088,6 @@ bad:
1087 return; 1088 return;
1088} 1089}
1089 1090
1090
1091/*
1092 * A read request prepares specific pages that data is to be read into.
1093 * When a message is being read off the wire, we call prepare_pages to
1094 * find those pages.
1095 * 0 = success, -1 failure.
1096 */
1097static int __prepare_pages(struct ceph_connection *con,
1098 struct ceph_msg_header *hdr,
1099 struct ceph_osd_request *req,
1100 u64 tid,
1101 struct ceph_msg *m)
1102{
1103 struct ceph_osd *osd = con->private;
1104 struct ceph_osd_client *osdc;
1105 int ret = -1;
1106 int data_len = le32_to_cpu(hdr->data_len);
1107 unsigned data_off = le16_to_cpu(hdr->data_off);
1108
1109 int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
1110
1111 if (!osd)
1112 return -1;
1113
1114 osdc = osd->o_osdc;
1115
1116 dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m,
1117 tid, req->r_num_pages, want);
1118 if (unlikely(req->r_num_pages < want))
1119 goto out;
1120 m->pages = req->r_pages;
1121 m->nr_pages = req->r_num_pages;
1122 ret = 0; /* success */
1123out:
1124 BUG_ON(ret < 0 || m->nr_pages < want);
1125
1126 return ret;
1127}
1128
1129/* 1091/*
1130 * Register request, send initial attempt. 1092 * Register request, send initial attempt.
1131 */ 1093 */
@@ -1252,11 +1214,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1252 if (!osdc->req_mempool) 1214 if (!osdc->req_mempool)
1253 goto out; 1215 goto out;
1254 1216
1255 err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); 1217 err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
1218 "osd_op");
1256 if (err < 0) 1219 if (err < 0)
1257 goto out_mempool; 1220 goto out_mempool;
1258 err = ceph_msgpool_init(&osdc->msgpool_op_reply, 1221 err = ceph_msgpool_init(&osdc->msgpool_op_reply,
1259 OSD_OPREPLY_FRONT_LEN, 10, true); 1222 OSD_OPREPLY_FRONT_LEN, 10, true,
1223 "osd_op_reply");
1260 if (err < 0) 1224 if (err < 0)
1261 goto out_msgpool; 1225 goto out_msgpool;
1262 return 0; 1226 return 0;
@@ -1302,8 +1266,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1302 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 1266 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1303 NULL, 0, truncate_seq, truncate_size, NULL, 1267 NULL, 0, truncate_seq, truncate_size, NULL,
1304 false, 1); 1268 false, 1);
1305 if (IS_ERR(req)) 1269 if (!req)
1306 return PTR_ERR(req); 1270 return -ENOMEM;
1307 1271
1308 /* it may be a short read due to an object boundary */ 1272 /* it may be a short read due to an object boundary */
1309 req->r_pages = pages; 1273 req->r_pages = pages;
@@ -1345,8 +1309,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1345 snapc, do_sync, 1309 snapc, do_sync,
1346 truncate_seq, truncate_size, mtime, 1310 truncate_seq, truncate_size, mtime,
1347 nofail, 1); 1311 nofail, 1);
1348 if (IS_ERR(req)) 1312 if (!req)
1349 return PTR_ERR(req); 1313 return -ENOMEM;
1350 1314
1351 /* it may be a short write due to an object boundary */ 1315 /* it may be a short write due to an object boundary */
1352 req->r_pages = pages; 1316 req->r_pages = pages;
@@ -1394,7 +1358,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
1394} 1358}
1395 1359
1396/* 1360/*
1397 * lookup and return message for incoming reply 1361 * lookup and return message for incoming reply. set up reply message
1362 * pages.
1398 */ 1363 */
1399static struct ceph_msg *get_reply(struct ceph_connection *con, 1364static struct ceph_msg *get_reply(struct ceph_connection *con,
1400 struct ceph_msg_header *hdr, 1365 struct ceph_msg_header *hdr,
@@ -1407,7 +1372,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1407 int front = le32_to_cpu(hdr->front_len); 1372 int front = le32_to_cpu(hdr->front_len);
1408 int data_len = le32_to_cpu(hdr->data_len); 1373 int data_len = le32_to_cpu(hdr->data_len);
1409 u64 tid; 1374 u64 tid;
1410 int err;
1411 1375
1412 tid = le64_to_cpu(hdr->tid); 1376 tid = le64_to_cpu(hdr->tid);
1413 mutex_lock(&osdc->request_mutex); 1377 mutex_lock(&osdc->request_mutex);
@@ -1425,13 +1389,14 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1425 req->r_reply, req->r_con_filling_msg); 1389 req->r_reply, req->r_con_filling_msg);
1426 ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); 1390 ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
1427 ceph_con_put(req->r_con_filling_msg); 1391 ceph_con_put(req->r_con_filling_msg);
1392 req->r_con_filling_msg = NULL;
1428 } 1393 }
1429 1394
1430 if (front > req->r_reply->front.iov_len) { 1395 if (front > req->r_reply->front.iov_len) {
1431 pr_warning("get_reply front %d > preallocated %d\n", 1396 pr_warning("get_reply front %d > preallocated %d\n",
1432 front, (int)req->r_reply->front.iov_len); 1397 front, (int)req->r_reply->front.iov_len);
1433 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); 1398 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS);
1434 if (IS_ERR(m)) 1399 if (!m)
1435 goto out; 1400 goto out;
1436 ceph_msg_put(req->r_reply); 1401 ceph_msg_put(req->r_reply);
1437 req->r_reply = m; 1402 req->r_reply = m;
@@ -1439,12 +1404,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
1439 m = ceph_msg_get(req->r_reply); 1404 m = ceph_msg_get(req->r_reply);
1440 1405
1441 if (data_len > 0) { 1406 if (data_len > 0) {
1442 err = __prepare_pages(con, hdr, req, tid, m); 1407 unsigned data_off = le16_to_cpu(hdr->data_off);
1443 if (err < 0) { 1408 int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
1409
1410 if (unlikely(req->r_num_pages < want)) {
1411 pr_warning("tid %lld reply %d > expected %d pages\n",
1412 tid, want, m->nr_pages);
1444 *skip = 1; 1413 *skip = 1;
1445 ceph_msg_put(m); 1414 ceph_msg_put(m);
1446 m = ERR_PTR(err); 1415 m = NULL;
1416 goto out;
1447 } 1417 }
1418 m->pages = req->r_pages;
1419 m->nr_pages = req->r_num_pages;
1448 } 1420 }
1449 *skip = 0; 1421 *skip = 0;
1450 req->r_con_filling_msg = ceph_con_get(con); 1422 req->r_con_filling_msg = ceph_con_get(con);
@@ -1466,7 +1438,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
1466 1438
1467 switch (type) { 1439 switch (type) {
1468 case CEPH_MSG_OSD_MAP: 1440 case CEPH_MSG_OSD_MAP:
1469 return ceph_msg_new(type, front, 0, 0, NULL); 1441 return ceph_msg_new(type, front, GFP_NOFS);
1470 case CEPH_MSG_OSD_OPREPLY: 1442 case CEPH_MSG_OSD_OPREPLY:
1471 return get_reply(con, hdr, skip); 1443 return get_reply(con, hdr, skip);
1472 default: 1444 default:
@@ -1552,7 +1524,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
1552 return ceph_monc_validate_auth(&osdc->client->monc); 1524 return ceph_monc_validate_auth(&osdc->client->monc);
1553} 1525}
1554 1526
1555const static struct ceph_connection_operations osd_con_ops = { 1527static const struct ceph_connection_operations osd_con_ops = {
1556 .get = get_osd_con, 1528 .get = get_osd_con,
1557 .put = put_osd_con, 1529 .put = put_osd_con,
1558 .dispatch = dispatch, 1530 .dispatch = dispatch,
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index 5f8dbf7c745a..b6859f47d364 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl)
20 20
21static int ceph_pagelist_addpage(struct ceph_pagelist *pl) 21static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
22{ 22{
23 struct page *page = alloc_page(GFP_NOFS); 23 struct page *page = __page_cache_alloc(GFP_NOFS);
24 if (!page) 24 if (!page)
25 return -ENOMEM; 25 return -ENOMEM;
26 pl->room += PAGE_SIZE; 26 pl->room += PAGE_SIZE;
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index fd56451a871f..8fcc023056c7 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -101,8 +101,8 @@ struct ceph_pg_pool {
101 __le64 snap_seq; /* seq for per-pool snapshot */ 101 __le64 snap_seq; /* seq for per-pool snapshot */
102 __le32 snap_epoch; /* epoch of last snap */ 102 __le32 snap_epoch; /* epoch of last snap */
103 __le32 num_snaps; 103 __le32 num_snaps;
104 __le32 num_removed_snap_intervals; 104 __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
105 __le64 uid; 105 __le64 auid; /* who owns the pg */
106} __attribute__ ((packed)); 106} __attribute__ ((packed));
107 107
108/* 108/*
@@ -208,6 +208,7 @@ enum {
208 /* read */ 208 /* read */
209 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, 209 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
210 CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, 210 CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
211 CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
211 212
212 /* write */ 213 /* write */
213 CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, 214 CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
@@ -305,6 +306,22 @@ enum {
305#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ 306#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
306#define EBLACKLISTED ESHUTDOWN /* blacklisted */ 307#define EBLACKLISTED ESHUTDOWN /* blacklisted */
307 308
309/* xattr comparison */
310enum {
311 CEPH_OSD_CMPXATTR_OP_NOP = 0,
312 CEPH_OSD_CMPXATTR_OP_EQ = 1,
313 CEPH_OSD_CMPXATTR_OP_NE = 2,
314 CEPH_OSD_CMPXATTR_OP_GT = 3,
315 CEPH_OSD_CMPXATTR_OP_GTE = 4,
316 CEPH_OSD_CMPXATTR_OP_LT = 5,
317 CEPH_OSD_CMPXATTR_OP_LTE = 6
318};
319
320enum {
321 CEPH_OSD_CMPXATTR_MODE_STRING = 1,
322 CEPH_OSD_CMPXATTR_MODE_U64 = 2
323};
324
308/* 325/*
309 * an individual object operation. each may be accompanied by some data 326 * an individual object operation. each may be accompanied by some data
310 * payload 327 * payload
@@ -321,6 +338,8 @@ struct ceph_osd_op {
321 struct { 338 struct {
322 __le32 name_len; 339 __le32 name_len;
323 __le32 value_len; 340 __le32 value_len;
341 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
342 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
324 } __attribute__ ((packed)) xattr; 343 } __attribute__ ((packed)) xattr;
325 struct { 344 struct {
326 __u8 class_len; 345 __u8 class_len;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index d5114db70453..c0b26b6badba 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
512 struct ceph_cap_snap *capsnap) 512 struct ceph_cap_snap *capsnap)
513{ 513{
514 struct inode *inode = &ci->vfs_inode; 514 struct inode *inode = &ci->vfs_inode;
515 struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; 515 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
516 516
517 BUG_ON(capsnap->writing); 517 BUG_ON(capsnap->writing);
518 capsnap->size = inode->i_size; 518 capsnap->size = inode->i_size;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 110857ba9269..7c663d9b9f81 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -8,14 +8,11 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/parser.h> 10#include <linux/parser.h>
11#include <linux/rwsem.h>
12#include <linux/sched.h> 11#include <linux/sched.h>
13#include <linux/seq_file.h> 12#include <linux/seq_file.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/statfs.h> 14#include <linux/statfs.h>
16#include <linux/string.h> 15#include <linux/string.h>
17#include <linux/version.h>
18#include <linux/vmalloc.h>
19 16
20#include "decode.h" 17#include "decode.h"
21#include "super.h" 18#include "super.h"
@@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
107static int ceph_syncfs(struct super_block *sb, int wait) 104static int ceph_syncfs(struct super_block *sb, int wait)
108{ 105{
109 dout("sync_fs %d\n", wait); 106 dout("sync_fs %d\n", wait);
110 ceph_osdc_sync(&ceph_client(sb)->osdc); 107 ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
111 ceph_mdsc_sync(&ceph_client(sb)->mdsc); 108 ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
112 dout("sync_fs %d done\n", wait); 109 dout("sync_fs %d done\n", wait);
113 return 0; 110 return 0;
114} 111}
115 112
113static int default_congestion_kb(void)
114{
115 int congestion_kb;
116
117 /*
118 * Copied from NFS
119 *
120 * congestion size, scale with available memory.
121 *
122 * 64MB: 8192k
123 * 128MB: 11585k
124 * 256MB: 16384k
125 * 512MB: 23170k
126 * 1GB: 32768k
127 * 2GB: 46340k
128 * 4GB: 65536k
129 * 8GB: 92681k
130 * 16GB: 131072k
131 *
132 * This allows larger machines to have larger/more transfers.
133 * Limit the default to 256M
134 */
135 congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
136 if (congestion_kb > 256*1024)
137 congestion_kb = 256*1024;
138
139 return congestion_kb;
140}
116 141
117/** 142/**
118 * ceph_show_options - Show mount options in /proc/mounts 143 * ceph_show_options - Show mount options in /proc/mounts
@@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
138 seq_puts(m, ",nocrc"); 163 seq_puts(m, ",nocrc");
139 if (args->flags & CEPH_OPT_NOASYNCREADDIR) 164 if (args->flags & CEPH_OPT_NOASYNCREADDIR)
140 seq_puts(m, ",noasyncreaddir"); 165 seq_puts(m, ",noasyncreaddir");
166
167 if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
168 seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
169 if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
170 seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
171 if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
172 seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
173 if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
174 seq_printf(m, ",osdkeepalivetimeout=%d",
175 args->osd_keepalive_timeout);
176 if (args->wsize)
177 seq_printf(m, ",wsize=%d", args->wsize);
178 if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
179 seq_printf(m, ",rsize=%d", args->rsize);
180 if (args->congestion_kb != default_congestion_kb())
181 seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
182 if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
183 seq_printf(m, ",caps_wanted_delay_min=%d",
184 args->caps_wanted_delay_min);
185 if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
186 seq_printf(m, ",caps_wanted_delay_max=%d",
187 args->caps_wanted_delay_max);
188 if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
189 seq_printf(m, ",cap_release_safety=%d",
190 args->cap_release_safety);
191 if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
192 seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
193 if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
194 seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
141 if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 195 if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
142 seq_printf(m, ",snapdirname=%s", args->snapdir_name); 196 seq_printf(m, ",snapdirname=%s", args->snapdir_name);
143 if (args->name) 197 if (args->name)
@@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo)
161 inode_init_once(&ci->vfs_inode); 215 inode_init_once(&ci->vfs_inode);
162} 216}
163 217
164static int default_congestion_kb(void)
165{
166 int congestion_kb;
167
168 /*
169 * Copied from NFS
170 *
171 * congestion size, scale with available memory.
172 *
173 * 64MB: 8192k
174 * 128MB: 11585k
175 * 256MB: 16384k
176 * 512MB: 23170k
177 * 1GB: 32768k
178 * 2GB: 46340k
179 * 4GB: 65536k
180 * 8GB: 92681k
181 * 16GB: 131072k
182 *
183 * This allows larger machines to have larger/more transfers.
184 * Limit the default to 256M
185 */
186 congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
187 if (congestion_kb > 256*1024)
188 congestion_kb = 256*1024;
189
190 return congestion_kb;
191}
192
193static int __init init_caches(void) 218static int __init init_caches(void)
194{ 219{
195 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 220 ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
@@ -308,7 +333,9 @@ enum {
308 Opt_osd_idle_ttl, 333 Opt_osd_idle_ttl,
309 Opt_caps_wanted_delay_min, 334 Opt_caps_wanted_delay_min,
310 Opt_caps_wanted_delay_max, 335 Opt_caps_wanted_delay_max,
336 Opt_cap_release_safety,
311 Opt_readdir_max_entries, 337 Opt_readdir_max_entries,
338 Opt_readdir_max_bytes,
312 Opt_congestion_kb, 339 Opt_congestion_kb,
313 Opt_last_int, 340 Opt_last_int,
314 /* int args above */ 341 /* int args above */
@@ -339,7 +366,9 @@ static match_table_t arg_tokens = {
339 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, 366 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
340 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 367 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
341 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 368 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
369 {Opt_cap_release_safety, "cap_release_safety=%d"},
342 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 370 {Opt_readdir_max_entries, "readdir_max_entries=%d"},
371 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
343 {Opt_congestion_kb, "write_congestion_kb=%d"}, 372 {Opt_congestion_kb, "write_congestion_kb=%d"},
344 /* int args above */ 373 /* int args above */
345 {Opt_snapdirname, "snapdirname=%s"}, 374 {Opt_snapdirname, "snapdirname=%s"},
@@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
388 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 417 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
389 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 418 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
390 args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 419 args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
391 args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; 420 args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
392 args->max_readdir = 1024; 421 args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
422 args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
393 args->congestion_kb = default_congestion_kb(); 423 args->congestion_kb = default_congestion_kb();
394 424
395 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ 425 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
@@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
497 case Opt_readdir_max_entries: 527 case Opt_readdir_max_entries:
498 args->max_readdir = intval; 528 args->max_readdir = intval;
499 break; 529 break;
530 case Opt_readdir_max_bytes:
531 args->max_readdir_bytes = intval;
532 break;
500 case Opt_congestion_kb: 533 case Opt_congestion_kb:
501 args->congestion_kb = intval; 534 args->congestion_kb = intval;
502 break; 535 break;
@@ -682,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
682/* 715/*
683 * true if we have the mon map (and have thus joined the cluster) 716 * true if we have the mon map (and have thus joined the cluster)
684 */ 717 */
685static int have_mon_map(struct ceph_client *client) 718static int have_mon_and_osd_map(struct ceph_client *client)
686{ 719{
687 return client->monc.monmap && client->monc.monmap->epoch; 720 return client->monc.monmap && client->monc.monmap->epoch &&
721 client->osdc.osdmap && client->osdc.osdmap->epoch;
688} 722}
689 723
690/* 724/*
@@ -762,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
762 if (err < 0) 796 if (err < 0)
763 goto out; 797 goto out;
764 798
765 while (!have_mon_map(client)) { 799 while (!have_mon_and_osd_map(client)) {
766 err = -EIO; 800 err = -EIO;
767 if (timeout && time_after_eq(jiffies, started + timeout)) 801 if (timeout && time_after_eq(jiffies, started + timeout))
768 goto out; 802 goto out;
@@ -770,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
770 /* wait */ 804 /* wait */
771 dout("mount waiting for mon_map\n"); 805 dout("mount waiting for mon_map\n");
772 err = wait_event_interruptible_timeout(client->auth_wq, 806 err = wait_event_interruptible_timeout(client->auth_wq,
773 have_mon_map(client) || (client->auth_err < 0), 807 have_mon_and_osd_map(client) || (client->auth_err < 0),
774 timeout); 808 timeout);
775 if (err == -EINTR || err == -ERESTARTSYS) 809 if (err == -EINTR || err == -ERESTARTSYS)
776 goto out; 810 goto out;
777 if (client->auth_err < 0) { 811 if (client->auth_err < 0) {
@@ -884,6 +918,8 @@ static int ceph_compare_super(struct super_block *sb, void *data)
884/* 918/*
885 * construct our own bdi so we can control readahead, etc. 919 * construct our own bdi so we can control readahead, etc.
886 */ 920 */
921static atomic_long_t bdi_seq = ATOMIC_INIT(0);
922
887static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) 923static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
888{ 924{
889 int err; 925 int err;
@@ -893,7 +929,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
893 client->backing_dev_info.ra_pages = 929 client->backing_dev_info.ra_pages =
894 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 930 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
895 >> PAGE_SHIFT; 931 >> PAGE_SHIFT;
896 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 932 err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
933 atomic_long_inc_return(&bdi_seq));
897 if (!err) 934 if (!err)
898 sb->s_bdi = &client->backing_dev_info; 935 sb->s_bdi = &client->backing_dev_info;
899 return err; 936 return err;
@@ -932,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type,
932 goto out; 969 goto out;
933 } 970 }
934 971
935 if (ceph_client(sb) != client) { 972 if (ceph_sb_to_client(sb) != client) {
936 ceph_destroy_client(client); 973 ceph_destroy_client(client);
937 client = ceph_client(sb); 974 client = ceph_sb_to_client(sb);
938 dout("get_sb got existing client %p\n", client); 975 dout("get_sb got existing client %p\n", client);
939 } else { 976 } else {
940 dout("get_sb using new client %p\n", client); 977 dout("get_sb using new client %p\n", client);
@@ -952,8 +989,7 @@ static int ceph_get_sb(struct file_system_type *fs_type,
952 989
953out_splat: 990out_splat:
954 ceph_mdsc_close_sessions(&client->mdsc); 991 ceph_mdsc_close_sessions(&client->mdsc);
955 up_write(&sb->s_umount); 992 deactivate_locked_super(sb);
956 deactivate_super(sb);
957 goto out_final; 993 goto out_final;
958 994
959out: 995out:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 13513b80d87f..3725c9ee9d08 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -52,24 +52,25 @@
52 52
53struct ceph_mount_args { 53struct ceph_mount_args {
54 int sb_flags; 54 int sb_flags;
55 int flags;
56 struct ceph_fsid fsid;
57 struct ceph_entity_addr my_addr;
55 int num_mon; 58 int num_mon;
56 struct ceph_entity_addr *mon_addr; 59 struct ceph_entity_addr *mon_addr;
57 int flags;
58 int mount_timeout; 60 int mount_timeout;
59 int osd_idle_ttl; 61 int osd_idle_ttl;
60 int caps_wanted_delay_min, caps_wanted_delay_max;
61 struct ceph_fsid fsid;
62 struct ceph_entity_addr my_addr;
63 int wsize;
64 int rsize; /* max readahead */
65 int max_readdir; /* max readdir size */
66 int congestion_kb; /* max readdir size */
67 int osd_timeout; 62 int osd_timeout;
68 int osd_keepalive_timeout; 63 int osd_keepalive_timeout;
64 int wsize;
65 int rsize; /* max readahead */
66 int congestion_kb; /* max writeback in flight */
67 int caps_wanted_delay_min, caps_wanted_delay_max;
68 int cap_release_safety;
69 int max_readdir; /* max readdir result (entires) */
70 int max_readdir_bytes; /* max readdir result (bytes) */
69 char *snapdir_name; /* default ".snap" */ 71 char *snapdir_name; /* default ".snap" */
70 char *name; 72 char *name;
71 char *secret; 73 char *secret;
72 int cap_release_safety;
73}; 74};
74 75
75/* 76/*
@@ -80,13 +81,14 @@ struct ceph_mount_args {
80#define CEPH_OSD_KEEPALIVE_DEFAULT 5 81#define CEPH_OSD_KEEPALIVE_DEFAULT 5
81#define CEPH_OSD_IDLE_TTL_DEFAULT 60 82#define CEPH_OSD_IDLE_TTL_DEFAULT 60
82#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ 83#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
84#define CEPH_MAX_READDIR_DEFAULT 1024
85#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
83 86
84#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) 87#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
85#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) 88#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
86 89
87#define CEPH_SNAPDIRNAME_DEFAULT ".snap" 90#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
88#define CEPH_AUTH_NAME_DEFAULT "guest" 91#define CEPH_AUTH_NAME_DEFAULT "guest"
89
90/* 92/*
91 * Delay telling the MDS we no longer want caps, in case we reopen 93 * Delay telling the MDS we no longer want caps, in case we reopen
92 * the file. Delay a minimum amount of time, even if we send a cap 94 * the file. Delay a minimum amount of time, even if we send a cap
@@ -96,6 +98,7 @@ struct ceph_mount_args {
96#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ 98#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
97#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ 99#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
98 100
101#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
99 102
100/* mount state */ 103/* mount state */
101enum { 104enum {
@@ -160,12 +163,6 @@ struct ceph_client {
160#endif 163#endif
161}; 164};
162 165
163static inline struct ceph_client *ceph_client(struct super_block *sb)
164{
165 return sb->s_fs_info;
166}
167
168
169/* 166/*
170 * File i/o capability. This tracks shared state with the metadata 167 * File i/o capability. This tracks shared state with the metadata
171 * server that allows us to cache or writeback attributes or to read 168 * server that allows us to cache or writeback attributes or to read
@@ -871,6 +868,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
871extern void ceph_dentry_lru_add(struct dentry *dn); 868extern void ceph_dentry_lru_add(struct dentry *dn);
872extern void ceph_dentry_lru_touch(struct dentry *dn); 869extern void ceph_dentry_lru_touch(struct dentry *dn);
873extern void ceph_dentry_lru_del(struct dentry *dn); 870extern void ceph_dentry_lru_del(struct dentry *dn);
871extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
874 872
875/* 873/*
876 * our d_ops vary depending on whether the inode is live, 874 * our d_ops vary depending on whether the inode is live,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 2845422907fc..68aeebc69681 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -7,7 +7,8 @@
7 7
8static bool ceph_is_valid_xattr(const char *name) 8static bool ceph_is_valid_xattr(const char *name)
9{ 9{
10 return !strncmp(name, XATTR_SECURITY_PREFIX, 10 return !strncmp(name, "ceph.", 5) ||
11 !strncmp(name, XATTR_SECURITY_PREFIX,
11 XATTR_SECURITY_PREFIX_LEN) || 12 XATTR_SECURITY_PREFIX_LEN) ||
12 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 13 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
13 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 14 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
@@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val,
76} 77}
77 78
78static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { 79static struct ceph_vxattr_cb ceph_dir_vxattrs[] = {
79 { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, 80 { true, "ceph.dir.entries", ceph_vxattrcb_entries},
80 { true, "user.ceph.dir.files", ceph_vxattrcb_files}, 81 { true, "ceph.dir.files", ceph_vxattrcb_files},
81 { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, 82 { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs},
82 { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, 83 { true, "ceph.dir.rentries", ceph_vxattrcb_rentries},
83 { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, 84 { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles},
84 { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, 85 { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs},
85 { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, 86 { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes},
86 { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, 87 { true, "ceph.dir.rctime", ceph_vxattrcb_rctime},
87 { true, NULL, NULL } 88 { true, NULL, NULL }
88}; 89};
89 90
@@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
107} 108}
108 109
109static struct ceph_vxattr_cb ceph_file_vxattrs[] = { 110static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
110 { true, "user.ceph.layout", ceph_vxattrcb_layout}, 111 { true, "ceph.layout", ceph_vxattrcb_layout},
111 { NULL, NULL } 112 { NULL, NULL }
112}; 113};
113 114
@@ -186,12 +187,6 @@ static int __set_xattr(struct ceph_inode_info *ci,
186 ci->i_xattrs.names_size -= xattr->name_len; 187 ci->i_xattrs.names_size -= xattr->name_len;
187 ci->i_xattrs.vals_size -= xattr->val_len; 188 ci->i_xattrs.vals_size -= xattr->val_len;
188 } 189 }
189 if (!xattr) {
190 pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n",
191 &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name,
192 xattr->val);
193 return -ENOMEM;
194 }
195 ci->i_xattrs.names_size += name_len; 190 ci->i_xattrs.names_size += name_len;
196 ci->i_xattrs.vals_size += val_len; 191 ci->i_xattrs.vals_size += val_len;
197 if (val) 192 if (val)
@@ -574,7 +569,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
574 ci->i_xattrs.version, ci->i_xattrs.index_version); 569 ci->i_xattrs.version, ci->i_xattrs.index_version);
575 570
576 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && 571 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
577 (ci->i_xattrs.index_version > ci->i_xattrs.version)) { 572 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
578 goto list_xattr; 573 goto list_xattr;
579 } else { 574 } else {
580 spin_unlock(&inode->i_lock); 575 spin_unlock(&inode->i_lock);
@@ -622,7 +617,7 @@ out:
622static int ceph_sync_setxattr(struct dentry *dentry, const char *name, 617static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
623 const char *value, size_t size, int flags) 618 const char *value, size_t size, int flags)
624{ 619{
625 struct ceph_client *client = ceph_client(dentry->d_sb); 620 struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
626 struct inode *inode = dentry->d_inode; 621 struct inode *inode = dentry->d_inode;
627 struct ceph_inode_info *ci = ceph_inode(inode); 622 struct ceph_inode_info *ci = ceph_inode(inode);
628 struct inode *parent_inode = dentry->d_parent->d_inode; 623 struct inode *parent_inode = dentry->d_parent->d_inode;
@@ -641,7 +636,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
641 return -ENOMEM; 636 return -ENOMEM;
642 err = -ENOMEM; 637 err = -ENOMEM;
643 for (i = 0; i < nr_pages; i++) { 638 for (i = 0; i < nr_pages; i++) {
644 pages[i] = alloc_page(GFP_NOFS); 639 pages[i] = __page_cache_alloc(GFP_NOFS);
645 if (!pages[i]) { 640 if (!pages[i]) {
646 nr_pages = i; 641 nr_pages = i;
647 goto out; 642 goto out;
@@ -779,7 +774,7 @@ out:
779 774
780static int ceph_send_removexattr(struct dentry *dentry, const char *name) 775static int ceph_send_removexattr(struct dentry *dentry, const char *name)
781{ 776{
782 struct ceph_client *client = ceph_client(dentry->d_sb); 777 struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
783 struct ceph_mds_client *mdsc = &client->mdsc; 778 struct ceph_mds_client *mdsc = &client->mdsc;
784 struct inode *inode = dentry->d_inode; 779 struct inode *inode = dentry->d_inode;
785 struct inode *parent_inode = dentry->d_parent->d_inode; 780 struct inode *parent_inode = dentry->d_parent->d_inode;
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index a20bea598933..cfd1ce34e0bc 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -492,17 +492,13 @@ compare_oid(unsigned long *oid1, unsigned int oid1len,
492 492
493int 493int
494decode_negTokenInit(unsigned char *security_blob, int length, 494decode_negTokenInit(unsigned char *security_blob, int length,
495 enum securityEnum *secType) 495 struct TCP_Server_Info *server)
496{ 496{
497 struct asn1_ctx ctx; 497 struct asn1_ctx ctx;
498 unsigned char *end; 498 unsigned char *end;
499 unsigned char *sequence_end; 499 unsigned char *sequence_end;
500 unsigned long *oid = NULL; 500 unsigned long *oid = NULL;
501 unsigned int cls, con, tag, oidlen, rc; 501 unsigned int cls, con, tag, oidlen, rc;
502 bool use_ntlmssp = false;
503 bool use_kerberos = false;
504 bool use_kerberosu2u = false;
505 bool use_mskerberos = false;
506 502
507 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ 503 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
508 504
@@ -510,11 +506,11 @@ decode_negTokenInit(unsigned char *security_blob, int length,
510 506
511 /* GSSAPI header */ 507 /* GSSAPI header */
512 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 508 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
513 cFYI(1, ("Error decoding negTokenInit header")); 509 cFYI(1, "Error decoding negTokenInit header");
514 return 0; 510 return 0;
515 } else if ((cls != ASN1_APL) || (con != ASN1_CON) 511 } else if ((cls != ASN1_APL) || (con != ASN1_CON)
516 || (tag != ASN1_EOC)) { 512 || (tag != ASN1_EOC)) {
517 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); 513 cFYI(1, "cls = %d con = %d tag = %d", cls, con, tag);
518 return 0; 514 return 0;
519 } 515 }
520 516
@@ -535,56 +531,52 @@ decode_negTokenInit(unsigned char *security_blob, int length,
535 531
536 /* SPNEGO OID not present or garbled -- bail out */ 532 /* SPNEGO OID not present or garbled -- bail out */
537 if (!rc) { 533 if (!rc) {
538 cFYI(1, ("Error decoding negTokenInit header")); 534 cFYI(1, "Error decoding negTokenInit header");
539 return 0; 535 return 0;
540 } 536 }
541 537
542 /* SPNEGO */ 538 /* SPNEGO */
543 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 539 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
544 cFYI(1, ("Error decoding negTokenInit")); 540 cFYI(1, "Error decoding negTokenInit");
545 return 0; 541 return 0;
546 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 542 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
547 || (tag != ASN1_EOC)) { 543 || (tag != ASN1_EOC)) {
548 cFYI(1, 544 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
549 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 545 cls, con, tag, end, *end);
550 cls, con, tag, end, *end));
551 return 0; 546 return 0;
552 } 547 }
553 548
554 /* negTokenInit */ 549 /* negTokenInit */
555 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 550 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
556 cFYI(1, ("Error decoding negTokenInit")); 551 cFYI(1, "Error decoding negTokenInit");
557 return 0; 552 return 0;
558 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 553 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
559 || (tag != ASN1_SEQ)) { 554 || (tag != ASN1_SEQ)) {
560 cFYI(1, 555 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
561 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 556 cls, con, tag, end, *end);
562 cls, con, tag, end, *end));
563 return 0; 557 return 0;
564 } 558 }
565 559
566 /* sequence */ 560 /* sequence */
567 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 561 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
568 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 562 cFYI(1, "Error decoding 2nd part of negTokenInit");
569 return 0; 563 return 0;
570 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 564 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
571 || (tag != ASN1_EOC)) { 565 || (tag != ASN1_EOC)) {
572 cFYI(1, 566 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
573 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 567 cls, con, tag, end, *end);
574 cls, con, tag, end, *end));
575 return 0; 568 return 0;
576 } 569 }
577 570
578 /* sequence of */ 571 /* sequence of */
579 if (asn1_header_decode 572 if (asn1_header_decode
580 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 573 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
581 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 574 cFYI(1, "Error decoding 2nd part of negTokenInit");
582 return 0; 575 return 0;
583 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 576 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
584 || (tag != ASN1_SEQ)) { 577 || (tag != ASN1_SEQ)) {
585 cFYI(1, 578 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
586 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 579 cls, con, tag, end, *end);
587 cls, con, tag, end, *end));
588 return 0; 580 return 0;
589 } 581 }
590 582
@@ -592,37 +584,33 @@ decode_negTokenInit(unsigned char *security_blob, int length,
592 while (!asn1_eoc_decode(&ctx, sequence_end)) { 584 while (!asn1_eoc_decode(&ctx, sequence_end)) {
593 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 585 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
594 if (!rc) { 586 if (!rc) {
595 cFYI(1, 587 cFYI(1, "Error decoding negTokenInit hdr exit2");
596 ("Error decoding negTokenInit hdr exit2"));
597 return 0; 588 return 0;
598 } 589 }
599 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { 590 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
600 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { 591 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
601 592
602 cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx " 593 cFYI(1, "OID len = %d oid = 0x%lx 0x%lx "
603 "0x%lx 0x%lx", oidlen, *oid, 594 "0x%lx 0x%lx", oidlen, *oid,
604 *(oid + 1), *(oid + 2), *(oid + 3))); 595 *(oid + 1), *(oid + 2), *(oid + 3));
605 596
606 if (compare_oid(oid, oidlen, MSKRB5_OID, 597 if (compare_oid(oid, oidlen, MSKRB5_OID,
607 MSKRB5_OID_LEN) && 598 MSKRB5_OID_LEN))
608 !use_mskerberos) 599 server->sec_mskerberos = true;
609 use_mskerberos = true;
610 else if (compare_oid(oid, oidlen, KRB5U2U_OID, 600 else if (compare_oid(oid, oidlen, KRB5U2U_OID,
611 KRB5U2U_OID_LEN) && 601 KRB5U2U_OID_LEN))
612 !use_kerberosu2u) 602 server->sec_kerberosu2u = true;
613 use_kerberosu2u = true;
614 else if (compare_oid(oid, oidlen, KRB5_OID, 603 else if (compare_oid(oid, oidlen, KRB5_OID,
615 KRB5_OID_LEN) && 604 KRB5_OID_LEN))
616 !use_kerberos) 605 server->sec_kerberos = true;
617 use_kerberos = true;
618 else if (compare_oid(oid, oidlen, NTLMSSP_OID, 606 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
619 NTLMSSP_OID_LEN)) 607 NTLMSSP_OID_LEN))
620 use_ntlmssp = true; 608 server->sec_ntlmssp = true;
621 609
622 kfree(oid); 610 kfree(oid);
623 } 611 }
624 } else { 612 } else {
625 cFYI(1, ("Should be an oid what is going on?")); 613 cFYI(1, "Should be an oid what is going on?");
626 } 614 }
627 } 615 }
628 616
@@ -632,54 +620,47 @@ decode_negTokenInit(unsigned char *security_blob, int length,
632 no mechListMic (e.g. NTLMSSP instead of KRB5) */ 620 no mechListMic (e.g. NTLMSSP instead of KRB5) */
633 if (ctx.error == ASN1_ERR_DEC_EMPTY) 621 if (ctx.error == ASN1_ERR_DEC_EMPTY)
634 goto decode_negtoken_exit; 622 goto decode_negtoken_exit;
635 cFYI(1, ("Error decoding last part negTokenInit exit3")); 623 cFYI(1, "Error decoding last part negTokenInit exit3");
636 return 0; 624 return 0;
637 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 625 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
638 /* tag = 3 indicating mechListMIC */ 626 /* tag = 3 indicating mechListMIC */
639 cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", 627 cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
640 cls, con, tag, end, *end)); 628 cls, con, tag, end, *end);
641 return 0; 629 return 0;
642 } 630 }
643 631
644 /* sequence */ 632 /* sequence */
645 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 633 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
646 cFYI(1, ("Error decoding last part negTokenInit exit5")); 634 cFYI(1, "Error decoding last part negTokenInit exit5");
647 return 0; 635 return 0;
648 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 636 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
649 || (tag != ASN1_SEQ)) { 637 || (tag != ASN1_SEQ)) {
650 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", 638 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)",
651 cls, con, tag, end, *end)); 639 cls, con, tag, end, *end);
652 } 640 }
653 641
654 /* sequence of */ 642 /* sequence of */
655 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 643 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
656 cFYI(1, ("Error decoding last part negTokenInit exit 7")); 644 cFYI(1, "Error decoding last part negTokenInit exit 7");
657 return 0; 645 return 0;
658 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 646 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
659 cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", 647 cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
660 cls, con, tag, end, *end)); 648 cls, con, tag, end, *end);
661 return 0; 649 return 0;
662 } 650 }
663 651
664 /* general string */ 652 /* general string */
665 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 653 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
666 cFYI(1, ("Error decoding last part negTokenInit exit9")); 654 cFYI(1, "Error decoding last part negTokenInit exit9");
667 return 0; 655 return 0;
668 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) 656 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
669 || (tag != ASN1_GENSTR)) { 657 || (tag != ASN1_GENSTR)) {
670 cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", 658 cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)",
671 cls, con, tag, end, *end)); 659 cls, con, tag, end, *end);
672 return 0; 660 return 0;
673 } 661 }
674 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 662 cFYI(1, "Need to call asn1_octets_decode() function for %s",
675 ctx.pointer)); /* is this UTF-8 or ASCII? */ 663 ctx.pointer); /* is this UTF-8 or ASCII? */
676decode_negtoken_exit: 664decode_negtoken_exit:
677 if (use_kerberos)
678 *secType = Kerberos;
679 else if (use_mskerberos)
680 *secType = MSKerberos;
681 else if (use_ntlmssp)
682 *secType = RawNTLMSSP;
683
684 return 1; 665 return 1;
685} 666}
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 42cec2a7c0cf..4fce6e61b34e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length)
60#ifdef CONFIG_CIFS_DEBUG2 60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr *smb) 61void cifs_dump_detail(struct smb_hdr *smb)
62{ 62{
63 cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid)); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb))); 66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
67} 67}
68 68
69 69
@@ -75,25 +75,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
75 if (server == NULL) 75 if (server == NULL)
76 return; 76 return;
77 77
78 cERROR(1, ("Dump pending requests:")); 78 cERROR(1, "Dump pending requests:");
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 82 cERROR(1, "State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
83 mid_entry->midState, 83 mid_entry->midState,
84 (int)mid_entry->command, 84 (int)mid_entry->command,
85 mid_entry->pid, 85 mid_entry->pid,
86 mid_entry->tsk, 86 mid_entry->tsk,
87 mid_entry->mid)); 87 mid_entry->mid);
88#ifdef CONFIG_CIFS_STATS2 88#ifdef CONFIG_CIFS_STATS2
89 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", 89 cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld",
90 mid_entry->largeBuf, 90 mid_entry->largeBuf,
91 mid_entry->resp_buf, 91 mid_entry->resp_buf,
92 mid_entry->when_received, 92 mid_entry->when_received,
93 jiffies)); 93 jiffies);
94#endif /* STATS2 */ 94#endif /* STATS2 */
95 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 95 cERROR(1, "IsMult: %d IsEnd: %d", mid_entry->multiRsp,
96 mid_entry->multiEnd)); 96 mid_entry->multiEnd);
97 if (mid_entry->resp_buf) { 97 if (mid_entry->resp_buf) {
98 cifs_dump_detail(mid_entry->resp_buf); 98 cifs_dump_detail(mid_entry->resp_buf);
99 cifs_dump_mem("existing buf: ", 99 cifs_dump_mem("existing buf: ",
@@ -716,7 +716,7 @@ static const struct file_operations cifs_multiuser_mount_proc_fops = {
716 716
717static int cifs_security_flags_proc_show(struct seq_file *m, void *v) 717static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
718{ 718{
719 seq_printf(m, "0x%x\n", extended_security); 719 seq_printf(m, "0x%x\n", global_secflags);
720 return 0; 720 return 0;
721} 721}
722 722
@@ -744,13 +744,13 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
744 /* single char or single char followed by null */ 744 /* single char or single char followed by null */
745 c = flags_string[0]; 745 c = flags_string[0];
746 if (c == '0' || c == 'n' || c == 'N') { 746 if (c == '0' || c == 'n' || c == 'N') {
747 extended_security = CIFSSEC_DEF; /* default */ 747 global_secflags = CIFSSEC_DEF; /* default */
748 return count; 748 return count;
749 } else if (c == '1' || c == 'y' || c == 'Y') { 749 } else if (c == '1' || c == 'y' || c == 'Y') {
750 extended_security = CIFSSEC_MAX; 750 global_secflags = CIFSSEC_MAX;
751 return count; 751 return count;
752 } else if (!isdigit(c)) { 752 } else if (!isdigit(c)) {
753 cERROR(1, ("invalid flag %c", c)); 753 cERROR(1, "invalid flag %c", c);
754 return -EINVAL; 754 return -EINVAL;
755 } 755 }
756 } 756 }
@@ -758,26 +758,26 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
758 758
759 flags = simple_strtoul(flags_string, NULL, 0); 759 flags = simple_strtoul(flags_string, NULL, 0);
760 760
761 cFYI(1, ("sec flags 0x%x", flags)); 761 cFYI(1, "sec flags 0x%x", flags);
762 762
763 if (flags <= 0) { 763 if (flags <= 0) {
764 cERROR(1, ("invalid security flags %s", flags_string)); 764 cERROR(1, "invalid security flags %s", flags_string);
765 return -EINVAL; 765 return -EINVAL;
766 } 766 }
767 767
768 if (flags & ~CIFSSEC_MASK) { 768 if (flags & ~CIFSSEC_MASK) {
769 cERROR(1, ("attempt to set unsupported security flags 0x%x", 769 cERROR(1, "attempt to set unsupported security flags 0x%x",
770 flags & ~CIFSSEC_MASK)); 770 flags & ~CIFSSEC_MASK);
771 return -EINVAL; 771 return -EINVAL;
772 } 772 }
773 /* flags look ok - update the global security flags for cifs module */ 773 /* flags look ok - update the global security flags for cifs module */
774 extended_security = flags; 774 global_secflags = flags;
775 if (extended_security & CIFSSEC_MUST_SIGN) { 775 if (global_secflags & CIFSSEC_MUST_SIGN) {
776 /* requiring signing implies signing is allowed */ 776 /* requiring signing implies signing is allowed */
777 extended_security |= CIFSSEC_MAY_SIGN; 777 global_secflags |= CIFSSEC_MAY_SIGN;
778 cFYI(1, ("packet signing now required")); 778 cFYI(1, "packet signing now required");
779 } else if ((extended_security & CIFSSEC_MAY_SIGN) == 0) { 779 } else if ((global_secflags & CIFSSEC_MAY_SIGN) == 0) {
780 cFYI(1, ("packet signing disabled")); 780 cFYI(1, "packet signing disabled");
781 } 781 }
782 /* BB should we turn on MAY flags for other MUST options? */ 782 /* BB should we turn on MAY flags for other MUST options? */
783 return count; 783 return count;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 5eb3b83bbfa7..aa316891ac0c 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -43,34 +43,54 @@ void dump_smb(struct smb_hdr *, int);
43 */ 43 */
44#ifdef CIFS_DEBUG 44#ifdef CIFS_DEBUG
45 45
46
47/* information message: e.g., configuration, major event */ 46/* information message: e.g., configuration, major event */
48extern int cifsFYI; 47extern int cifsFYI;
49#define cifsfyi(format,arg...) if (cifsFYI & CIFS_INFO) printk(KERN_DEBUG " " __FILE__ ": " format "\n" "" , ## arg) 48#define cifsfyi(fmt, arg...) \
49do { \
50 if (cifsFYI & CIFS_INFO) \
51 printk(KERN_DEBUG "%s: " fmt "\n", __FILE__, ##arg); \
52} while (0)
50 53
51#define cFYI(button,prspec) if (button) cifsfyi prspec 54#define cFYI(set, fmt, arg...) \
55do { \
56 if (set) \
57 cifsfyi(fmt, ##arg); \
58} while (0)
52 59
53#define cifswarn(format, arg...) printk(KERN_WARNING ": " format "\n" , ## arg) 60#define cifswarn(fmt, arg...) \
61 printk(KERN_WARNING fmt "\n", ##arg)
54 62
55/* debug event message: */ 63/* debug event message: */
56extern int cifsERROR; 64extern int cifsERROR;
57 65
58#define cEVENT(format,arg...) if (cifsERROR) printk(KERN_EVENT __FILE__ ": " format "\n" , ## arg) 66#define cEVENT(fmt, arg...) \
67do { \
68 if (cifsERROR) \
69 printk(KERN_EVENT "%s: " fmt "\n", __FILE__, ##arg); \
70} while (0)
59 71
60/* error event message: e.g., i/o error */ 72/* error event message: e.g., i/o error */
61#define cifserror(format,arg...) if (cifsERROR) printk(KERN_ERR " CIFS VFS: " format "\n" "" , ## arg) 73#define cifserror(fmt, arg...) \
74do { \
75 if (cifsERROR) \
76 printk(KERN_ERR "CIFS VFS: " fmt "\n", ##arg); \
77} while (0)
62 78
63#define cERROR(button, prspec) if (button) cifserror prspec 79#define cERROR(set, fmt, arg...) \
80do { \
81 if (set) \
82 cifserror(fmt, ##arg); \
83} while (0)
64 84
65/* 85/*
66 * debug OFF 86 * debug OFF
67 * --------- 87 * ---------
68 */ 88 */
69#else /* _CIFS_DEBUG */ 89#else /* _CIFS_DEBUG */
70#define cERROR(button, prspec) 90#define cERROR(set, fmt, arg...)
71#define cEVENT(format, arg...) 91#define cEVENT(fmt, arg...)
72#define cFYI(button, prspec) 92#define cFYI(set, fmt, arg...)
73#define cifserror(format, arg...) 93#define cifserror(fmt, arg...)
74#endif /* _CIFS_DEBUG */ 94#endif /* _CIFS_DEBUG */
75 95
76#endif /* _H_CIFS_DEBUG */ 96#endif /* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 78e4d2a3a68b..ac19a6f3dae0 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -85,8 +85,8 @@ static char *cifs_get_share_name(const char *node_name)
85 /* find server name end */ 85 /* find server name end */
86 pSep = memchr(UNC+2, '\\', len-2); 86 pSep = memchr(UNC+2, '\\', len-2);
87 if (!pSep) { 87 if (!pSep) {
88 cERROR(1, ("%s: no server name end in node name: %s", 88 cERROR(1, "%s: no server name end in node name: %s",
89 __func__, node_name)); 89 __func__, node_name);
90 kfree(UNC); 90 kfree(UNC);
91 return ERR_PTR(-EINVAL); 91 return ERR_PTR(-EINVAL);
92 } 92 }
@@ -142,8 +142,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
142 142
143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
144 if (rc != 0) { 144 if (rc != 0) {
145 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", 145 cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
146 __func__, *devname, rc)); 146 __func__, *devname, rc);
147 goto compose_mount_options_err; 147 goto compose_mount_options_err;
148 } 148 }
149 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 149 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
@@ -217,8 +217,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
217 strcat(mountdata, fullpath + ref->path_consumed); 217 strcat(mountdata, fullpath + ref->path_consumed);
218 } 218 }
219 219
220 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ 220 /*cFYI(1, "%s: parent mountdata: %s", __func__,sb_mountdata);*/
221 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/ 221 /*cFYI(1, "%s: submount mountdata: %s", __func__, mountdata );*/
222 222
223compose_mount_options_out: 223compose_mount_options_out:
224 kfree(srvIP); 224 kfree(srvIP);
@@ -294,11 +294,11 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
294 294
295static void dump_referral(const struct dfs_info3_param *ref) 295static void dump_referral(const struct dfs_info3_param *ref)
296{ 296{
297 cFYI(1, ("DFS: ref path: %s", ref->path_name)); 297 cFYI(1, "DFS: ref path: %s", ref->path_name);
298 cFYI(1, ("DFS: node path: %s", ref->node_name)); 298 cFYI(1, "DFS: node path: %s", ref->node_name);
299 cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type)); 299 cFYI(1, "DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type);
300 cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag, 300 cFYI(1, "DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag,
301 ref->path_consumed)); 301 ref->path_consumed);
302} 302}
303 303
304 304
@@ -314,7 +314,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
314 int rc = 0; 314 int rc = 0;
315 struct vfsmount *mnt = ERR_PTR(-ENOENT); 315 struct vfsmount *mnt = ERR_PTR(-ENOENT);
316 316
317 cFYI(1, ("in %s", __func__)); 317 cFYI(1, "in %s", __func__);
318 BUG_ON(IS_ROOT(dentry)); 318 BUG_ON(IS_ROOT(dentry));
319 319
320 xid = GetXid(); 320 xid = GetXid();
@@ -352,15 +352,15 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
352 /* connect to a node */ 352 /* connect to a node */
353 len = strlen(referrals[i].node_name); 353 len = strlen(referrals[i].node_name);
354 if (len < 2) { 354 if (len < 2) {
355 cERROR(1, ("%s: Net Address path too short: %s", 355 cERROR(1, "%s: Net Address path too short: %s",
356 __func__, referrals[i].node_name)); 356 __func__, referrals[i].node_name);
357 rc = -EINVAL; 357 rc = -EINVAL;
358 goto out_err; 358 goto out_err;
359 } 359 }
360 mnt = cifs_dfs_do_refmount(nd->path.mnt, 360 mnt = cifs_dfs_do_refmount(nd->path.mnt,
361 nd->path.dentry, referrals + i); 361 nd->path.dentry, referrals + i);
362 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, 362 cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
363 referrals[i].node_name, mnt)); 363 referrals[i].node_name, mnt);
364 364
365 /* complete mount procedure if we accured submount */ 365 /* complete mount procedure if we accured submount */
366 if (!IS_ERR(mnt)) 366 if (!IS_ERR(mnt))
@@ -378,7 +378,7 @@ out:
378 FreeXid(xid); 378 FreeXid(xid);
379 free_dfs_info_array(referrals, num_referrals); 379 free_dfs_info_array(referrals, num_referrals);
380 kfree(full_path); 380 kfree(full_path);
381 cFYI(1, ("leaving %s" , __func__)); 381 cFYI(1, "leaving %s" , __func__);
382 return ERR_PTR(rc); 382 return ERR_PTR(rc);
383out_err: 383out_err:
384 path_put(&nd->path); 384 path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 310d12f69a92..379bd7d9c05f 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -133,9 +133,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
133 dp = description + strlen(description); 133 dp = description + strlen(description);
134 134
135 /* for now, only sec=krb5 and sec=mskrb5 are valid */ 135 /* for now, only sec=krb5 and sec=mskrb5 are valid */
136 if (server->secType == Kerberos) 136 if (server->sec_kerberos)
137 sprintf(dp, ";sec=krb5"); 137 sprintf(dp, ";sec=krb5");
138 else if (server->secType == MSKerberos) 138 else if (server->sec_mskerberos)
139 sprintf(dp, ";sec=mskrb5"); 139 sprintf(dp, ";sec=mskrb5");
140 else 140 else
141 goto out; 141 goto out;
@@ -149,7 +149,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
149 dp = description + strlen(description); 149 dp = description + strlen(description);
150 sprintf(dp, ";pid=0x%x", current->pid); 150 sprintf(dp, ";pid=0x%x", current->pid);
151 151
152 cFYI(1, ("key description = %s", description)); 152 cFYI(1, "key description = %s", description);
153 spnego_key = request_key(&cifs_spnego_key_type, description, ""); 153 spnego_key = request_key(&cifs_spnego_key_type, description, "");
154 154
155#ifdef CONFIG_CIFS_DEBUG2 155#ifdef CONFIG_CIFS_DEBUG2
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d07676bd76d2..430f510a1720 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -200,9 +200,8 @@ cifs_strtoUCS(__le16 *to, const char *from, int len,
200 /* works for 2.4.0 kernel or later */ 200 /* works for 2.4.0 kernel or later */
201 charlen = codepage->char2uni(from, len, &wchar_to[i]); 201 charlen = codepage->char2uni(from, len, &wchar_to[i]);
202 if (charlen < 1) { 202 if (charlen < 1) {
203 cERROR(1, 203 cERROR(1, "strtoUCS: char2uni of %d returned %d",
204 ("strtoUCS: char2uni of %d returned %d", 204 (int)*from, charlen);
205 (int)*from, charlen));
206 /* A question mark */ 205 /* A question mark */
207 to[i] = cpu_to_le16(0x003f); 206 to[i] = cpu_to_le16(0x003f);
208 charlen = 1; 207 charlen = 1;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 9b716d044bbd..85d7cf7ff2c8 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
87 continue; /* all sub_auth values do not match */ 87 continue; /* all sub_auth values do not match */
88 } 88 }
89 89
90 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname)); 90 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
91 return 0; /* sids compare/match */ 91 return 0; /* sids compare/match */
92 } 92 }
93 93
94 cFYI(1, ("No matching sid")); 94 cFYI(1, "No matching sid");
95 return -1; 95 return -1;
96} 96}
97 97
@@ -208,14 +208,14 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
208 *pbits_to_set &= ~S_IXUGO; 208 *pbits_to_set &= ~S_IXUGO;
209 return; 209 return;
210 } else if (type != ACCESS_ALLOWED) { 210 } else if (type != ACCESS_ALLOWED) {
211 cERROR(1, ("unknown access control type %d", type)); 211 cERROR(1, "unknown access control type %d", type);
212 return; 212 return;
213 } 213 }
214 /* else ACCESS_ALLOWED type */ 214 /* else ACCESS_ALLOWED type */
215 215
216 if (flags & GENERIC_ALL) { 216 if (flags & GENERIC_ALL) {
217 *pmode |= (S_IRWXUGO & (*pbits_to_set)); 217 *pmode |= (S_IRWXUGO & (*pbits_to_set));
218 cFYI(DBG2, ("all perms")); 218 cFYI(DBG2, "all perms");
219 return; 219 return;
220 } 220 }
221 if ((flags & GENERIC_WRITE) || 221 if ((flags & GENERIC_WRITE) ||
@@ -228,7 +228,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
228 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) 228 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
229 *pmode |= (S_IXUGO & (*pbits_to_set)); 229 *pmode |= (S_IXUGO & (*pbits_to_set));
230 230
231 cFYI(DBG2, ("access flags 0x%x mode now 0x%x", flags, *pmode)); 231 cFYI(DBG2, "access flags 0x%x mode now 0x%x", flags, *pmode);
232 return; 232 return;
233} 233}
234 234
@@ -257,7 +257,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
257 if (mode & S_IXUGO) 257 if (mode & S_IXUGO)
258 *pace_flags |= SET_FILE_EXEC_RIGHTS; 258 *pace_flags |= SET_FILE_EXEC_RIGHTS;
259 259
260 cFYI(DBG2, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags)); 260 cFYI(DBG2, "mode: 0x%x, access flags now 0x%x", mode, *pace_flags);
261 return; 261 return;
262} 262}
263 263
@@ -297,24 +297,24 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
297 /* validate that we do not go past end of acl */ 297 /* validate that we do not go past end of acl */
298 298
299 if (le16_to_cpu(pace->size) < 16) { 299 if (le16_to_cpu(pace->size) < 16) {
300 cERROR(1, ("ACE too small, %d", le16_to_cpu(pace->size))); 300 cERROR(1, "ACE too small %d", le16_to_cpu(pace->size));
301 return; 301 return;
302 } 302 }
303 303
304 if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) { 304 if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) {
305 cERROR(1, ("ACL too small to parse ACE")); 305 cERROR(1, "ACL too small to parse ACE");
306 return; 306 return;
307 } 307 }
308 308
309 num_subauth = pace->sid.num_subauth; 309 num_subauth = pace->sid.num_subauth;
310 if (num_subauth) { 310 if (num_subauth) {
311 int i; 311 int i;
312 cFYI(1, ("ACE revision %d num_auth %d type %d flags %d size %d", 312 cFYI(1, "ACE revision %d num_auth %d type %d flags %d size %d",
313 pace->sid.revision, pace->sid.num_subauth, pace->type, 313 pace->sid.revision, pace->sid.num_subauth, pace->type,
314 pace->flags, le16_to_cpu(pace->size))); 314 pace->flags, le16_to_cpu(pace->size));
315 for (i = 0; i < num_subauth; ++i) { 315 for (i = 0; i < num_subauth; ++i) {
316 cFYI(1, ("ACE sub_auth[%d]: 0x%x", i, 316 cFYI(1, "ACE sub_auth[%d]: 0x%x", i,
317 le32_to_cpu(pace->sid.sub_auth[i]))); 317 le32_to_cpu(pace->sid.sub_auth[i]));
318 } 318 }
319 319
320 /* BB add length check to make sure that we do not have huge 320 /* BB add length check to make sure that we do not have huge
@@ -347,13 +347,13 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
347 347
348 /* validate that we do not go past end of acl */ 348 /* validate that we do not go past end of acl */
349 if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { 349 if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
350 cERROR(1, ("ACL too small to parse DACL")); 350 cERROR(1, "ACL too small to parse DACL");
351 return; 351 return;
352 } 352 }
353 353
354 cFYI(DBG2, ("DACL revision %d size %d num aces %d", 354 cFYI(DBG2, "DACL revision %d size %d num aces %d",
355 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), 355 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
356 le32_to_cpu(pdacl->num_aces))); 356 le32_to_cpu(pdacl->num_aces));
357 357
358 /* reset rwx permissions for user/group/other. 358 /* reset rwx permissions for user/group/other.
359 Also, if num_aces is 0 i.e. DACL has no ACEs, 359 Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -437,25 +437,25 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
437 /* validate that we do not go past end of ACL - sid must be at least 8 437 /* validate that we do not go past end of ACL - sid must be at least 8
438 bytes long (assuming no sub-auths - e.g. the null SID */ 438 bytes long (assuming no sub-auths - e.g. the null SID */
439 if (end_of_acl < (char *)psid + 8) { 439 if (end_of_acl < (char *)psid + 8) {
440 cERROR(1, ("ACL too small to parse SID %p", psid)); 440 cERROR(1, "ACL too small to parse SID %p", psid);
441 return -EINVAL; 441 return -EINVAL;
442 } 442 }
443 443
444 if (psid->num_subauth) { 444 if (psid->num_subauth) {
445#ifdef CONFIG_CIFS_DEBUG2 445#ifdef CONFIG_CIFS_DEBUG2
446 int i; 446 int i;
447 cFYI(1, ("SID revision %d num_auth %d", 447 cFYI(1, "SID revision %d num_auth %d",
448 psid->revision, psid->num_subauth)); 448 psid->revision, psid->num_subauth);
449 449
450 for (i = 0; i < psid->num_subauth; i++) { 450 for (i = 0; i < psid->num_subauth; i++) {
451 cFYI(1, ("SID sub_auth[%d]: 0x%x ", i, 451 cFYI(1, "SID sub_auth[%d]: 0x%x ", i,
452 le32_to_cpu(psid->sub_auth[i]))); 452 le32_to_cpu(psid->sub_auth[i]));
453 } 453 }
454 454
455 /* BB add length check to make sure that we do not have huge 455 /* BB add length check to make sure that we do not have huge
456 num auths and therefore go off the end */ 456 num auths and therefore go off the end */
457 cFYI(1, ("RID 0x%x", 457 cFYI(1, "RID 0x%x",
458 le32_to_cpu(psid->sub_auth[psid->num_subauth-1]))); 458 le32_to_cpu(psid->sub_auth[psid->num_subauth-1]));
459#endif 459#endif
460 } 460 }
461 461
@@ -482,11 +482,11 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
482 le32_to_cpu(pntsd->gsidoffset)); 482 le32_to_cpu(pntsd->gsidoffset));
483 dacloffset = le32_to_cpu(pntsd->dacloffset); 483 dacloffset = le32_to_cpu(pntsd->dacloffset);
484 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); 484 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
485 cFYI(DBG2, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x " 485 cFYI(DBG2, "revision %d type 0x%x ooffset 0x%x goffset 0x%x "
486 "sacloffset 0x%x dacloffset 0x%x", 486 "sacloffset 0x%x dacloffset 0x%x",
487 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), 487 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
488 le32_to_cpu(pntsd->gsidoffset), 488 le32_to_cpu(pntsd->gsidoffset),
489 le32_to_cpu(pntsd->sacloffset), dacloffset)); 489 le32_to_cpu(pntsd->sacloffset), dacloffset);
490/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 490/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
491 rc = parse_sid(owner_sid_ptr, end_of_acl); 491 rc = parse_sid(owner_sid_ptr, end_of_acl);
492 if (rc) 492 if (rc)
@@ -500,7 +500,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 500 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
501 group_sid_ptr, fattr); 501 group_sid_ptr, fattr);
502 else 502 else
503 cFYI(1, ("no ACL")); /* BB grant all or default perms? */ 503 cFYI(1, "no ACL"); /* BB grant all or default perms? */
504 504
505/* cifscred->uid = owner_sid_ptr->rid; 505/* cifscred->uid = owner_sid_ptr->rid;
506 cifscred->gid = group_sid_ptr->rid; 506 cifscred->gid = group_sid_ptr->rid;
@@ -563,7 +563,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
563 FreeXid(xid); 563 FreeXid(xid);
564 564
565 565
566 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 566 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
567 return pntsd; 567 return pntsd;
568} 568}
569 569
@@ -581,12 +581,12 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
581 &fid, &oplock, NULL, cifs_sb->local_nls, 581 &fid, &oplock, NULL, cifs_sb->local_nls,
582 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 582 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
583 if (rc) { 583 if (rc) {
584 cERROR(1, ("Unable to open file to get ACL")); 584 cERROR(1, "Unable to open file to get ACL");
585 goto out; 585 goto out;
586 } 586 }
587 587
588 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 588 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
589 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 589 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
590 590
591 CIFSSMBClose(xid, cifs_sb->tcon, fid); 591 CIFSSMBClose(xid, cifs_sb->tcon, fid);
592 out: 592 out:
@@ -621,7 +621,7 @@ static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
621 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 621 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
622 FreeXid(xid); 622 FreeXid(xid);
623 623
624 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 624 cFYI(DBG2, "SetCIFSACL rc = %d", rc);
625 return rc; 625 return rc;
626} 626}
627 627
@@ -638,12 +638,12 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
638 &fid, &oplock, NULL, cifs_sb->local_nls, 638 &fid, &oplock, NULL, cifs_sb->local_nls,
639 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 639 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
640 if (rc) { 640 if (rc) {
641 cERROR(1, ("Unable to open file to set ACL")); 641 cERROR(1, "Unable to open file to set ACL");
642 goto out; 642 goto out;
643 } 643 }
644 644
645 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 645 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
646 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 646 cFYI(DBG2, "SetCIFSACL rc = %d", rc);
647 647
648 CIFSSMBClose(xid, cifs_sb->tcon, fid); 648 CIFSSMBClose(xid, cifs_sb->tcon, fid);
649 out: 649 out:
@@ -659,7 +659,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
659 struct cifsFileInfo *open_file; 659 struct cifsFileInfo *open_file;
660 int rc; 660 int rc;
661 661
662 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); 662 cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode);
663 663
664 open_file = find_readable_file(CIFS_I(inode)); 664 open_file = find_readable_file(CIFS_I(inode));
665 if (!open_file) 665 if (!open_file)
@@ -679,7 +679,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
679 u32 acllen = 0; 679 u32 acllen = 0;
680 int rc = 0; 680 int rc = 0;
681 681
682 cFYI(DBG2, ("converting ACL to mode for %s", path)); 682 cFYI(DBG2, "converting ACL to mode for %s", path);
683 683
684 if (pfid) 684 if (pfid)
685 pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); 685 pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
@@ -690,7 +690,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
690 if (pntsd) 690 if (pntsd)
691 rc = parse_sec_desc(pntsd, acllen, fattr); 691 rc = parse_sec_desc(pntsd, acllen, fattr);
692 if (rc) 692 if (rc)
693 cFYI(1, ("parse sec desc failed rc = %d", rc)); 693 cFYI(1, "parse sec desc failed rc = %d", rc);
694 694
695 kfree(pntsd); 695 kfree(pntsd);
696 return; 696 return;
@@ -704,7 +704,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
704 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ 704 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
705 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ 705 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
706 706
707 cFYI(DBG2, ("set ACL from mode for %s", path)); 707 cFYI(DBG2, "set ACL from mode for %s", path);
708 708
709 /* Get the security descriptor */ 709 /* Get the security descriptor */
710 pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); 710 pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
@@ -721,19 +721,19 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
721 DEFSECDESCLEN : secdesclen; 721 DEFSECDESCLEN : secdesclen;
722 pnntsd = kmalloc(secdesclen, GFP_KERNEL); 722 pnntsd = kmalloc(secdesclen, GFP_KERNEL);
723 if (!pnntsd) { 723 if (!pnntsd) {
724 cERROR(1, ("Unable to allocate security descriptor")); 724 cERROR(1, "Unable to allocate security descriptor");
725 kfree(pntsd); 725 kfree(pntsd);
726 return -ENOMEM; 726 return -ENOMEM;
727 } 727 }
728 728
729 rc = build_sec_desc(pntsd, pnntsd, inode, nmode); 729 rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
730 730
731 cFYI(DBG2, ("build_sec_desc rc: %d", rc)); 731 cFYI(DBG2, "build_sec_desc rc: %d", rc);
732 732
733 if (!rc) { 733 if (!rc) {
734 /* Set the security descriptor */ 734 /* Set the security descriptor */
735 rc = set_cifs_acl(pnntsd, secdesclen, inode, path); 735 rc = set_cifs_acl(pnntsd, secdesclen, inode, path);
736 cFYI(DBG2, ("set_cifs_acl rc: %d", rc)); 736 cFYI(DBG2, "set_cifs_acl rc: %d", rc);
737 } 737 }
738 738
739 kfree(pnntsd); 739 kfree(pnntsd);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index fbe986430d0c..847628dfdc44 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -103,7 +103,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
103 if (iov[i].iov_len == 0) 103 if (iov[i].iov_len == 0)
104 continue; 104 continue;
105 if (iov[i].iov_base == NULL) { 105 if (iov[i].iov_base == NULL) {
106 cERROR(1, ("null iovec entry")); 106 cERROR(1, "null iovec entry");
107 return -EIO; 107 return -EIO;
108 } 108 }
109 /* The first entry includes a length field (which does not get 109 /* The first entry includes a length field (which does not get
@@ -181,8 +181,8 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
181 181
182 /* Do not need to verify session setups with signature "BSRSPYL " */ 182 /* Do not need to verify session setups with signature "BSRSPYL " */
183 if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0) 183 if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0)
184 cFYI(1, ("dummy signature received for smb command 0x%x", 184 cFYI(1, "dummy signature received for smb command 0x%x",
185 cifs_pdu->Command)); 185 cifs_pdu->Command);
186 186
187 /* save off the origiginal signature so we can modify the smb and check 187 /* save off the origiginal signature so we can modify the smb and check
188 its signature against what the server sent */ 188 its signature against what the server sent */
@@ -291,7 +291,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
291 if (password) 291 if (password)
292 strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); 292 strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
293 293
294 if (!encrypt && extended_security & CIFSSEC_MAY_PLNTXT) { 294 if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
295 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 295 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
296 memcpy(lnm_session_key, password_with_pad, 296 memcpy(lnm_session_key, password_with_pad,
297 CIFS_ENCPWD_SIZE); 297 CIFS_ENCPWD_SIZE);
@@ -398,7 +398,7 @@ void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
398 /* calculate buf->ntlmv2_hash */ 398 /* calculate buf->ntlmv2_hash */
399 rc = calc_ntlmv2_hash(ses, nls_cp); 399 rc = calc_ntlmv2_hash(ses, nls_cp);
400 if (rc) 400 if (rc)
401 cERROR(1, ("could not get v2 hash rc %d", rc)); 401 cERROR(1, "could not get v2 hash rc %d", rc);
402 CalcNTLMv2_response(ses, resp_buf); 402 CalcNTLMv2_response(ses, resp_buf);
403 403
404 /* now calculate the MAC key for NTLMv2 */ 404 /* now calculate the MAC key for NTLMv2 */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ad235d604a0b..78c02eb4cb1f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -49,10 +49,6 @@
49#include "cifs_spnego.h" 49#include "cifs_spnego.h"
50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ 50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
51 51
52#ifdef CONFIG_CIFS_QUOTA
53static const struct quotactl_ops cifs_quotactl_ops;
54#endif /* QUOTA */
55
56int cifsFYI = 0; 52int cifsFYI = 0;
57int cifsERROR = 1; 53int cifsERROR = 1;
58int traceSMB = 0; 54int traceSMB = 0;
@@ -61,7 +57,7 @@ unsigned int experimEnabled = 0;
61unsigned int linuxExtEnabled = 1; 57unsigned int linuxExtEnabled = 1;
62unsigned int lookupCacheEnabled = 1; 58unsigned int lookupCacheEnabled = 1;
63unsigned int multiuser_mount = 0; 59unsigned int multiuser_mount = 0;
64unsigned int extended_security = CIFSSEC_DEF; 60unsigned int global_secflags = CIFSSEC_DEF;
65/* unsigned int ntlmv2_support = 0; */ 61/* unsigned int ntlmv2_support = 0; */
66unsigned int sign_CIFS_PDUs = 1; 62unsigned int sign_CIFS_PDUs = 1;
67static const struct super_operations cifs_super_ops; 63static const struct super_operations cifs_super_ops;
@@ -86,8 +82,6 @@ extern mempool_t *cifs_sm_req_poolp;
86extern mempool_t *cifs_req_poolp; 82extern mempool_t *cifs_req_poolp;
87extern mempool_t *cifs_mid_poolp; 83extern mempool_t *cifs_mid_poolp;
88 84
89extern struct kmem_cache *cifs_oplock_cachep;
90
91static int 85static int
92cifs_read_super(struct super_block *sb, void *data, 86cifs_read_super(struct super_block *sb, void *data,
93 const char *devname, int silent) 87 const char *devname, int silent)
@@ -135,8 +129,7 @@ cifs_read_super(struct super_block *sb, void *data,
135 129
136 if (rc) { 130 if (rc) {
137 if (!silent) 131 if (!silent)
138 cERROR(1, 132 cERROR(1, "cifs_mount failed w/return code = %d", rc);
139 ("cifs_mount failed w/return code = %d", rc));
140 goto out_mount_failed; 133 goto out_mount_failed;
141 } 134 }
142 135
@@ -146,9 +139,6 @@ cifs_read_super(struct super_block *sb, void *data,
146/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 139/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
147 sb->s_blocksize = 140 sb->s_blocksize =
148 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 141 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
149#ifdef CONFIG_CIFS_QUOTA
150 sb->s_qcop = &cifs_quotactl_ops;
151#endif
152 sb->s_blocksize = CIFS_MAX_MSGSIZE; 142 sb->s_blocksize = CIFS_MAX_MSGSIZE;
153 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 143 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
154 inode = cifs_root_iget(sb, ROOT_I); 144 inode = cifs_root_iget(sb, ROOT_I);
@@ -168,7 +158,7 @@ cifs_read_super(struct super_block *sb, void *data,
168 158
169#ifdef CONFIG_CIFS_EXPERIMENTAL 159#ifdef CONFIG_CIFS_EXPERIMENTAL
170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 160 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
171 cFYI(1, ("export ops supported")); 161 cFYI(1, "export ops supported");
172 sb->s_export_op = &cifs_export_ops; 162 sb->s_export_op = &cifs_export_ops;
173 } 163 }
174#endif /* EXPERIMENTAL */ 164#endif /* EXPERIMENTAL */
@@ -176,7 +166,7 @@ cifs_read_super(struct super_block *sb, void *data,
176 return 0; 166 return 0;
177 167
178out_no_root: 168out_no_root:
179 cERROR(1, ("cifs_read_super: get root inode failed")); 169 cERROR(1, "cifs_read_super: get root inode failed");
180 if (inode) 170 if (inode)
181 iput(inode); 171 iput(inode);
182 172
@@ -203,10 +193,10 @@ cifs_put_super(struct super_block *sb)
203 int rc = 0; 193 int rc = 0;
204 struct cifs_sb_info *cifs_sb; 194 struct cifs_sb_info *cifs_sb;
205 195
206 cFYI(1, ("In cifs_put_super")); 196 cFYI(1, "In cifs_put_super");
207 cifs_sb = CIFS_SB(sb); 197 cifs_sb = CIFS_SB(sb);
208 if (cifs_sb == NULL) { 198 if (cifs_sb == NULL) {
209 cFYI(1, ("Empty cifs superblock info passed to unmount")); 199 cFYI(1, "Empty cifs superblock info passed to unmount");
210 return; 200 return;
211 } 201 }
212 202
@@ -214,7 +204,7 @@ cifs_put_super(struct super_block *sb)
214 204
215 rc = cifs_umount(sb, cifs_sb); 205 rc = cifs_umount(sb, cifs_sb);
216 if (rc) 206 if (rc)
217 cERROR(1, ("cifs_umount failed with return code %d", rc)); 207 cERROR(1, "cifs_umount failed with return code %d", rc);
218#ifdef CONFIG_CIFS_DFS_UPCALL 208#ifdef CONFIG_CIFS_DFS_UPCALL
219 if (cifs_sb->mountdata) { 209 if (cifs_sb->mountdata) {
220 kfree(cifs_sb->mountdata); 210 kfree(cifs_sb->mountdata);
@@ -300,7 +290,6 @@ static int cifs_permission(struct inode *inode, int mask)
300static struct kmem_cache *cifs_inode_cachep; 290static struct kmem_cache *cifs_inode_cachep;
301static struct kmem_cache *cifs_req_cachep; 291static struct kmem_cache *cifs_req_cachep;
302static struct kmem_cache *cifs_mid_cachep; 292static struct kmem_cache *cifs_mid_cachep;
303struct kmem_cache *cifs_oplock_cachep;
304static struct kmem_cache *cifs_sm_req_cachep; 293static struct kmem_cache *cifs_sm_req_cachep;
305mempool_t *cifs_sm_req_poolp; 294mempool_t *cifs_sm_req_poolp;
306mempool_t *cifs_req_poolp; 295mempool_t *cifs_req_poolp;
@@ -432,106 +421,6 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
432 return 0; 421 return 0;
433} 422}
434 423
435#ifdef CONFIG_CIFS_QUOTA
436int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
437 struct fs_disk_quota *pdquota)
438{
439 int xid;
440 int rc = 0;
441 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
442 struct cifsTconInfo *pTcon;
443
444 if (cifs_sb)
445 pTcon = cifs_sb->tcon;
446 else
447 return -EIO;
448
449
450 xid = GetXid();
451 if (pTcon) {
452 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
453 } else
454 rc = -EIO;
455
456 FreeXid(xid);
457 return rc;
458}
459
460int cifs_xquota_get(struct super_block *sb, int quota_type, qid_t qid,
461 struct fs_disk_quota *pdquota)
462{
463 int xid;
464 int rc = 0;
465 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
466 struct cifsTconInfo *pTcon;
467
468 if (cifs_sb)
469 pTcon = cifs_sb->tcon;
470 else
471 return -EIO;
472
473 xid = GetXid();
474 if (pTcon) {
475 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
476 } else
477 rc = -EIO;
478
479 FreeXid(xid);
480 return rc;
481}
482
483int cifs_xstate_set(struct super_block *sb, unsigned int flags, int operation)
484{
485 int xid;
486 int rc = 0;
487 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
488 struct cifsTconInfo *pTcon;
489
490 if (cifs_sb)
491 pTcon = cifs_sb->tcon;
492 else
493 return -EIO;
494
495 xid = GetXid();
496 if (pTcon) {
497 cFYI(1, ("flags: 0x%x operation: 0x%x", flags, operation));
498 } else
499 rc = -EIO;
500
501 FreeXid(xid);
502 return rc;
503}
504
505int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
506{
507 int xid;
508 int rc = 0;
509 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
510 struct cifsTconInfo *pTcon;
511
512 if (cifs_sb)
513 pTcon = cifs_sb->tcon;
514 else
515 return -EIO;
516
517 xid = GetXid();
518 if (pTcon) {
519 cFYI(1, ("pqstats %p", qstats));
520 } else
521 rc = -EIO;
522
523 FreeXid(xid);
524 return rc;
525}
526
527static const struct quotactl_ops cifs_quotactl_ops = {
528 .set_xquota = cifs_xquota_set,
529 .get_xquota = cifs_xquota_get,
530 .set_xstate = cifs_xstate_set,
531 .get_xstate = cifs_xstate_get,
532};
533#endif
534
535static void cifs_umount_begin(struct super_block *sb) 424static void cifs_umount_begin(struct super_block *sb)
536{ 425{
537 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 426 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -558,7 +447,7 @@ static void cifs_umount_begin(struct super_block *sb)
558 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ 447 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
559 /* cancel_notify_requests(tcon); */ 448 /* cancel_notify_requests(tcon); */
560 if (tcon->ses && tcon->ses->server) { 449 if (tcon->ses && tcon->ses->server) {
561 cFYI(1, ("wake up tasks now - umount begin not complete")); 450 cFYI(1, "wake up tasks now - umount begin not complete");
562 wake_up_all(&tcon->ses->server->request_q); 451 wake_up_all(&tcon->ses->server->request_q);
563 wake_up_all(&tcon->ses->server->response_q); 452 wake_up_all(&tcon->ses->server->response_q);
564 msleep(1); /* yield */ 453 msleep(1); /* yield */
@@ -609,7 +498,7 @@ cifs_get_sb(struct file_system_type *fs_type,
609 int rc; 498 int rc;
610 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); 499 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
611 500
612 cFYI(1, ("Devname: %s flags: %d ", dev_name, flags)); 501 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
613 502
614 if (IS_ERR(sb)) 503 if (IS_ERR(sb))
615 return PTR_ERR(sb); 504 return PTR_ERR(sb);
@@ -656,7 +545,6 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
656 return generic_file_llseek_unlocked(file, offset, origin); 545 return generic_file_llseek_unlocked(file, offset, origin);
657} 546}
658 547
659#ifdef CONFIG_CIFS_EXPERIMENTAL
660static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) 548static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
661{ 549{
662 /* note that this is called by vfs setlease with the BKL held 550 /* note that this is called by vfs setlease with the BKL held
@@ -685,7 +573,6 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
685 else 573 else
686 return -EAGAIN; 574 return -EAGAIN;
687} 575}
688#endif
689 576
690struct file_system_type cifs_fs_type = { 577struct file_system_type cifs_fs_type = {
691 .owner = THIS_MODULE, 578 .owner = THIS_MODULE,
@@ -762,10 +649,7 @@ const struct file_operations cifs_file_ops = {
762#ifdef CONFIG_CIFS_POSIX 649#ifdef CONFIG_CIFS_POSIX
763 .unlocked_ioctl = cifs_ioctl, 650 .unlocked_ioctl = cifs_ioctl,
764#endif /* CONFIG_CIFS_POSIX */ 651#endif /* CONFIG_CIFS_POSIX */
765
766#ifdef CONFIG_CIFS_EXPERIMENTAL
767 .setlease = cifs_setlease, 652 .setlease = cifs_setlease,
768#endif /* CONFIG_CIFS_EXPERIMENTAL */
769}; 653};
770 654
771const struct file_operations cifs_file_direct_ops = { 655const struct file_operations cifs_file_direct_ops = {
@@ -784,9 +668,7 @@ const struct file_operations cifs_file_direct_ops = {
784 .unlocked_ioctl = cifs_ioctl, 668 .unlocked_ioctl = cifs_ioctl,
785#endif /* CONFIG_CIFS_POSIX */ 669#endif /* CONFIG_CIFS_POSIX */
786 .llseek = cifs_llseek, 670 .llseek = cifs_llseek,
787#ifdef CONFIG_CIFS_EXPERIMENTAL
788 .setlease = cifs_setlease, 671 .setlease = cifs_setlease,
789#endif /* CONFIG_CIFS_EXPERIMENTAL */
790}; 672};
791const struct file_operations cifs_file_nobrl_ops = { 673const struct file_operations cifs_file_nobrl_ops = {
792 .read = do_sync_read, 674 .read = do_sync_read,
@@ -803,10 +685,7 @@ const struct file_operations cifs_file_nobrl_ops = {
803#ifdef CONFIG_CIFS_POSIX 685#ifdef CONFIG_CIFS_POSIX
804 .unlocked_ioctl = cifs_ioctl, 686 .unlocked_ioctl = cifs_ioctl,
805#endif /* CONFIG_CIFS_POSIX */ 687#endif /* CONFIG_CIFS_POSIX */
806
807#ifdef CONFIG_CIFS_EXPERIMENTAL
808 .setlease = cifs_setlease, 688 .setlease = cifs_setlease,
809#endif /* CONFIG_CIFS_EXPERIMENTAL */
810}; 689};
811 690
812const struct file_operations cifs_file_direct_nobrl_ops = { 691const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -824,9 +703,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
824 .unlocked_ioctl = cifs_ioctl, 703 .unlocked_ioctl = cifs_ioctl,
825#endif /* CONFIG_CIFS_POSIX */ 704#endif /* CONFIG_CIFS_POSIX */
826 .llseek = cifs_llseek, 705 .llseek = cifs_llseek,
827#ifdef CONFIG_CIFS_EXPERIMENTAL
828 .setlease = cifs_setlease, 706 .setlease = cifs_setlease,
829#endif /* CONFIG_CIFS_EXPERIMENTAL */
830}; 707};
831 708
832const struct file_operations cifs_dir_ops = { 709const struct file_operations cifs_dir_ops = {
@@ -878,7 +755,7 @@ cifs_init_request_bufs(void)
878 } else { 755 } else {
879 CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/ 756 CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/
880 } 757 }
881/* cERROR(1,("CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize)); */ 758/* cERROR(1, "CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize); */
882 cifs_req_cachep = kmem_cache_create("cifs_request", 759 cifs_req_cachep = kmem_cache_create("cifs_request",
883 CIFSMaxBufSize + 760 CIFSMaxBufSize +
884 MAX_CIFS_HDR_SIZE, 0, 761 MAX_CIFS_HDR_SIZE, 0,
@@ -890,7 +767,7 @@ cifs_init_request_bufs(void)
890 cifs_min_rcv = 1; 767 cifs_min_rcv = 1;
891 else if (cifs_min_rcv > 64) { 768 else if (cifs_min_rcv > 64) {
892 cifs_min_rcv = 64; 769 cifs_min_rcv = 64;
893 cERROR(1, ("cifs_min_rcv set to maximum (64)")); 770 cERROR(1, "cifs_min_rcv set to maximum (64)");
894 } 771 }
895 772
896 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, 773 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
@@ -921,7 +798,7 @@ cifs_init_request_bufs(void)
921 cifs_min_small = 2; 798 cifs_min_small = 2;
922 else if (cifs_min_small > 256) { 799 else if (cifs_min_small > 256) {
923 cifs_min_small = 256; 800 cifs_min_small = 256;
924 cFYI(1, ("cifs_min_small set to maximum (256)")); 801 cFYI(1, "cifs_min_small set to maximum (256)");
925 } 802 }
926 803
927 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, 804 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
@@ -962,15 +839,6 @@ cifs_init_mids(void)
962 return -ENOMEM; 839 return -ENOMEM;
963 } 840 }
964 841
965 cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
966 sizeof(struct oplock_q_entry), 0,
967 SLAB_HWCACHE_ALIGN, NULL);
968 if (cifs_oplock_cachep == NULL) {
969 mempool_destroy(cifs_mid_poolp);
970 kmem_cache_destroy(cifs_mid_cachep);
971 return -ENOMEM;
972 }
973
974 return 0; 842 return 0;
975} 843}
976 844
@@ -979,7 +847,6 @@ cifs_destroy_mids(void)
979{ 847{
980 mempool_destroy(cifs_mid_poolp); 848 mempool_destroy(cifs_mid_poolp);
981 kmem_cache_destroy(cifs_mid_cachep); 849 kmem_cache_destroy(cifs_mid_cachep);
982 kmem_cache_destroy(cifs_oplock_cachep);
983} 850}
984 851
985static int __init 852static int __init
@@ -1019,10 +886,10 @@ init_cifs(void)
1019 886
1020 if (cifs_max_pending < 2) { 887 if (cifs_max_pending < 2) {
1021 cifs_max_pending = 2; 888 cifs_max_pending = 2;
1022 cFYI(1, ("cifs_max_pending set to min of 2")); 889 cFYI(1, "cifs_max_pending set to min of 2");
1023 } else if (cifs_max_pending > 256) { 890 } else if (cifs_max_pending > 256) {
1024 cifs_max_pending = 256; 891 cifs_max_pending = 256;
1025 cFYI(1, ("cifs_max_pending set to max of 256")); 892 cFYI(1, "cifs_max_pending set to max of 256");
1026 } 893 }
1027 894
1028 rc = cifs_init_inodecache(); 895 rc = cifs_init_inodecache();
@@ -1080,7 +947,7 @@ init_cifs(void)
1080static void __exit 947static void __exit
1081exit_cifs(void) 948exit_cifs(void)
1082{ 949{
1083 cFYI(DBG2, ("exit_cifs")); 950 cFYI(DBG2, "exit_cifs");
1084 cifs_proc_clean(); 951 cifs_proc_clean();
1085#ifdef CONFIG_CIFS_DFS_UPCALL 952#ifdef CONFIG_CIFS_DFS_UPCALL
1086 cifs_dfs_release_automount_timer(); 953 cifs_dfs_release_automount_timer();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7aa57ecdc437..0242ff9cbf41 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
114extern const struct export_operations cifs_export_ops; 114extern const struct export_operations cifs_export_ops;
115#endif /* EXPERIMENTAL */ 115#endif /* EXPERIMENTAL */
116 116
117#define CIFS_VERSION "1.62" 117#define CIFS_VERSION "1.64"
118#endif /* _CIFSFS_H */ 118#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0c2fd17439c8..a88479ceaad5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -87,7 +87,6 @@ enum securityEnum {
87 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ 87 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
88/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ 88/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
89 Kerberos, /* Kerberos via SPNEGO */ 89 Kerberos, /* Kerberos via SPNEGO */
90 MSKerberos, /* MS Kerberos via SPNEGO */
91}; 90};
92 91
93enum protocolEnum { 92enum protocolEnum {
@@ -185,6 +184,12 @@ struct TCP_Server_Info {
185 struct mac_key mac_signing_key; 184 struct mac_key mac_signing_key;
186 char ntlmv2_hash[16]; 185 char ntlmv2_hash[16];
187 unsigned long lstrp; /* when we got last response from this server */ 186 unsigned long lstrp; /* when we got last response from this server */
187 u16 dialect; /* dialect index that server chose */
188 /* extended security flavors that server supports */
189 bool sec_kerberos; /* supports plain Kerberos */
190 bool sec_mskerberos; /* supports legacy MS Kerberos */
191 bool sec_kerberosu2u; /* supports U2U Kerberos */
192 bool sec_ntlmssp; /* supports NTLMSSP */
188}; 193};
189 194
190/* 195/*
@@ -718,7 +723,7 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
718GLOBAL_EXTERN unsigned int oplockEnabled; 723GLOBAL_EXTERN unsigned int oplockEnabled;
719GLOBAL_EXTERN unsigned int experimEnabled; 724GLOBAL_EXTERN unsigned int experimEnabled;
720GLOBAL_EXTERN unsigned int lookupCacheEnabled; 725GLOBAL_EXTERN unsigned int lookupCacheEnabled;
721GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent 726GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
722 with more secure ntlmssp2 challenge/resp */ 727 with more secure ntlmssp2 challenge/resp */
723GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ 728GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
724GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ 729GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 39e47f46dea5..fb1657e0fdb8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -39,8 +39,20 @@ extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
39 unsigned int /* length */); 39 unsigned int /* length */);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid())); 42#define GetXid() \
43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));} 43({ \
44 int __xid = (int)_GetXid(); \
45 cFYI(1, "CIFS VFS: in %s as Xid: %d with uid: %d", \
46 __func__, __xid, current_fsuid()); \
47 __xid; \
48})
49
50#define FreeXid(curr_xid) \
51do { \
52 _FreeXid(curr_xid); \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \
55} while (0)
44extern char *build_path_from_dentry(struct dentry *); 56extern char *build_path_from_dentry(struct dentry *);
45extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); 57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
46extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 58extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
@@ -73,7 +85,7 @@ extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *);
73extern unsigned int smbCalcSize(struct smb_hdr *ptr); 85extern unsigned int smbCalcSize(struct smb_hdr *ptr);
74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
75extern int decode_negTokenInit(unsigned char *security_blob, int length, 87extern int decode_negTokenInit(unsigned char *security_blob, int length,
76 enum securityEnum *secType); 88 struct TCP_Server_Info *server);
77extern int cifs_convert_address(char *src, void *dst); 89extern int cifs_convert_address(char *src, void *dst);
78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 90extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
79extern void header_assemble(struct smb_hdr *, char /* command */ , 91extern void header_assemble(struct smb_hdr *, char /* command */ ,
@@ -83,7 +95,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
83 struct cifsSesInfo *ses, 95 struct cifsSesInfo *ses,
84 void **request_buf); 96 void **request_buf);
85extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 97extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
86 const int stage,
87 const struct nls_table *nls_cp); 98 const struct nls_table *nls_cp);
88extern __u16 GetNextMid(struct TCP_Server_Info *server); 99extern __u16 GetNextMid(struct TCP_Server_Info *server);
89extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 100extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
@@ -95,8 +106,11 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
95 __u16 fileHandle, struct file *file, 106 __u16 fileHandle, struct file *file,
96 struct vfsmount *mnt, unsigned int oflags); 107 struct vfsmount *mnt, unsigned int oflags);
97extern int cifs_posix_open(char *full_path, struct inode **pinode, 108extern int cifs_posix_open(char *full_path, struct inode **pinode,
98 struct vfsmount *mnt, int mode, int oflags, 109 struct vfsmount *mnt,
99 __u32 *poplock, __u16 *pnetfid, int xid); 110 struct super_block *sb,
111 int mode, int oflags,
112 __u32 *poplock, __u16 *pnetfid, int xid);
113void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr);
100extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, 114extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
101 FILE_UNIX_BASIC_INFO *info, 115 FILE_UNIX_BASIC_INFO *info,
102 struct cifs_sb_info *cifs_sb); 116 struct cifs_sb_info *cifs_sb);
@@ -125,7 +139,9 @@ extern void cifs_dfs_release_automount_timer(void);
125void cifs_proc_init(void); 139void cifs_proc_init(void);
126void cifs_proc_clean(void); 140void cifs_proc_clean(void);
127 141
128extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, 142extern int cifs_negotiate_protocol(unsigned int xid,
143 struct cifsSesInfo *ses);
144extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
129 struct nls_table *nls_info); 145 struct nls_table *nls_info);
130extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); 146extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses);
131 147
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5d3f29fef532..c65c3419dd37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2009 4 * Copyright (C) International Business Machines Corp., 2002,2010
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -130,8 +130,8 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
130 if (smb_command != SMB_COM_WRITE_ANDX && 130 if (smb_command != SMB_COM_WRITE_ANDX &&
131 smb_command != SMB_COM_OPEN_ANDX && 131 smb_command != SMB_COM_OPEN_ANDX &&
132 smb_command != SMB_COM_TREE_DISCONNECT) { 132 smb_command != SMB_COM_TREE_DISCONNECT) {
133 cFYI(1, ("can not send cmd %d while umounting", 133 cFYI(1, "can not send cmd %d while umounting",
134 smb_command)); 134 smb_command);
135 return -ENODEV; 135 return -ENODEV;
136 } 136 }
137 } 137 }
@@ -157,7 +157,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
157 * back on-line 157 * back on-line
158 */ 158 */
159 if (!tcon->retry || ses->status == CifsExiting) { 159 if (!tcon->retry || ses->status == CifsExiting) {
160 cFYI(1, ("gave up waiting on reconnect in smb_init")); 160 cFYI(1, "gave up waiting on reconnect in smb_init");
161 return -EHOSTDOWN; 161 return -EHOSTDOWN;
162 } 162 }
163 } 163 }
@@ -172,7 +172,8 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
172 * reconnect the same SMB session 172 * reconnect the same SMB session
173 */ 173 */
174 mutex_lock(&ses->session_mutex); 174 mutex_lock(&ses->session_mutex);
175 if (ses->need_reconnect) 175 rc = cifs_negotiate_protocol(0, ses);
176 if (rc == 0 && ses->need_reconnect)
176 rc = cifs_setup_session(0, ses, nls_codepage); 177 rc = cifs_setup_session(0, ses, nls_codepage);
177 178
178 /* do we need to reconnect tcon? */ 179 /* do we need to reconnect tcon? */
@@ -184,7 +185,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
184 mark_open_files_invalid(tcon); 185 mark_open_files_invalid(tcon);
185 rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); 186 rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
186 mutex_unlock(&ses->session_mutex); 187 mutex_unlock(&ses->session_mutex);
187 cFYI(1, ("reconnect tcon rc = %d", rc)); 188 cFYI(1, "reconnect tcon rc = %d", rc);
188 189
189 if (rc) 190 if (rc)
190 goto out; 191 goto out;
@@ -355,7 +356,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
355 struct TCP_Server_Info *server; 356 struct TCP_Server_Info *server;
356 u16 count; 357 u16 count;
357 unsigned int secFlags; 358 unsigned int secFlags;
358 u16 dialect;
359 359
360 if (ses->server) 360 if (ses->server)
361 server = ses->server; 361 server = ses->server;
@@ -372,9 +372,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
372 if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) 372 if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
373 secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ 373 secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */
374 else /* if override flags set only sign/seal OR them with global auth */ 374 else /* if override flags set only sign/seal OR them with global auth */
375 secFlags = extended_security | ses->overrideSecFlg; 375 secFlags = global_secflags | ses->overrideSecFlg;
376 376
377 cFYI(1, ("secFlags 0x%x", secFlags)); 377 cFYI(1, "secFlags 0x%x", secFlags);
378 378
379 pSMB->hdr.Mid = GetNextMid(server); 379 pSMB->hdr.Mid = GetNextMid(server);
380 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); 380 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
@@ -382,14 +382,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
382 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) 382 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
383 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 383 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
384 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { 384 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
385 cFYI(1, ("Kerberos only mechanism, enable extended security")); 385 cFYI(1, "Kerberos only mechanism, enable extended security");
386 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 386 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
387 } 387 }
388#ifdef CONFIG_CIFS_EXPERIMENTAL 388#ifdef CONFIG_CIFS_EXPERIMENTAL
389 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) 389 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
390 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 390 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
391 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { 391 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
392 cFYI(1, ("NTLMSSP only mechanism, enable extended security")); 392 cFYI(1, "NTLMSSP only mechanism, enable extended security");
393 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 393 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
394 } 394 }
395#endif 395#endif
@@ -408,10 +408,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
408 if (rc != 0) 408 if (rc != 0)
409 goto neg_err_exit; 409 goto neg_err_exit;
410 410
411 dialect = le16_to_cpu(pSMBr->DialectIndex); 411 server->dialect = le16_to_cpu(pSMBr->DialectIndex);
412 cFYI(1, ("Dialect: %d", dialect)); 412 cFYI(1, "Dialect: %d", server->dialect);
413 /* Check wct = 1 error case */ 413 /* Check wct = 1 error case */
414 if ((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) { 414 if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) {
415 /* core returns wct = 1, but we do not ask for core - otherwise 415 /* core returns wct = 1, but we do not ask for core - otherwise
416 small wct just comes when dialect index is -1 indicating we 416 small wct just comes when dialect index is -1 indicating we
417 could not negotiate a common dialect */ 417 could not negotiate a common dialect */
@@ -419,8 +419,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
419 goto neg_err_exit; 419 goto neg_err_exit;
420#ifdef CONFIG_CIFS_WEAK_PW_HASH 420#ifdef CONFIG_CIFS_WEAK_PW_HASH
421 } else if ((pSMBr->hdr.WordCount == 13) 421 } else if ((pSMBr->hdr.WordCount == 13)
422 && ((dialect == LANMAN_PROT) 422 && ((server->dialect == LANMAN_PROT)
423 || (dialect == LANMAN2_PROT))) { 423 || (server->dialect == LANMAN2_PROT))) {
424 __s16 tmp; 424 __s16 tmp;
425 struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; 425 struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
426 426
@@ -428,8 +428,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
428 (secFlags & CIFSSEC_MAY_PLNTXT)) 428 (secFlags & CIFSSEC_MAY_PLNTXT))
429 server->secType = LANMAN; 429 server->secType = LANMAN;
430 else { 430 else {
431 cERROR(1, ("mount failed weak security disabled" 431 cERROR(1, "mount failed weak security disabled"
432 " in /proc/fs/cifs/SecurityFlags")); 432 " in /proc/fs/cifs/SecurityFlags");
433 rc = -EOPNOTSUPP; 433 rc = -EOPNOTSUPP;
434 goto neg_err_exit; 434 goto neg_err_exit;
435 } 435 }
@@ -462,9 +462,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
462 utc = CURRENT_TIME; 462 utc = CURRENT_TIME;
463 ts = cnvrtDosUnixTm(rsp->SrvTime.Date, 463 ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
464 rsp->SrvTime.Time, 0); 464 rsp->SrvTime.Time, 0);
465 cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", 465 cFYI(1, "SrvTime %d sec since 1970 (utc: %d) diff: %d",
466 (int)ts.tv_sec, (int)utc.tv_sec, 466 (int)ts.tv_sec, (int)utc.tv_sec,
467 (int)(utc.tv_sec - ts.tv_sec))); 467 (int)(utc.tv_sec - ts.tv_sec));
468 val = (int)(utc.tv_sec - ts.tv_sec); 468 val = (int)(utc.tv_sec - ts.tv_sec);
469 seconds = abs(val); 469 seconds = abs(val);
470 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; 470 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
@@ -478,7 +478,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
478 server->timeAdj = (int)tmp; 478 server->timeAdj = (int)tmp;
479 server->timeAdj *= 60; /* also in seconds */ 479 server->timeAdj *= 60; /* also in seconds */
480 } 480 }
481 cFYI(1, ("server->timeAdj: %d seconds", server->timeAdj)); 481 cFYI(1, "server->timeAdj: %d seconds", server->timeAdj);
482 482
483 483
484 /* BB get server time for time conversions and add 484 /* BB get server time for time conversions and add
@@ -493,14 +493,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
493 goto neg_err_exit; 493 goto neg_err_exit;
494 } 494 }
495 495
496 cFYI(1, ("LANMAN negotiated")); 496 cFYI(1, "LANMAN negotiated");
497 /* we will not end up setting signing flags - as no signing 497 /* we will not end up setting signing flags - as no signing
498 was in LANMAN and server did not return the flags on */ 498 was in LANMAN and server did not return the flags on */
499 goto signing_check; 499 goto signing_check;
500#else /* weak security disabled */ 500#else /* weak security disabled */
501 } else if (pSMBr->hdr.WordCount == 13) { 501 } else if (pSMBr->hdr.WordCount == 13) {
502 cERROR(1, ("mount failed, cifs module not built " 502 cERROR(1, "mount failed, cifs module not built "
503 "with CIFS_WEAK_PW_HASH support")); 503 "with CIFS_WEAK_PW_HASH support");
504 rc = -EOPNOTSUPP; 504 rc = -EOPNOTSUPP;
505#endif /* WEAK_PW_HASH */ 505#endif /* WEAK_PW_HASH */
506 goto neg_err_exit; 506 goto neg_err_exit;
@@ -512,14 +512,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
512 /* else wct == 17 NTLM */ 512 /* else wct == 17 NTLM */
513 server->secMode = pSMBr->SecurityMode; 513 server->secMode = pSMBr->SecurityMode;
514 if ((server->secMode & SECMODE_USER) == 0) 514 if ((server->secMode & SECMODE_USER) == 0)
515 cFYI(1, ("share mode security")); 515 cFYI(1, "share mode security");
516 516
517 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0) 517 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0)
518#ifdef CONFIG_CIFS_WEAK_PW_HASH 518#ifdef CONFIG_CIFS_WEAK_PW_HASH
519 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) 519 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
520#endif /* CIFS_WEAK_PW_HASH */ 520#endif /* CIFS_WEAK_PW_HASH */
521 cERROR(1, ("Server requests plain text password" 521 cERROR(1, "Server requests plain text password"
522 " but client support disabled")); 522 " but client support disabled");
523 523
524 if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) 524 if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
525 server->secType = NTLMv2; 525 server->secType = NTLMv2;
@@ -539,7 +539,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
539#endif */ 539#endif */
540 else { 540 else {
541 rc = -EOPNOTSUPP; 541 rc = -EOPNOTSUPP;
542 cERROR(1, ("Invalid security type")); 542 cERROR(1, "Invalid security type");
543 goto neg_err_exit; 543 goto neg_err_exit;
544 } 544 }
545 /* else ... any others ...? */ 545 /* else ... any others ...? */
@@ -551,7 +551,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
551 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), 551 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
552 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 552 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
553 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); 553 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
554 cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf)); 554 cFYI(DBG2, "Max buf = %d", ses->server->maxBuf);
555 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 555 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
556 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 556 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
557 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 557 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
@@ -582,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
582 if (memcmp(server->server_GUID, 582 if (memcmp(server->server_GUID,
583 pSMBr->u.extended_response. 583 pSMBr->u.extended_response.
584 GUID, 16) != 0) { 584 GUID, 16) != 0) {
585 cFYI(1, ("server UID changed")); 585 cFYI(1, "server UID changed");
586 memcpy(server->server_GUID, 586 memcpy(server->server_GUID,
587 pSMBr->u.extended_response.GUID, 587 pSMBr->u.extended_response.GUID,
588 16); 588 16);
@@ -597,13 +597,19 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
597 server->secType = RawNTLMSSP; 597 server->secType = RawNTLMSSP;
598 } else { 598 } else {
599 rc = decode_negTokenInit(pSMBr->u.extended_response. 599 rc = decode_negTokenInit(pSMBr->u.extended_response.
600 SecurityBlob, 600 SecurityBlob, count - 16,
601 count - 16, 601 server);
602 &server->secType);
603 if (rc == 1) 602 if (rc == 1)
604 rc = 0; 603 rc = 0;
605 else 604 else
606 rc = -EINVAL; 605 rc = -EINVAL;
606
607 if (server->sec_kerberos || server->sec_mskerberos)
608 server->secType = Kerberos;
609 else if (server->sec_ntlmssp)
610 server->secType = RawNTLMSSP;
611 else
612 rc = -EOPNOTSUPP;
607 } 613 }
608 } else 614 } else
609 server->capabilities &= ~CAP_EXTENDED_SECURITY; 615 server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -614,22 +620,21 @@ signing_check:
614 if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { 620 if ((secFlags & CIFSSEC_MAY_SIGN) == 0) {
615 /* MUST_SIGN already includes the MAY_SIGN FLAG 621 /* MUST_SIGN already includes the MAY_SIGN FLAG
616 so if this is zero it means that signing is disabled */ 622 so if this is zero it means that signing is disabled */
617 cFYI(1, ("Signing disabled")); 623 cFYI(1, "Signing disabled");
618 if (server->secMode & SECMODE_SIGN_REQUIRED) { 624 if (server->secMode & SECMODE_SIGN_REQUIRED) {
619 cERROR(1, ("Server requires " 625 cERROR(1, "Server requires "
620 "packet signing to be enabled in " 626 "packet signing to be enabled in "
621 "/proc/fs/cifs/SecurityFlags.")); 627 "/proc/fs/cifs/SecurityFlags.");
622 rc = -EOPNOTSUPP; 628 rc = -EOPNOTSUPP;
623 } 629 }
624 server->secMode &= 630 server->secMode &=
625 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 631 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
626 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { 632 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
627 /* signing required */ 633 /* signing required */
628 cFYI(1, ("Must sign - secFlags 0x%x", secFlags)); 634 cFYI(1, "Must sign - secFlags 0x%x", secFlags);
629 if ((server->secMode & 635 if ((server->secMode &
630 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { 636 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
631 cERROR(1, 637 cERROR(1, "signing required but server lacks support");
632 ("signing required but server lacks support"));
633 rc = -EOPNOTSUPP; 638 rc = -EOPNOTSUPP;
634 } else 639 } else
635 server->secMode |= SECMODE_SIGN_REQUIRED; 640 server->secMode |= SECMODE_SIGN_REQUIRED;
@@ -643,7 +648,7 @@ signing_check:
643neg_err_exit: 648neg_err_exit:
644 cifs_buf_release(pSMB); 649 cifs_buf_release(pSMB);
645 650
646 cFYI(1, ("negprot rc %d", rc)); 651 cFYI(1, "negprot rc %d", rc);
647 return rc; 652 return rc;
648} 653}
649 654
@@ -653,7 +658,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
653 struct smb_hdr *smb_buffer; 658 struct smb_hdr *smb_buffer;
654 int rc = 0; 659 int rc = 0;
655 660
656 cFYI(1, ("In tree disconnect")); 661 cFYI(1, "In tree disconnect");
657 662
658 /* BB: do we need to check this? These should never be NULL. */ 663 /* BB: do we need to check this? These should never be NULL. */
659 if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) 664 if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
@@ -675,7 +680,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
675 680
676 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0); 681 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0);
677 if (rc) 682 if (rc)
678 cFYI(1, ("Tree disconnect failed %d", rc)); 683 cFYI(1, "Tree disconnect failed %d", rc);
679 684
680 /* No need to return error on this operation if tid invalidated and 685 /* No need to return error on this operation if tid invalidated and
681 closed on server already e.g. due to tcp session crashing */ 686 closed on server already e.g. due to tcp session crashing */
@@ -691,7 +696,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
691 LOGOFF_ANDX_REQ *pSMB; 696 LOGOFF_ANDX_REQ *pSMB;
692 int rc = 0; 697 int rc = 0;
693 698
694 cFYI(1, ("In SMBLogoff for session disconnect")); 699 cFYI(1, "In SMBLogoff for session disconnect");
695 700
696 /* 701 /*
697 * BB: do we need to check validity of ses and server? They should 702 * BB: do we need to check validity of ses and server? They should
@@ -744,7 +749,7 @@ CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName,
744 int bytes_returned = 0; 749 int bytes_returned = 0;
745 __u16 params, param_offset, offset, byte_count; 750 __u16 params, param_offset, offset, byte_count;
746 751
747 cFYI(1, ("In POSIX delete")); 752 cFYI(1, "In POSIX delete");
748PsxDelete: 753PsxDelete:
749 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 754 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
750 (void **) &pSMBr); 755 (void **) &pSMBr);
@@ -796,7 +801,7 @@ PsxDelete:
796 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 801 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
797 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 802 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
798 if (rc) 803 if (rc)
799 cFYI(1, ("Posix delete returned %d", rc)); 804 cFYI(1, "Posix delete returned %d", rc);
800 cifs_buf_release(pSMB); 805 cifs_buf_release(pSMB);
801 806
802 cifs_stats_inc(&tcon->num_deletes); 807 cifs_stats_inc(&tcon->num_deletes);
@@ -843,7 +848,7 @@ DelFileRetry:
843 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 848 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
844 cifs_stats_inc(&tcon->num_deletes); 849 cifs_stats_inc(&tcon->num_deletes);
845 if (rc) 850 if (rc)
846 cFYI(1, ("Error in RMFile = %d", rc)); 851 cFYI(1, "Error in RMFile = %d", rc);
847 852
848 cifs_buf_release(pSMB); 853 cifs_buf_release(pSMB);
849 if (rc == -EAGAIN) 854 if (rc == -EAGAIN)
@@ -862,7 +867,7 @@ CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName,
862 int bytes_returned; 867 int bytes_returned;
863 int name_len; 868 int name_len;
864 869
865 cFYI(1, ("In CIFSSMBRmDir")); 870 cFYI(1, "In CIFSSMBRmDir");
866RmDirRetry: 871RmDirRetry:
867 rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB, 872 rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB,
868 (void **) &pSMBr); 873 (void **) &pSMBr);
@@ -887,7 +892,7 @@ RmDirRetry:
887 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 892 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
888 cifs_stats_inc(&tcon->num_rmdirs); 893 cifs_stats_inc(&tcon->num_rmdirs);
889 if (rc) 894 if (rc)
890 cFYI(1, ("Error in RMDir = %d", rc)); 895 cFYI(1, "Error in RMDir = %d", rc);
891 896
892 cifs_buf_release(pSMB); 897 cifs_buf_release(pSMB);
893 if (rc == -EAGAIN) 898 if (rc == -EAGAIN)
@@ -905,7 +910,7 @@ CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon,
905 int bytes_returned; 910 int bytes_returned;
906 int name_len; 911 int name_len;
907 912
908 cFYI(1, ("In CIFSSMBMkDir")); 913 cFYI(1, "In CIFSSMBMkDir");
909MkDirRetry: 914MkDirRetry:
910 rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB, 915 rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB,
911 (void **) &pSMBr); 916 (void **) &pSMBr);
@@ -930,7 +935,7 @@ MkDirRetry:
930 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 935 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
931 cifs_stats_inc(&tcon->num_mkdirs); 936 cifs_stats_inc(&tcon->num_mkdirs);
932 if (rc) 937 if (rc)
933 cFYI(1, ("Error in Mkdir = %d", rc)); 938 cFYI(1, "Error in Mkdir = %d", rc);
934 939
935 cifs_buf_release(pSMB); 940 cifs_buf_release(pSMB);
936 if (rc == -EAGAIN) 941 if (rc == -EAGAIN)
@@ -953,7 +958,7 @@ CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
953 OPEN_PSX_REQ *pdata; 958 OPEN_PSX_REQ *pdata;
954 OPEN_PSX_RSP *psx_rsp; 959 OPEN_PSX_RSP *psx_rsp;
955 960
956 cFYI(1, ("In POSIX Create")); 961 cFYI(1, "In POSIX Create");
957PsxCreat: 962PsxCreat:
958 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 963 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
959 (void **) &pSMBr); 964 (void **) &pSMBr);
@@ -1007,11 +1012,11 @@ PsxCreat:
1007 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1012 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1008 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1013 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1009 if (rc) { 1014 if (rc) {
1010 cFYI(1, ("Posix create returned %d", rc)); 1015 cFYI(1, "Posix create returned %d", rc);
1011 goto psx_create_err; 1016 goto psx_create_err;
1012 } 1017 }
1013 1018
1014 cFYI(1, ("copying inode info")); 1019 cFYI(1, "copying inode info");
1015 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1020 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1016 1021
1017 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1022 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
@@ -1033,11 +1038,11 @@ PsxCreat:
1033 /* check to make sure response data is there */ 1038 /* check to make sure response data is there */
1034 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) { 1039 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
1035 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1040 pRetData->Type = cpu_to_le32(-1); /* unknown */
1036 cFYI(DBG2, ("unknown type")); 1041 cFYI(DBG2, "unknown type");
1037 } else { 1042 } else {
1038 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1043 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
1039 + sizeof(FILE_UNIX_BASIC_INFO)) { 1044 + sizeof(FILE_UNIX_BASIC_INFO)) {
1040 cERROR(1, ("Open response data too small")); 1045 cERROR(1, "Open response data too small");
1041 pRetData->Type = cpu_to_le32(-1); 1046 pRetData->Type = cpu_to_le32(-1);
1042 goto psx_create_err; 1047 goto psx_create_err;
1043 } 1048 }
@@ -1084,7 +1089,7 @@ static __u16 convert_disposition(int disposition)
1084 ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC; 1089 ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC;
1085 break; 1090 break;
1086 default: 1091 default:
1087 cFYI(1, ("unknown disposition %d", disposition)); 1092 cFYI(1, "unknown disposition %d", disposition);
1088 ofun = SMBOPEN_OAPPEND; /* regular open */ 1093 ofun = SMBOPEN_OAPPEND; /* regular open */
1089 } 1094 }
1090 return ofun; 1095 return ofun;
@@ -1175,7 +1180,7 @@ OldOpenRetry:
1175 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); 1180 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
1176 cifs_stats_inc(&tcon->num_opens); 1181 cifs_stats_inc(&tcon->num_opens);
1177 if (rc) { 1182 if (rc) {
1178 cFYI(1, ("Error in Open = %d", rc)); 1183 cFYI(1, "Error in Open = %d", rc);
1179 } else { 1184 } else {
1180 /* BB verify if wct == 15 */ 1185 /* BB verify if wct == 15 */
1181 1186
@@ -1288,7 +1293,7 @@ openRetry:
1288 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); 1293 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
1289 cifs_stats_inc(&tcon->num_opens); 1294 cifs_stats_inc(&tcon->num_opens);
1290 if (rc) { 1295 if (rc) {
1291 cFYI(1, ("Error in Open = %d", rc)); 1296 cFYI(1, "Error in Open = %d", rc);
1292 } else { 1297 } else {
1293 *pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */ 1298 *pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */
1294 *netfid = pSMBr->Fid; /* cifs fid stays in le */ 1299 *netfid = pSMBr->Fid; /* cifs fid stays in le */
@@ -1326,7 +1331,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1326 int resp_buf_type = 0; 1331 int resp_buf_type = 0;
1327 struct kvec iov[1]; 1332 struct kvec iov[1];
1328 1333
1329 cFYI(1, ("Reading %d bytes on fid %d", count, netfid)); 1334 cFYI(1, "Reading %d bytes on fid %d", count, netfid);
1330 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1335 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1331 wct = 12; 1336 wct = 12;
1332 else { 1337 else {
@@ -1371,7 +1376,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1371 cifs_stats_inc(&tcon->num_reads); 1376 cifs_stats_inc(&tcon->num_reads);
1372 pSMBr = (READ_RSP *)iov[0].iov_base; 1377 pSMBr = (READ_RSP *)iov[0].iov_base;
1373 if (rc) { 1378 if (rc) {
1374 cERROR(1, ("Send error in read = %d", rc)); 1379 cERROR(1, "Send error in read = %d", rc);
1375 } else { 1380 } else {
1376 int data_length = le16_to_cpu(pSMBr->DataLengthHigh); 1381 int data_length = le16_to_cpu(pSMBr->DataLengthHigh);
1377 data_length = data_length << 16; 1382 data_length = data_length << 16;
@@ -1381,15 +1386,15 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1381 /*check that DataLength would not go beyond end of SMB */ 1386 /*check that DataLength would not go beyond end of SMB */
1382 if ((data_length > CIFSMaxBufSize) 1387 if ((data_length > CIFSMaxBufSize)
1383 || (data_length > count)) { 1388 || (data_length > count)) {
1384 cFYI(1, ("bad length %d for count %d", 1389 cFYI(1, "bad length %d for count %d",
1385 data_length, count)); 1390 data_length, count);
1386 rc = -EIO; 1391 rc = -EIO;
1387 *nbytes = 0; 1392 *nbytes = 0;
1388 } else { 1393 } else {
1389 pReadData = (char *) (&pSMBr->hdr.Protocol) + 1394 pReadData = (char *) (&pSMBr->hdr.Protocol) +
1390 le16_to_cpu(pSMBr->DataOffset); 1395 le16_to_cpu(pSMBr->DataOffset);
1391/* if (rc = copy_to_user(buf, pReadData, data_length)) { 1396/* if (rc = copy_to_user(buf, pReadData, data_length)) {
1392 cERROR(1,("Faulting on read rc = %d",rc)); 1397 cERROR(1, "Faulting on read rc = %d",rc);
1393 rc = -EFAULT; 1398 rc = -EFAULT;
1394 }*/ /* can not use copy_to_user when using page cache*/ 1399 }*/ /* can not use copy_to_user when using page cache*/
1395 if (*buf) 1400 if (*buf)
@@ -1433,7 +1438,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1433 1438
1434 *nbytes = 0; 1439 *nbytes = 0;
1435 1440
1436 /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ 1441 /* cFYI(1, "write at %lld %d bytes", offset, count);*/
1437 if (tcon->ses == NULL) 1442 if (tcon->ses == NULL)
1438 return -ECONNABORTED; 1443 return -ECONNABORTED;
1439 1444
@@ -1514,7 +1519,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1514 (struct smb_hdr *) pSMBr, &bytes_returned, long_op); 1519 (struct smb_hdr *) pSMBr, &bytes_returned, long_op);
1515 cifs_stats_inc(&tcon->num_writes); 1520 cifs_stats_inc(&tcon->num_writes);
1516 if (rc) { 1521 if (rc) {
1517 cFYI(1, ("Send error in write = %d", rc)); 1522 cFYI(1, "Send error in write = %d", rc);
1518 } else { 1523 } else {
1519 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1524 *nbytes = le16_to_cpu(pSMBr->CountHigh);
1520 *nbytes = (*nbytes) << 16; 1525 *nbytes = (*nbytes) << 16;
@@ -1551,7 +1556,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1551 1556
1552 *nbytes = 0; 1557 *nbytes = 0;
1553 1558
1554 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); 1559 cFYI(1, "write2 at %lld %d bytes", (long long)offset, count);
1555 1560
1556 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 1561 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
1557 wct = 14; 1562 wct = 14;
@@ -1606,7 +1611,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1606 long_op); 1611 long_op);
1607 cifs_stats_inc(&tcon->num_writes); 1612 cifs_stats_inc(&tcon->num_writes);
1608 if (rc) { 1613 if (rc) {
1609 cFYI(1, ("Send error Write2 = %d", rc)); 1614 cFYI(1, "Send error Write2 = %d", rc);
1610 } else if (resp_buf_type == 0) { 1615 } else if (resp_buf_type == 0) {
1611 /* presumably this can not happen, but best to be safe */ 1616 /* presumably this can not happen, but best to be safe */
1612 rc = -EIO; 1617 rc = -EIO;
@@ -1651,7 +1656,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1651 int timeout = 0; 1656 int timeout = 0;
1652 __u16 count; 1657 __u16 count;
1653 1658
1654 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock)); 1659 cFYI(1, "CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock);
1655 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); 1660 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
1656 1661
1657 if (rc) 1662 if (rc)
@@ -1699,7 +1704,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1699 } 1704 }
1700 cifs_stats_inc(&tcon->num_locks); 1705 cifs_stats_inc(&tcon->num_locks);
1701 if (rc) 1706 if (rc)
1702 cFYI(1, ("Send error in Lock = %d", rc)); 1707 cFYI(1, "Send error in Lock = %d", rc);
1703 1708
1704 /* Note: On -EAGAIN error only caller can retry on handle based calls 1709 /* Note: On -EAGAIN error only caller can retry on handle based calls
1705 since file handle passed in no longer valid */ 1710 since file handle passed in no longer valid */
@@ -1722,7 +1727,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1722 __u16 params, param_offset, offset, byte_count, count; 1727 __u16 params, param_offset, offset, byte_count, count;
1723 struct kvec iov[1]; 1728 struct kvec iov[1];
1724 1729
1725 cFYI(1, ("Posix Lock")); 1730 cFYI(1, "Posix Lock");
1726 1731
1727 if (pLockData == NULL) 1732 if (pLockData == NULL)
1728 return -EINVAL; 1733 return -EINVAL;
@@ -1792,7 +1797,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1792 } 1797 }
1793 1798
1794 if (rc) { 1799 if (rc) {
1795 cFYI(1, ("Send error in Posix Lock = %d", rc)); 1800 cFYI(1, "Send error in Posix Lock = %d", rc);
1796 } else if (get_flag) { 1801 } else if (get_flag) {
1797 /* lock structure can be returned on get */ 1802 /* lock structure can be returned on get */
1798 __u16 data_offset; 1803 __u16 data_offset;
@@ -1849,7 +1854,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1849{ 1854{
1850 int rc = 0; 1855 int rc = 0;
1851 CLOSE_REQ *pSMB = NULL; 1856 CLOSE_REQ *pSMB = NULL;
1852 cFYI(1, ("In CIFSSMBClose")); 1857 cFYI(1, "In CIFSSMBClose");
1853 1858
1854/* do not retry on dead session on close */ 1859/* do not retry on dead session on close */
1855 rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB); 1860 rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB);
@@ -1866,7 +1871,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1866 if (rc) { 1871 if (rc) {
1867 if (rc != -EINTR) { 1872 if (rc != -EINTR) {
1868 /* EINTR is expected when user ctl-c to kill app */ 1873 /* EINTR is expected when user ctl-c to kill app */
1869 cERROR(1, ("Send error in Close = %d", rc)); 1874 cERROR(1, "Send error in Close = %d", rc);
1870 } 1875 }
1871 } 1876 }
1872 1877
@@ -1882,7 +1887,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1882{ 1887{
1883 int rc = 0; 1888 int rc = 0;
1884 FLUSH_REQ *pSMB = NULL; 1889 FLUSH_REQ *pSMB = NULL;
1885 cFYI(1, ("In CIFSSMBFlush")); 1890 cFYI(1, "In CIFSSMBFlush");
1886 1891
1887 rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB); 1892 rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB);
1888 if (rc) 1893 if (rc)
@@ -1893,7 +1898,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1893 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 1898 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
1894 cifs_stats_inc(&tcon->num_flushes); 1899 cifs_stats_inc(&tcon->num_flushes);
1895 if (rc) 1900 if (rc)
1896 cERROR(1, ("Send error in Flush = %d", rc)); 1901 cERROR(1, "Send error in Flush = %d", rc);
1897 1902
1898 return rc; 1903 return rc;
1899} 1904}
@@ -1910,7 +1915,7 @@ CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
1910 int name_len, name_len2; 1915 int name_len, name_len2;
1911 __u16 count; 1916 __u16 count;
1912 1917
1913 cFYI(1, ("In CIFSSMBRename")); 1918 cFYI(1, "In CIFSSMBRename");
1914renameRetry: 1919renameRetry:
1915 rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB, 1920 rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB,
1916 (void **) &pSMBr); 1921 (void **) &pSMBr);
@@ -1956,7 +1961,7 @@ renameRetry:
1956 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1961 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1957 cifs_stats_inc(&tcon->num_renames); 1962 cifs_stats_inc(&tcon->num_renames);
1958 if (rc) 1963 if (rc)
1959 cFYI(1, ("Send error in rename = %d", rc)); 1964 cFYI(1, "Send error in rename = %d", rc);
1960 1965
1961 cifs_buf_release(pSMB); 1966 cifs_buf_release(pSMB);
1962 1967
@@ -1980,7 +1985,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
1980 int len_of_str; 1985 int len_of_str;
1981 __u16 params, param_offset, offset, count, byte_count; 1986 __u16 params, param_offset, offset, count, byte_count;
1982 1987
1983 cFYI(1, ("Rename to File by handle")); 1988 cFYI(1, "Rename to File by handle");
1984 rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, 1989 rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB,
1985 (void **) &pSMBr); 1990 (void **) &pSMBr);
1986 if (rc) 1991 if (rc)
@@ -2035,7 +2040,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2035 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2040 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2036 cifs_stats_inc(&pTcon->num_t2renames); 2041 cifs_stats_inc(&pTcon->num_t2renames);
2037 if (rc) 2042 if (rc)
2038 cFYI(1, ("Send error in Rename (by file handle) = %d", rc)); 2043 cFYI(1, "Send error in Rename (by file handle) = %d", rc);
2039 2044
2040 cifs_buf_release(pSMB); 2045 cifs_buf_release(pSMB);
2041 2046
@@ -2057,7 +2062,7 @@ CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char *fromName,
2057 int name_len, name_len2; 2062 int name_len, name_len2;
2058 __u16 count; 2063 __u16 count;
2059 2064
2060 cFYI(1, ("In CIFSSMBCopy")); 2065 cFYI(1, "In CIFSSMBCopy");
2061copyRetry: 2066copyRetry:
2062 rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB, 2067 rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB,
2063 (void **) &pSMBr); 2068 (void **) &pSMBr);
@@ -2102,8 +2107,8 @@ copyRetry:
2102 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2107 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2108 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2104 if (rc) { 2109 if (rc) {
2105 cFYI(1, ("Send error in copy = %d with %d files copied", 2110 cFYI(1, "Send error in copy = %d with %d files copied",
2106 rc, le16_to_cpu(pSMBr->CopyCount))); 2111 rc, le16_to_cpu(pSMBr->CopyCount));
2107 } 2112 }
2108 cifs_buf_release(pSMB); 2113 cifs_buf_release(pSMB);
2109 2114
@@ -2127,7 +2132,7 @@ CIFSUnixCreateSymLink(const int xid, struct cifsTconInfo *tcon,
2127 int bytes_returned = 0; 2132 int bytes_returned = 0;
2128 __u16 params, param_offset, offset, byte_count; 2133 __u16 params, param_offset, offset, byte_count;
2129 2134
2130 cFYI(1, ("In Symlink Unix style")); 2135 cFYI(1, "In Symlink Unix style");
2131createSymLinkRetry: 2136createSymLinkRetry:
2132 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2137 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2133 (void **) &pSMBr); 2138 (void **) &pSMBr);
@@ -2192,7 +2197,7 @@ createSymLinkRetry:
2192 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2197 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2193 cifs_stats_inc(&tcon->num_symlinks); 2198 cifs_stats_inc(&tcon->num_symlinks);
2194 if (rc) 2199 if (rc)
2195 cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc)); 2200 cFYI(1, "Send error in SetPathInfo create symlink = %d", rc);
2196 2201
2197 cifs_buf_release(pSMB); 2202 cifs_buf_release(pSMB);
2198 2203
@@ -2216,7 +2221,7 @@ CIFSUnixCreateHardLink(const int xid, struct cifsTconInfo *tcon,
2216 int bytes_returned = 0; 2221 int bytes_returned = 0;
2217 __u16 params, param_offset, offset, byte_count; 2222 __u16 params, param_offset, offset, byte_count;
2218 2223
2219 cFYI(1, ("In Create Hard link Unix style")); 2224 cFYI(1, "In Create Hard link Unix style");
2220createHardLinkRetry: 2225createHardLinkRetry:
2221 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2226 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2222 (void **) &pSMBr); 2227 (void **) &pSMBr);
@@ -2278,7 +2283,7 @@ createHardLinkRetry:
2278 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2283 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2279 cifs_stats_inc(&tcon->num_hardlinks); 2284 cifs_stats_inc(&tcon->num_hardlinks);
2280 if (rc) 2285 if (rc)
2281 cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc)); 2286 cFYI(1, "Send error in SetPathInfo (hard link) = %d", rc);
2282 2287
2283 cifs_buf_release(pSMB); 2288 cifs_buf_release(pSMB);
2284 if (rc == -EAGAIN) 2289 if (rc == -EAGAIN)
@@ -2299,7 +2304,7 @@ CIFSCreateHardLink(const int xid, struct cifsTconInfo *tcon,
2299 int name_len, name_len2; 2304 int name_len, name_len2;
2300 __u16 count; 2305 __u16 count;
2301 2306
2302 cFYI(1, ("In CIFSCreateHardLink")); 2307 cFYI(1, "In CIFSCreateHardLink");
2303winCreateHardLinkRetry: 2308winCreateHardLinkRetry:
2304 2309
2305 rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB, 2310 rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB,
@@ -2350,7 +2355,7 @@ winCreateHardLinkRetry:
2350 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2355 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2351 cifs_stats_inc(&tcon->num_hardlinks); 2356 cifs_stats_inc(&tcon->num_hardlinks);
2352 if (rc) 2357 if (rc)
2353 cFYI(1, ("Send error in hard link (NT rename) = %d", rc)); 2358 cFYI(1, "Send error in hard link (NT rename) = %d", rc);
2354 2359
2355 cifs_buf_release(pSMB); 2360 cifs_buf_release(pSMB);
2356 if (rc == -EAGAIN) 2361 if (rc == -EAGAIN)
@@ -2373,7 +2378,7 @@ CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2373 __u16 params, byte_count; 2378 __u16 params, byte_count;
2374 char *data_start; 2379 char *data_start;
2375 2380
2376 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); 2381 cFYI(1, "In QPathSymLinkInfo (Unix) for path %s", searchName);
2377 2382
2378querySymLinkRetry: 2383querySymLinkRetry:
2379 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2384 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -2420,7 +2425,7 @@ querySymLinkRetry:
2420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2425 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2421 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2426 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2422 if (rc) { 2427 if (rc) {
2423 cFYI(1, ("Send error in QuerySymLinkInfo = %d", rc)); 2428 cFYI(1, "Send error in QuerySymLinkInfo = %d", rc);
2424 } else { 2429 } else {
2425 /* decode response */ 2430 /* decode response */
2426 2431
@@ -2521,21 +2526,21 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
2521 2526
2522 /* should we also check that parm and data areas do not overlap? */ 2527 /* should we also check that parm and data areas do not overlap? */
2523 if (*ppparm > end_of_smb) { 2528 if (*ppparm > end_of_smb) {
2524 cFYI(1, ("parms start after end of smb")); 2529 cFYI(1, "parms start after end of smb");
2525 return -EINVAL; 2530 return -EINVAL;
2526 } else if (parm_count + *ppparm > end_of_smb) { 2531 } else if (parm_count + *ppparm > end_of_smb) {
2527 cFYI(1, ("parm end after end of smb")); 2532 cFYI(1, "parm end after end of smb");
2528 return -EINVAL; 2533 return -EINVAL;
2529 } else if (*ppdata > end_of_smb) { 2534 } else if (*ppdata > end_of_smb) {
2530 cFYI(1, ("data starts after end of smb")); 2535 cFYI(1, "data starts after end of smb");
2531 return -EINVAL; 2536 return -EINVAL;
2532 } else if (data_count + *ppdata > end_of_smb) { 2537 } else if (data_count + *ppdata > end_of_smb) {
2533 cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p", 2538 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
2534 *ppdata, data_count, (data_count + *ppdata), 2539 *ppdata, data_count, (data_count + *ppdata),
2535 end_of_smb, pSMBr)); 2540 end_of_smb, pSMBr);
2536 return -EINVAL; 2541 return -EINVAL;
2537 } else if (parm_count + data_count > pSMBr->ByteCount) { 2542 } else if (parm_count + data_count > pSMBr->ByteCount) {
2538 cFYI(1, ("parm count and data count larger than SMB")); 2543 cFYI(1, "parm count and data count larger than SMB");
2539 return -EINVAL; 2544 return -EINVAL;
2540 } 2545 }
2541 *pdatalen = data_count; 2546 *pdatalen = data_count;
@@ -2554,7 +2559,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2554 struct smb_com_transaction_ioctl_req *pSMB; 2559 struct smb_com_transaction_ioctl_req *pSMB;
2555 struct smb_com_transaction_ioctl_rsp *pSMBr; 2560 struct smb_com_transaction_ioctl_rsp *pSMBr;
2556 2561
2557 cFYI(1, ("In Windows reparse style QueryLink for path %s", searchName)); 2562 cFYI(1, "In Windows reparse style QueryLink for path %s", searchName);
2558 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, 2563 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
2559 (void **) &pSMBr); 2564 (void **) &pSMBr);
2560 if (rc) 2565 if (rc)
@@ -2583,7 +2588,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2583 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2588 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2584 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2589 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2585 if (rc) { 2590 if (rc) {
2586 cFYI(1, ("Send error in QueryReparseLinkInfo = %d", rc)); 2591 cFYI(1, "Send error in QueryReparseLinkInfo = %d", rc);
2587 } else { /* decode response */ 2592 } else { /* decode response */
2588 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2593 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2589 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2594 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
@@ -2607,7 +2612,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2607 if ((reparse_buf->LinkNamesBuf + 2612 if ((reparse_buf->LinkNamesBuf +
2608 reparse_buf->TargetNameOffset + 2613 reparse_buf->TargetNameOffset +
2609 reparse_buf->TargetNameLen) > end_of_smb) { 2614 reparse_buf->TargetNameLen) > end_of_smb) {
2610 cFYI(1, ("reparse buf beyond SMB")); 2615 cFYI(1, "reparse buf beyond SMB");
2611 rc = -EIO; 2616 rc = -EIO;
2612 goto qreparse_out; 2617 goto qreparse_out;
2613 } 2618 }
@@ -2628,12 +2633,12 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2628 } 2633 }
2629 } else { 2634 } else {
2630 rc = -EIO; 2635 rc = -EIO;
2631 cFYI(1, ("Invalid return data count on " 2636 cFYI(1, "Invalid return data count on "
2632 "get reparse info ioctl")); 2637 "get reparse info ioctl");
2633 } 2638 }
2634 symlinkinfo[buflen] = 0; /* just in case so the caller 2639 symlinkinfo[buflen] = 0; /* just in case so the caller
2635 does not go off the end of the buffer */ 2640 does not go off the end of the buffer */
2636 cFYI(1, ("readlink result - %s", symlinkinfo)); 2641 cFYI(1, "readlink result - %s", symlinkinfo);
2637 } 2642 }
2638 2643
2639qreparse_out: 2644qreparse_out:
@@ -2656,7 +2661,7 @@ static void cifs_convert_ace(posix_acl_xattr_entry *ace,
2656 ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm); 2661 ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm);
2657 ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag); 2662 ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag);
2658 ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid)); 2663 ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid));
2659 /* cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id)); */ 2664 /* cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id); */
2660 2665
2661 return; 2666 return;
2662} 2667}
@@ -2682,8 +2687,8 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen,
2682 size += sizeof(struct cifs_posix_ace) * count; 2687 size += sizeof(struct cifs_posix_ace) * count;
2683 /* check if we would go beyond end of SMB */ 2688 /* check if we would go beyond end of SMB */
2684 if (size_of_data_area < size) { 2689 if (size_of_data_area < size) {
2685 cFYI(1, ("bad CIFS POSIX ACL size %d vs. %d", 2690 cFYI(1, "bad CIFS POSIX ACL size %d vs. %d",
2686 size_of_data_area, size)); 2691 size_of_data_area, size);
2687 return -EINVAL; 2692 return -EINVAL;
2688 } 2693 }
2689 } else if (acl_type & ACL_TYPE_DEFAULT) { 2694 } else if (acl_type & ACL_TYPE_DEFAULT) {
@@ -2730,7 +2735,7 @@ static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace,
2730 cifs_ace->cifs_uid = cpu_to_le64(-1); 2735 cifs_ace->cifs_uid = cpu_to_le64(-1);
2731 } else 2736 } else
2732 cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id)); 2737 cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id));
2733 /*cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id));*/ 2738 /*cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id);*/
2734 return rc; 2739 return rc;
2735} 2740}
2736 2741
@@ -2748,12 +2753,12 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
2748 return 0; 2753 return 0;
2749 2754
2750 count = posix_acl_xattr_count((size_t)buflen); 2755 count = posix_acl_xattr_count((size_t)buflen);
2751 cFYI(1, ("setting acl with %d entries from buf of length %d and " 2756 cFYI(1, "setting acl with %d entries from buf of length %d and "
2752 "version of %d", 2757 "version of %d",
2753 count, buflen, le32_to_cpu(local_acl->a_version))); 2758 count, buflen, le32_to_cpu(local_acl->a_version));
2754 if (le32_to_cpu(local_acl->a_version) != 2) { 2759 if (le32_to_cpu(local_acl->a_version) != 2) {
2755 cFYI(1, ("unknown POSIX ACL version %d", 2760 cFYI(1, "unknown POSIX ACL version %d",
2756 le32_to_cpu(local_acl->a_version))); 2761 le32_to_cpu(local_acl->a_version));
2757 return 0; 2762 return 0;
2758 } 2763 }
2759 cifs_acl->version = cpu_to_le16(1); 2764 cifs_acl->version = cpu_to_le16(1);
@@ -2762,7 +2767,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
2762 else if (acl_type == ACL_TYPE_DEFAULT) 2767 else if (acl_type == ACL_TYPE_DEFAULT)
2763 cifs_acl->default_entry_count = cpu_to_le16(count); 2768 cifs_acl->default_entry_count = cpu_to_le16(count);
2764 else { 2769 else {
2765 cFYI(1, ("unknown ACL type %d", acl_type)); 2770 cFYI(1, "unknown ACL type %d", acl_type);
2766 return 0; 2771 return 0;
2767 } 2772 }
2768 for (i = 0; i < count; i++) { 2773 for (i = 0; i < count; i++) {
@@ -2795,7 +2800,7 @@ CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon,
2795 int name_len; 2800 int name_len;
2796 __u16 params, byte_count; 2801 __u16 params, byte_count;
2797 2802
2798 cFYI(1, ("In GetPosixACL (Unix) for path %s", searchName)); 2803 cFYI(1, "In GetPosixACL (Unix) for path %s", searchName);
2799 2804
2800queryAclRetry: 2805queryAclRetry:
2801 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2806 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -2847,7 +2852,7 @@ queryAclRetry:
2847 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2852 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2848 cifs_stats_inc(&tcon->num_acl_get); 2853 cifs_stats_inc(&tcon->num_acl_get);
2849 if (rc) { 2854 if (rc) {
2850 cFYI(1, ("Send error in Query POSIX ACL = %d", rc)); 2855 cFYI(1, "Send error in Query POSIX ACL = %d", rc);
2851 } else { 2856 } else {
2852 /* decode response */ 2857 /* decode response */
2853 2858
@@ -2884,7 +2889,7 @@ CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon,
2884 int bytes_returned = 0; 2889 int bytes_returned = 0;
2885 __u16 params, byte_count, data_count, param_offset, offset; 2890 __u16 params, byte_count, data_count, param_offset, offset;
2886 2891
2887 cFYI(1, ("In SetPosixACL (Unix) for path %s", fileName)); 2892 cFYI(1, "In SetPosixACL (Unix) for path %s", fileName);
2888setAclRetry: 2893setAclRetry:
2889 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2894 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2890 (void **) &pSMBr); 2895 (void **) &pSMBr);
@@ -2939,7 +2944,7 @@ setAclRetry:
2939 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2944 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2940 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2945 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2941 if (rc) 2946 if (rc)
2942 cFYI(1, ("Set POSIX ACL returned %d", rc)); 2947 cFYI(1, "Set POSIX ACL returned %d", rc);
2943 2948
2944setACLerrorExit: 2949setACLerrorExit:
2945 cifs_buf_release(pSMB); 2950 cifs_buf_release(pSMB);
@@ -2959,7 +2964,7 @@ CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
2959 int bytes_returned; 2964 int bytes_returned;
2960 __u16 params, byte_count; 2965 __u16 params, byte_count;
2961 2966
2962 cFYI(1, ("In GetExtAttr")); 2967 cFYI(1, "In GetExtAttr");
2963 if (tcon == NULL) 2968 if (tcon == NULL)
2964 return -ENODEV; 2969 return -ENODEV;
2965 2970
@@ -2998,7 +3003,7 @@ GetExtAttrRetry:
2998 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3003 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2999 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3004 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3000 if (rc) { 3005 if (rc) {
3001 cFYI(1, ("error %d in GetExtAttr", rc)); 3006 cFYI(1, "error %d in GetExtAttr", rc);
3002 } else { 3007 } else {
3003 /* decode response */ 3008 /* decode response */
3004 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3009 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -3013,7 +3018,7 @@ GetExtAttrRetry:
3013 struct file_chattr_info *pfinfo; 3018 struct file_chattr_info *pfinfo;
3014 /* BB Do we need a cast or hash here ? */ 3019 /* BB Do we need a cast or hash here ? */
3015 if (count != 16) { 3020 if (count != 16) {
3016 cFYI(1, ("Illegal size ret in GetExtAttr")); 3021 cFYI(1, "Illegal size ret in GetExtAttr");
3017 rc = -EIO; 3022 rc = -EIO;
3018 goto GetExtAttrOut; 3023 goto GetExtAttrOut;
3019 } 3024 }
@@ -3043,7 +3048,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3043 QUERY_SEC_DESC_REQ *pSMB; 3048 QUERY_SEC_DESC_REQ *pSMB;
3044 struct kvec iov[1]; 3049 struct kvec iov[1];
3045 3050
3046 cFYI(1, ("GetCifsACL")); 3051 cFYI(1, "GetCifsACL");
3047 3052
3048 *pbuflen = 0; 3053 *pbuflen = 0;
3049 *acl_inf = NULL; 3054 *acl_inf = NULL;
@@ -3068,7 +3073,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3068 CIFS_STD_OP); 3073 CIFS_STD_OP);
3069 cifs_stats_inc(&tcon->num_acl_get); 3074 cifs_stats_inc(&tcon->num_acl_get);
3070 if (rc) { 3075 if (rc) {
3071 cFYI(1, ("Send error in QuerySecDesc = %d", rc)); 3076 cFYI(1, "Send error in QuerySecDesc = %d", rc);
3072 } else { /* decode response */ 3077 } else { /* decode response */
3073 __le32 *parm; 3078 __le32 *parm;
3074 __u32 parm_len; 3079 __u32 parm_len;
@@ -3083,7 +3088,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3083 goto qsec_out; 3088 goto qsec_out;
3084 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base; 3089 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
3085 3090
3086 cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, *acl_inf)); 3091 cFYI(1, "smb %p parm %p data %p", pSMBr, parm, *acl_inf);
3087 3092
3088 if (le32_to_cpu(pSMBr->ParameterCount) != 4) { 3093 if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
3089 rc = -EIO; /* bad smb */ 3094 rc = -EIO; /* bad smb */
@@ -3095,8 +3100,8 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3095 3100
3096 acl_len = le32_to_cpu(*parm); 3101 acl_len = le32_to_cpu(*parm);
3097 if (acl_len != *pbuflen) { 3102 if (acl_len != *pbuflen) {
3098 cERROR(1, ("acl length %d does not match %d", 3103 cERROR(1, "acl length %d does not match %d",
3099 acl_len, *pbuflen)); 3104 acl_len, *pbuflen);
3100 if (*pbuflen > acl_len) 3105 if (*pbuflen > acl_len)
3101 *pbuflen = acl_len; 3106 *pbuflen = acl_len;
3102 } 3107 }
@@ -3105,7 +3110,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3105 header followed by the smallest SID */ 3110 header followed by the smallest SID */
3106 if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) || 3111 if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) ||
3107 (*pbuflen >= 64 * 1024)) { 3112 (*pbuflen >= 64 * 1024)) {
3108 cERROR(1, ("bad acl length %d", *pbuflen)); 3113 cERROR(1, "bad acl length %d", *pbuflen);
3109 rc = -EINVAL; 3114 rc = -EINVAL;
3110 *pbuflen = 0; 3115 *pbuflen = 0;
3111 } else { 3116 } else {
@@ -3179,9 +3184,9 @@ setCifsAclRetry:
3179 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3184 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3180 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3185 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3181 3186
3182 cFYI(1, ("SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc)); 3187 cFYI(1, "SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc);
3183 if (rc) 3188 if (rc)
3184 cFYI(1, ("Set CIFS ACL returned %d", rc)); 3189 cFYI(1, "Set CIFS ACL returned %d", rc);
3185 cifs_buf_release(pSMB); 3190 cifs_buf_release(pSMB);
3186 3191
3187 if (rc == -EAGAIN) 3192 if (rc == -EAGAIN)
@@ -3205,7 +3210,7 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
3205 int bytes_returned; 3210 int bytes_returned;
3206 int name_len; 3211 int name_len;
3207 3212
3208 cFYI(1, ("In SMBQPath path %s", searchName)); 3213 cFYI(1, "In SMBQPath path %s", searchName);
3209QInfRetry: 3214QInfRetry:
3210 rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, 3215 rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB,
3211 (void **) &pSMBr); 3216 (void **) &pSMBr);
@@ -3231,7 +3236,7 @@ QInfRetry:
3231 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3236 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3232 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3237 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3233 if (rc) { 3238 if (rc) {
3234 cFYI(1, ("Send error in QueryInfo = %d", rc)); 3239 cFYI(1, "Send error in QueryInfo = %d", rc);
3235 } else if (pFinfo) { 3240 } else if (pFinfo) {
3236 struct timespec ts; 3241 struct timespec ts;
3237 __u32 time = le32_to_cpu(pSMBr->last_write_time); 3242 __u32 time = le32_to_cpu(pSMBr->last_write_time);
@@ -3305,7 +3310,7 @@ QFileInfoRetry:
3305 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3306 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3311 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3307 if (rc) { 3312 if (rc) {
3308 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3313 cFYI(1, "Send error in QPathInfo = %d", rc);
3309 } else { /* decode response */ 3314 } else { /* decode response */
3310 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3315 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3311 3316
@@ -3343,7 +3348,7 @@ CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
3343 int name_len; 3348 int name_len;
3344 __u16 params, byte_count; 3349 __u16 params, byte_count;
3345 3350
3346/* cFYI(1, ("In QPathInfo path %s", searchName)); */ 3351/* cFYI(1, "In QPathInfo path %s", searchName); */
3347QPathInfoRetry: 3352QPathInfoRetry:
3348 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3353 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3349 (void **) &pSMBr); 3354 (void **) &pSMBr);
@@ -3393,7 +3398,7 @@ QPathInfoRetry:
3393 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3398 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3394 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3399 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3395 if (rc) { 3400 if (rc) {
3396 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3401 cFYI(1, "Send error in QPathInfo = %d", rc);
3397 } else { /* decode response */ 3402 } else { /* decode response */
3398 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3403 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3399 3404
@@ -3473,14 +3478,14 @@ UnixQFileInfoRetry:
3473 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3478 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3474 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3479 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3475 if (rc) { 3480 if (rc) {
3476 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3481 cFYI(1, "Send error in QPathInfo = %d", rc);
3477 } else { /* decode response */ 3482 } else { /* decode response */
3478 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3483 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3479 3484
3480 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3485 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
3481 cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n" 3486 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3482 "Unix Extensions can be disabled on mount " 3487 "Unix Extensions can be disabled on mount "
3483 "by specifying the nosfu mount option.")); 3488 "by specifying the nosfu mount option.");
3484 rc = -EIO; /* bad smb */ 3489 rc = -EIO; /* bad smb */
3485 } else { 3490 } else {
3486 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3491 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3512,7 +3517,7 @@ CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon,
3512 int name_len; 3517 int name_len;
3513 __u16 params, byte_count; 3518 __u16 params, byte_count;
3514 3519
3515 cFYI(1, ("In QPathInfo (Unix) the path %s", searchName)); 3520 cFYI(1, "In QPathInfo (Unix) the path %s", searchName);
3516UnixQPathInfoRetry: 3521UnixQPathInfoRetry:
3517 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3522 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3518 (void **) &pSMBr); 3523 (void **) &pSMBr);
@@ -3559,14 +3564,14 @@ UnixQPathInfoRetry:
3559 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3564 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3560 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3565 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3561 if (rc) { 3566 if (rc) {
3562 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3567 cFYI(1, "Send error in QPathInfo = %d", rc);
3563 } else { /* decode response */ 3568 } else { /* decode response */
3564 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3569 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3565 3570
3566 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3571 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
3567 cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n" 3572 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3568 "Unix Extensions can be disabled on mount " 3573 "Unix Extensions can be disabled on mount "
3569 "by specifying the nosfu mount option.")); 3574 "by specifying the nosfu mount option.");
3570 rc = -EIO; /* bad smb */ 3575 rc = -EIO; /* bad smb */
3571 } else { 3576 } else {
3572 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3577 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3600,7 +3605,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
3600 int name_len; 3605 int name_len;
3601 __u16 params, byte_count; 3606 __u16 params, byte_count;
3602 3607
3603 cFYI(1, ("In FindFirst for %s", searchName)); 3608 cFYI(1, "In FindFirst for %s", searchName);
3604 3609
3605findFirstRetry: 3610findFirstRetry:
3606 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3611 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3677,7 +3682,7 @@ findFirstRetry:
3677 if (rc) {/* BB add logic to retry regular search if Unix search 3682 if (rc) {/* BB add logic to retry regular search if Unix search
3678 rejected unexpectedly by server */ 3683 rejected unexpectedly by server */
3679 /* BB Add code to handle unsupported level rc */ 3684 /* BB Add code to handle unsupported level rc */
3680 cFYI(1, ("Error in FindFirst = %d", rc)); 3685 cFYI(1, "Error in FindFirst = %d", rc);
3681 3686
3682 cifs_buf_release(pSMB); 3687 cifs_buf_release(pSMB);
3683 3688
@@ -3716,7 +3721,7 @@ findFirstRetry:
3716 lnoff = le16_to_cpu(parms->LastNameOffset); 3721 lnoff = le16_to_cpu(parms->LastNameOffset);
3717 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < 3722 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
3718 lnoff) { 3723 lnoff) {
3719 cERROR(1, ("ignoring corrupt resume name")); 3724 cERROR(1, "ignoring corrupt resume name");
3720 psrch_inf->last_entry = NULL; 3725 psrch_inf->last_entry = NULL;
3721 return rc; 3726 return rc;
3722 } 3727 }
@@ -3744,7 +3749,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3744 int bytes_returned, name_len; 3749 int bytes_returned, name_len;
3745 __u16 params, byte_count; 3750 __u16 params, byte_count;
3746 3751
3747 cFYI(1, ("In FindNext")); 3752 cFYI(1, "In FindNext");
3748 3753
3749 if (psrch_inf->endOfSearch) 3754 if (psrch_inf->endOfSearch)
3750 return -ENOENT; 3755 return -ENOENT;
@@ -3808,7 +3813,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3808 cifs_buf_release(pSMB); 3813 cifs_buf_release(pSMB);
3809 rc = 0; /* search probably was closed at end of search*/ 3814 rc = 0; /* search probably was closed at end of search*/
3810 } else 3815 } else
3811 cFYI(1, ("FindNext returned = %d", rc)); 3816 cFYI(1, "FindNext returned = %d", rc);
3812 } else { /* decode response */ 3817 } else { /* decode response */
3813 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3818 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3814 3819
@@ -3844,15 +3849,15 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3844 lnoff = le16_to_cpu(parms->LastNameOffset); 3849 lnoff = le16_to_cpu(parms->LastNameOffset);
3845 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < 3850 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
3846 lnoff) { 3851 lnoff) {
3847 cERROR(1, ("ignoring corrupt resume name")); 3852 cERROR(1, "ignoring corrupt resume name");
3848 psrch_inf->last_entry = NULL; 3853 psrch_inf->last_entry = NULL;
3849 return rc; 3854 return rc;
3850 } else 3855 } else
3851 psrch_inf->last_entry = 3856 psrch_inf->last_entry =
3852 psrch_inf->srch_entries_start + lnoff; 3857 psrch_inf->srch_entries_start + lnoff;
3853 3858
3854/* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", 3859/* cFYI(1, "fnxt2 entries in buf %d index_of_last %d",
3855 psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ 3860 psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */
3856 3861
3857 /* BB fixme add unlock here */ 3862 /* BB fixme add unlock here */
3858 } 3863 }
@@ -3877,7 +3882,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3877 int rc = 0; 3882 int rc = 0;
3878 FINDCLOSE_REQ *pSMB = NULL; 3883 FINDCLOSE_REQ *pSMB = NULL;
3879 3884
3880 cFYI(1, ("In CIFSSMBFindClose")); 3885 cFYI(1, "In CIFSSMBFindClose");
3881 rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB); 3886 rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB);
3882 3887
3883 /* no sense returning error if session restarted 3888 /* no sense returning error if session restarted
@@ -3891,7 +3896,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3891 pSMB->ByteCount = 0; 3896 pSMB->ByteCount = 0;
3892 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 3897 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
3893 if (rc) 3898 if (rc)
3894 cERROR(1, ("Send error in FindClose = %d", rc)); 3899 cERROR(1, "Send error in FindClose = %d", rc);
3895 3900
3896 cifs_stats_inc(&tcon->num_fclose); 3901 cifs_stats_inc(&tcon->num_fclose);
3897 3902
@@ -3914,7 +3919,7 @@ CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
3914 int name_len, bytes_returned; 3919 int name_len, bytes_returned;
3915 __u16 params, byte_count; 3920 __u16 params, byte_count;
3916 3921
3917 cFYI(1, ("In GetSrvInodeNum for %s", searchName)); 3922 cFYI(1, "In GetSrvInodeNum for %s", searchName);
3918 if (tcon == NULL) 3923 if (tcon == NULL)
3919 return -ENODEV; 3924 return -ENODEV;
3920 3925
@@ -3964,7 +3969,7 @@ GetInodeNumberRetry:
3964 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3969 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3965 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3970 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3966 if (rc) { 3971 if (rc) {
3967 cFYI(1, ("error %d in QueryInternalInfo", rc)); 3972 cFYI(1, "error %d in QueryInternalInfo", rc);
3968 } else { 3973 } else {
3969 /* decode response */ 3974 /* decode response */
3970 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3975 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -3979,7 +3984,7 @@ GetInodeNumberRetry:
3979 struct file_internal_info *pfinfo; 3984 struct file_internal_info *pfinfo;
3980 /* BB Do we need a cast or hash here ? */ 3985 /* BB Do we need a cast or hash here ? */
3981 if (count < 8) { 3986 if (count < 8) {
3982 cFYI(1, ("Illegal size ret in QryIntrnlInf")); 3987 cFYI(1, "Illegal size ret in QryIntrnlInf");
3983 rc = -EIO; 3988 rc = -EIO;
3984 goto GetInodeNumOut; 3989 goto GetInodeNumOut;
3985 } 3990 }
@@ -4020,16 +4025,16 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4020 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); 4025 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
4021 4026
4022 if (*num_of_nodes < 1) { 4027 if (*num_of_nodes < 1) {
4023 cERROR(1, ("num_referrals: must be at least > 0," 4028 cERROR(1, "num_referrals: must be at least > 0,"
4024 "but we get num_referrals = %d\n", *num_of_nodes)); 4029 "but we get num_referrals = %d\n", *num_of_nodes);
4025 rc = -EINVAL; 4030 rc = -EINVAL;
4026 goto parse_DFS_referrals_exit; 4031 goto parse_DFS_referrals_exit;
4027 } 4032 }
4028 4033
4029 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); 4034 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
4030 if (ref->VersionNumber != cpu_to_le16(3)) { 4035 if (ref->VersionNumber != cpu_to_le16(3)) {
4031 cERROR(1, ("Referrals of V%d version are not supported," 4036 cERROR(1, "Referrals of V%d version are not supported,"
4032 "should be V3", le16_to_cpu(ref->VersionNumber))); 4037 "should be V3", le16_to_cpu(ref->VersionNumber));
4033 rc = -EINVAL; 4038 rc = -EINVAL;
4034 goto parse_DFS_referrals_exit; 4039 goto parse_DFS_referrals_exit;
4035 } 4040 }
@@ -4038,14 +4043,14 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4038 data_end = (char *)(&(pSMBr->PathConsumed)) + 4043 data_end = (char *)(&(pSMBr->PathConsumed)) +
4039 le16_to_cpu(pSMBr->t2.DataCount); 4044 le16_to_cpu(pSMBr->t2.DataCount);
4040 4045
4041 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n", 4046 cFYI(1, "num_referrals: %d dfs flags: 0x%x ...\n",
4042 *num_of_nodes, 4047 *num_of_nodes,
4043 le32_to_cpu(pSMBr->DFSFlags))); 4048 le32_to_cpu(pSMBr->DFSFlags));
4044 4049
4045 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * 4050 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) *
4046 *num_of_nodes, GFP_KERNEL); 4051 *num_of_nodes, GFP_KERNEL);
4047 if (*target_nodes == NULL) { 4052 if (*target_nodes == NULL) {
4048 cERROR(1, ("Failed to allocate buffer for target_nodes\n")); 4053 cERROR(1, "Failed to allocate buffer for target_nodes\n");
4049 rc = -ENOMEM; 4054 rc = -ENOMEM;
4050 goto parse_DFS_referrals_exit; 4055 goto parse_DFS_referrals_exit;
4051 } 4056 }
@@ -4121,7 +4126,7 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses,
4121 *num_of_nodes = 0; 4126 *num_of_nodes = 0;
4122 *target_nodes = NULL; 4127 *target_nodes = NULL;
4123 4128
4124 cFYI(1, ("In GetDFSRefer the path %s", searchName)); 4129 cFYI(1, "In GetDFSRefer the path %s", searchName);
4125 if (ses == NULL) 4130 if (ses == NULL)
4126 return -ENODEV; 4131 return -ENODEV;
4127getDFSRetry: 4132getDFSRetry:
@@ -4188,7 +4193,7 @@ getDFSRetry:
4188 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4193 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
4189 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4194 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4190 if (rc) { 4195 if (rc) {
4191 cFYI(1, ("Send error in GetDFSRefer = %d", rc)); 4196 cFYI(1, "Send error in GetDFSRefer = %d", rc);
4192 goto GetDFSRefExit; 4197 goto GetDFSRefExit;
4193 } 4198 }
4194 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4199 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4199,9 +4204,9 @@ getDFSRetry:
4199 goto GetDFSRefExit; 4204 goto GetDFSRefExit;
4200 } 4205 }
4201 4206
4202 cFYI(1, ("Decoding GetDFSRefer response BCC: %d Offset %d", 4207 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4203 pSMBr->ByteCount, 4208 pSMBr->ByteCount,
4204 le16_to_cpu(pSMBr->t2.DataOffset))); 4209 le16_to_cpu(pSMBr->t2.DataOffset));
4205 4210
4206 /* parse returned result into more usable form */ 4211 /* parse returned result into more usable form */
4207 rc = parse_DFS_referrals(pSMBr, num_of_nodes, 4212 rc = parse_DFS_referrals(pSMBr, num_of_nodes,
@@ -4229,7 +4234,7 @@ SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
4229 int bytes_returned = 0; 4234 int bytes_returned = 0;
4230 __u16 params, byte_count; 4235 __u16 params, byte_count;
4231 4236
4232 cFYI(1, ("OldQFSInfo")); 4237 cFYI(1, "OldQFSInfo");
4233oldQFSInfoRetry: 4238oldQFSInfoRetry:
4234 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4239 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4235 (void **) &pSMBr); 4240 (void **) &pSMBr);
@@ -4262,7 +4267,7 @@ oldQFSInfoRetry:
4262 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4263 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4268 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4264 if (rc) { 4269 if (rc) {
4265 cFYI(1, ("Send error in QFSInfo = %d", rc)); 4270 cFYI(1, "Send error in QFSInfo = %d", rc);
4266 } else { /* decode response */ 4271 } else { /* decode response */
4267 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4272 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4268 4273
@@ -4270,8 +4275,8 @@ oldQFSInfoRetry:
4270 rc = -EIO; /* bad smb */ 4275 rc = -EIO; /* bad smb */
4271 else { 4276 else {
4272 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4277 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4273 cFYI(1, ("qfsinf resp BCC: %d Offset %d", 4278 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4274 pSMBr->ByteCount, data_offset)); 4279 pSMBr->ByteCount, data_offset);
4275 4280
4276 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4281 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4277 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4282 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4283,11 +4288,10 @@ oldQFSInfoRetry:
4283 le32_to_cpu(response_data->TotalAllocationUnits); 4288 le32_to_cpu(response_data->TotalAllocationUnits);
4284 FSData->f_bfree = FSData->f_bavail = 4289 FSData->f_bfree = FSData->f_bavail =
4285 le32_to_cpu(response_data->FreeAllocationUnits); 4290 le32_to_cpu(response_data->FreeAllocationUnits);
4286 cFYI(1, 4291 cFYI(1, "Blocks: %lld Free: %lld Block size %ld",
4287 ("Blocks: %lld Free: %lld Block size %ld", 4292 (unsigned long long)FSData->f_blocks,
4288 (unsigned long long)FSData->f_blocks, 4293 (unsigned long long)FSData->f_bfree,
4289 (unsigned long long)FSData->f_bfree, 4294 FSData->f_bsize);
4290 FSData->f_bsize));
4291 } 4295 }
4292 } 4296 }
4293 cifs_buf_release(pSMB); 4297 cifs_buf_release(pSMB);
@@ -4309,7 +4313,7 @@ CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
4309 int bytes_returned = 0; 4313 int bytes_returned = 0;
4310 __u16 params, byte_count; 4314 __u16 params, byte_count;
4311 4315
4312 cFYI(1, ("In QFSInfo")); 4316 cFYI(1, "In QFSInfo");
4313QFSInfoRetry: 4317QFSInfoRetry:
4314 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4318 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4315 (void **) &pSMBr); 4319 (void **) &pSMBr);
@@ -4342,7 +4346,7 @@ QFSInfoRetry:
4342 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4346 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4343 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4347 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4344 if (rc) { 4348 if (rc) {
4345 cFYI(1, ("Send error in QFSInfo = %d", rc)); 4349 cFYI(1, "Send error in QFSInfo = %d", rc);
4346 } else { /* decode response */ 4350 } else { /* decode response */
4347 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4351 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4348 4352
@@ -4363,11 +4367,10 @@ QFSInfoRetry:
4363 le64_to_cpu(response_data->TotalAllocationUnits); 4367 le64_to_cpu(response_data->TotalAllocationUnits);
4364 FSData->f_bfree = FSData->f_bavail = 4368 FSData->f_bfree = FSData->f_bavail =
4365 le64_to_cpu(response_data->FreeAllocationUnits); 4369 le64_to_cpu(response_data->FreeAllocationUnits);
4366 cFYI(1, 4370 cFYI(1, "Blocks: %lld Free: %lld Block size %ld",
4367 ("Blocks: %lld Free: %lld Block size %ld", 4371 (unsigned long long)FSData->f_blocks,
4368 (unsigned long long)FSData->f_blocks, 4372 (unsigned long long)FSData->f_bfree,
4369 (unsigned long long)FSData->f_bfree, 4373 FSData->f_bsize);
4370 FSData->f_bsize));
4371 } 4374 }
4372 } 4375 }
4373 cifs_buf_release(pSMB); 4376 cifs_buf_release(pSMB);
@@ -4389,7 +4392,7 @@ CIFSSMBQFSAttributeInfo(const int xid, struct cifsTconInfo *tcon)
4389 int bytes_returned = 0; 4392 int bytes_returned = 0;
4390 __u16 params, byte_count; 4393 __u16 params, byte_count;
4391 4394
4392 cFYI(1, ("In QFSAttributeInfo")); 4395 cFYI(1, "In QFSAttributeInfo");
4393QFSAttributeRetry: 4396QFSAttributeRetry:
4394 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4397 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4395 (void **) &pSMBr); 4398 (void **) &pSMBr);
@@ -4423,7 +4426,7 @@ QFSAttributeRetry:
4423 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4426 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4424 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4427 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4425 if (rc) { 4428 if (rc) {
4426 cERROR(1, ("Send error in QFSAttributeInfo = %d", rc)); 4429 cERROR(1, "Send error in QFSAttributeInfo = %d", rc);
4427 } else { /* decode response */ 4430 } else { /* decode response */
4428 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4431 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4429 4432
@@ -4459,7 +4462,7 @@ CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon)
4459 int bytes_returned = 0; 4462 int bytes_returned = 0;
4460 __u16 params, byte_count; 4463 __u16 params, byte_count;
4461 4464
4462 cFYI(1, ("In QFSDeviceInfo")); 4465 cFYI(1, "In QFSDeviceInfo");
4463QFSDeviceRetry: 4466QFSDeviceRetry:
4464 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4467 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4465 (void **) &pSMBr); 4468 (void **) &pSMBr);
@@ -4494,7 +4497,7 @@ QFSDeviceRetry:
4494 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4497 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4495 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4498 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4496 if (rc) { 4499 if (rc) {
4497 cFYI(1, ("Send error in QFSDeviceInfo = %d", rc)); 4500 cFYI(1, "Send error in QFSDeviceInfo = %d", rc);
4498 } else { /* decode response */ 4501 } else { /* decode response */
4499 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4502 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4500 4503
@@ -4529,7 +4532,7 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
4529 int bytes_returned = 0; 4532 int bytes_returned = 0;
4530 __u16 params, byte_count; 4533 __u16 params, byte_count;
4531 4534
4532 cFYI(1, ("In QFSUnixInfo")); 4535 cFYI(1, "In QFSUnixInfo");
4533QFSUnixRetry: 4536QFSUnixRetry:
4534 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4537 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4535 (void **) &pSMBr); 4538 (void **) &pSMBr);
@@ -4563,7 +4566,7 @@ QFSUnixRetry:
4563 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4564 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4567 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4565 if (rc) { 4568 if (rc) {
4566 cERROR(1, ("Send error in QFSUnixInfo = %d", rc)); 4569 cERROR(1, "Send error in QFSUnixInfo = %d", rc);
4567 } else { /* decode response */ 4570 } else { /* decode response */
4568 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4571 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4569 4572
@@ -4598,7 +4601,7 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
4598 int bytes_returned = 0; 4601 int bytes_returned = 0;
4599 __u16 params, param_offset, offset, byte_count; 4602 __u16 params, param_offset, offset, byte_count;
4600 4603
4601 cFYI(1, ("In SETFSUnixInfo")); 4604 cFYI(1, "In SETFSUnixInfo");
4602SETFSUnixRetry: 4605SETFSUnixRetry:
4603 /* BB switch to small buf init to save memory */ 4606 /* BB switch to small buf init to save memory */
4604 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4607 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -4646,7 +4649,7 @@ SETFSUnixRetry:
4646 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4649 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4647 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4650 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4648 if (rc) { 4651 if (rc) {
4649 cERROR(1, ("Send error in SETFSUnixInfo = %d", rc)); 4652 cERROR(1, "Send error in SETFSUnixInfo = %d", rc);
4650 } else { /* decode response */ 4653 } else { /* decode response */
4651 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4654 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4652 if (rc) 4655 if (rc)
@@ -4674,7 +4677,7 @@ CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
4674 int bytes_returned = 0; 4677 int bytes_returned = 0;
4675 __u16 params, byte_count; 4678 __u16 params, byte_count;
4676 4679
4677 cFYI(1, ("In QFSPosixInfo")); 4680 cFYI(1, "In QFSPosixInfo");
4678QFSPosixRetry: 4681QFSPosixRetry:
4679 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4682 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4680 (void **) &pSMBr); 4683 (void **) &pSMBr);
@@ -4708,7 +4711,7 @@ QFSPosixRetry:
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4711 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4709 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4712 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4710 if (rc) { 4713 if (rc) {
4711 cFYI(1, ("Send error in QFSUnixInfo = %d", rc)); 4714 cFYI(1, "Send error in QFSUnixInfo = %d", rc);
4712 } else { /* decode response */ 4715 } else { /* decode response */
4713 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4716 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4714 4717
@@ -4768,7 +4771,7 @@ CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName,
4768 int bytes_returned = 0; 4771 int bytes_returned = 0;
4769 __u16 params, byte_count, data_count, param_offset, offset; 4772 __u16 params, byte_count, data_count, param_offset, offset;
4770 4773
4771 cFYI(1, ("In SetEOF")); 4774 cFYI(1, "In SetEOF");
4772SetEOFRetry: 4775SetEOFRetry:
4773 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4776 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4774 (void **) &pSMBr); 4777 (void **) &pSMBr);
@@ -4834,7 +4837,7 @@ SetEOFRetry:
4834 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4837 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4835 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4838 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4836 if (rc) 4839 if (rc)
4837 cFYI(1, ("SetPathInfo (file size) returned %d", rc)); 4840 cFYI(1, "SetPathInfo (file size) returned %d", rc);
4838 4841
4839 cifs_buf_release(pSMB); 4842 cifs_buf_release(pSMB);
4840 4843
@@ -4854,8 +4857,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4854 int rc = 0; 4857 int rc = 0;
4855 __u16 params, param_offset, offset, byte_count, count; 4858 __u16 params, param_offset, offset, byte_count, count;
4856 4859
4857 cFYI(1, ("SetFileSize (via SetFileInfo) %lld", 4860 cFYI(1, "SetFileSize (via SetFileInfo) %lld",
4858 (long long)size)); 4861 (long long)size);
4859 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 4862 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
4860 4863
4861 if (rc) 4864 if (rc)
@@ -4914,9 +4917,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4914 pSMB->ByteCount = cpu_to_le16(byte_count); 4917 pSMB->ByteCount = cpu_to_le16(byte_count);
4915 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4918 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4916 if (rc) { 4919 if (rc) {
4917 cFYI(1, 4920 cFYI(1, "Send error in SetFileInfo (SetFileSize) = %d", rc);
4918 ("Send error in SetFileInfo (SetFileSize) = %d",
4919 rc));
4920 } 4921 }
4921 4922
4922 /* Note: On -EAGAIN error only caller can retry on handle based calls 4923 /* Note: On -EAGAIN error only caller can retry on handle based calls
@@ -4940,7 +4941,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4940 int rc = 0; 4941 int rc = 0;
4941 __u16 params, param_offset, offset, byte_count, count; 4942 __u16 params, param_offset, offset, byte_count, count;
4942 4943
4943 cFYI(1, ("Set Times (via SetFileInfo)")); 4944 cFYI(1, "Set Times (via SetFileInfo)");
4944 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 4945 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
4945 4946
4946 if (rc) 4947 if (rc)
@@ -4985,7 +4986,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4985 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 4986 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
4986 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4987 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4987 if (rc) 4988 if (rc)
4988 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); 4989 cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
4989 4990
4990 /* Note: On -EAGAIN error only caller can retry on handle based calls 4991 /* Note: On -EAGAIN error only caller can retry on handle based calls
4991 since file handle passed in no longer valid */ 4992 since file handle passed in no longer valid */
@@ -5002,7 +5003,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5002 int rc = 0; 5003 int rc = 0;
5003 __u16 params, param_offset, offset, byte_count, count; 5004 __u16 params, param_offset, offset, byte_count, count;
5004 5005
5005 cFYI(1, ("Set File Disposition (via SetFileInfo)")); 5006 cFYI(1, "Set File Disposition (via SetFileInfo)");
5006 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 5007 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5007 5008
5008 if (rc) 5009 if (rc)
@@ -5044,7 +5045,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5044 *data_offset = delete_file ? 1 : 0; 5045 *data_offset = delete_file ? 1 : 0;
5045 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5046 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5046 if (rc) 5047 if (rc)
5047 cFYI(1, ("Send error in SetFileDisposition = %d", rc)); 5048 cFYI(1, "Send error in SetFileDisposition = %d", rc);
5048 5049
5049 return rc; 5050 return rc;
5050} 5051}
@@ -5062,7 +5063,7 @@ CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
5062 char *data_offset; 5063 char *data_offset;
5063 __u16 params, param_offset, offset, byte_count, count; 5064 __u16 params, param_offset, offset, byte_count, count;
5064 5065
5065 cFYI(1, ("In SetTimes")); 5066 cFYI(1, "In SetTimes");
5066 5067
5067SetTimesRetry: 5068SetTimesRetry:
5068 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5069 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -5118,7 +5119,7 @@ SetTimesRetry:
5118 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5119 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5119 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5120 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5120 if (rc) 5121 if (rc)
5121 cFYI(1, ("SetPathInfo (times) returned %d", rc)); 5122 cFYI(1, "SetPathInfo (times) returned %d", rc);
5122 5123
5123 cifs_buf_release(pSMB); 5124 cifs_buf_release(pSMB);
5124 5125
@@ -5143,7 +5144,7 @@ CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName,
5143 int bytes_returned; 5144 int bytes_returned;
5144 int name_len; 5145 int name_len;
5145 5146
5146 cFYI(1, ("In SetAttrLegacy")); 5147 cFYI(1, "In SetAttrLegacy");
5147 5148
5148SetAttrLgcyRetry: 5149SetAttrLgcyRetry:
5149 rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, 5150 rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB,
@@ -5169,7 +5170,7 @@ SetAttrLgcyRetry:
5169 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5170 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5171 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5171 if (rc) 5172 if (rc)
5172 cFYI(1, ("Error in LegacySetAttr = %d", rc)); 5173 cFYI(1, "Error in LegacySetAttr = %d", rc);
5173 5174
5174 cifs_buf_release(pSMB); 5175 cifs_buf_release(pSMB);
5175 5176
@@ -5231,7 +5232,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5231 int rc = 0; 5232 int rc = 0;
5232 u16 params, param_offset, offset, byte_count, count; 5233 u16 params, param_offset, offset, byte_count, count;
5233 5234
5234 cFYI(1, ("Set Unix Info (via SetFileInfo)")); 5235 cFYI(1, "Set Unix Info (via SetFileInfo)");
5235 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 5236 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5236 5237
5237 if (rc) 5238 if (rc)
@@ -5276,7 +5277,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5276 5277
5277 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5278 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5278 if (rc) 5279 if (rc)
5279 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); 5280 cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
5280 5281
5281 /* Note: On -EAGAIN error only caller can retry on handle based calls 5282 /* Note: On -EAGAIN error only caller can retry on handle based calls
5282 since file handle passed in no longer valid */ 5283 since file handle passed in no longer valid */
@@ -5297,7 +5298,7 @@ CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5297 FILE_UNIX_BASIC_INFO *data_offset; 5298 FILE_UNIX_BASIC_INFO *data_offset;
5298 __u16 params, param_offset, offset, count, byte_count; 5299 __u16 params, param_offset, offset, count, byte_count;
5299 5300
5300 cFYI(1, ("In SetUID/GID/Mode")); 5301 cFYI(1, "In SetUID/GID/Mode");
5301setPermsRetry: 5302setPermsRetry:
5302 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5303 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5303 (void **) &pSMBr); 5304 (void **) &pSMBr);
@@ -5353,7 +5354,7 @@ setPermsRetry:
5353 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5354 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5354 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5355 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5355 if (rc) 5356 if (rc)
5356 cFYI(1, ("SetPathInfo (perms) returned %d", rc)); 5357 cFYI(1, "SetPathInfo (perms) returned %d", rc);
5357 5358
5358 cifs_buf_release(pSMB); 5359 cifs_buf_release(pSMB);
5359 if (rc == -EAGAIN) 5360 if (rc == -EAGAIN)
@@ -5372,7 +5373,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5372 struct dir_notify_req *dnotify_req; 5373 struct dir_notify_req *dnotify_req;
5373 int bytes_returned; 5374 int bytes_returned;
5374 5375
5375 cFYI(1, ("In CIFSSMBNotify for file handle %d", (int)netfid)); 5376 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5376 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, 5377 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5377 (void **) &pSMBr); 5378 (void **) &pSMBr);
5378 if (rc) 5379 if (rc)
@@ -5406,7 +5407,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5406 (struct smb_hdr *)pSMBr, &bytes_returned, 5407 (struct smb_hdr *)pSMBr, &bytes_returned,
5407 CIFS_ASYNC_OP); 5408 CIFS_ASYNC_OP);
5408 if (rc) { 5409 if (rc) {
5409 cFYI(1, ("Error in Notify = %d", rc)); 5410 cFYI(1, "Error in Notify = %d", rc);
5410 } else { 5411 } else {
5411 /* Add file to outstanding requests */ 5412 /* Add file to outstanding requests */
5412 /* BB change to kmem cache alloc */ 5413 /* BB change to kmem cache alloc */
@@ -5462,7 +5463,7 @@ CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
5462 char *end_of_smb; 5463 char *end_of_smb;
5463 __u16 params, byte_count, data_offset; 5464 __u16 params, byte_count, data_offset;
5464 5465
5465 cFYI(1, ("In Query All EAs path %s", searchName)); 5466 cFYI(1, "In Query All EAs path %s", searchName);
5466QAllEAsRetry: 5467QAllEAsRetry:
5467 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5468 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5468 (void **) &pSMBr); 5469 (void **) &pSMBr);
@@ -5509,7 +5510,7 @@ QAllEAsRetry:
5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5510 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5510 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5511 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5511 if (rc) { 5512 if (rc) {
5512 cFYI(1, ("Send error in QueryAllEAs = %d", rc)); 5513 cFYI(1, "Send error in QueryAllEAs = %d", rc);
5513 goto QAllEAsOut; 5514 goto QAllEAsOut;
5514 } 5515 }
5515 5516
@@ -5537,16 +5538,16 @@ QAllEAsRetry:
5537 (((char *) &pSMBr->hdr.Protocol) + data_offset); 5538 (((char *) &pSMBr->hdr.Protocol) + data_offset);
5538 5539
5539 list_len = le32_to_cpu(ea_response_data->list_len); 5540 list_len = le32_to_cpu(ea_response_data->list_len);
5540 cFYI(1, ("ea length %d", list_len)); 5541 cFYI(1, "ea length %d", list_len);
5541 if (list_len <= 8) { 5542 if (list_len <= 8) {
5542 cFYI(1, ("empty EA list returned from server")); 5543 cFYI(1, "empty EA list returned from server");
5543 goto QAllEAsOut; 5544 goto QAllEAsOut;
5544 } 5545 }
5545 5546
5546 /* make sure list_len doesn't go past end of SMB */ 5547 /* make sure list_len doesn't go past end of SMB */
5547 end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr); 5548 end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr);
5548 if ((char *)ea_response_data + list_len > end_of_smb) { 5549 if ((char *)ea_response_data + list_len > end_of_smb) {
5549 cFYI(1, ("EA list appears to go beyond SMB")); 5550 cFYI(1, "EA list appears to go beyond SMB");
5550 rc = -EIO; 5551 rc = -EIO;
5551 goto QAllEAsOut; 5552 goto QAllEAsOut;
5552 } 5553 }
@@ -5563,7 +5564,7 @@ QAllEAsRetry:
5563 temp_ptr += 4; 5564 temp_ptr += 4;
5564 /* make sure we can read name_len and value_len */ 5565 /* make sure we can read name_len and value_len */
5565 if (list_len < 0) { 5566 if (list_len < 0) {
5566 cFYI(1, ("EA entry goes beyond length of list")); 5567 cFYI(1, "EA entry goes beyond length of list");
5567 rc = -EIO; 5568 rc = -EIO;
5568 goto QAllEAsOut; 5569 goto QAllEAsOut;
5569 } 5570 }
@@ -5572,7 +5573,7 @@ QAllEAsRetry:
5572 value_len = le16_to_cpu(temp_fea->value_len); 5573 value_len = le16_to_cpu(temp_fea->value_len);
5573 list_len -= name_len + 1 + value_len; 5574 list_len -= name_len + 1 + value_len;
5574 if (list_len < 0) { 5575 if (list_len < 0) {
5575 cFYI(1, ("EA entry goes beyond length of list")); 5576 cFYI(1, "EA entry goes beyond length of list");
5576 rc = -EIO; 5577 rc = -EIO;
5577 goto QAllEAsOut; 5578 goto QAllEAsOut;
5578 } 5579 }
@@ -5639,7 +5640,7 @@ CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, const char *fileName,
5639 int bytes_returned = 0; 5640 int bytes_returned = 0;
5640 __u16 params, param_offset, byte_count, offset, count; 5641 __u16 params, param_offset, byte_count, offset, count;
5641 5642
5642 cFYI(1, ("In SetEA")); 5643 cFYI(1, "In SetEA");
5643SetEARetry: 5644SetEARetry:
5644 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5645 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5645 (void **) &pSMBr); 5646 (void **) &pSMBr);
@@ -5721,7 +5722,7 @@ SetEARetry:
5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5722 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5723 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5723 if (rc) 5724 if (rc)
5724 cFYI(1, ("SetPathInfo (EA) returned %d", rc)); 5725 cFYI(1, "SetPathInfo (EA) returned %d", rc);
5725 5726
5726 cifs_buf_release(pSMB); 5727 cifs_buf_release(pSMB);
5727 5728
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d9566bf8f917..2208f06e4c45 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool sockopt_tcp_nodelay:1; 102 bool sockopt_tcp_nodelay:1;
103 unsigned short int port; 103 unsigned short int port;
104 char *prepath; 104 char *prepath;
105 struct nls_table *local_nls;
105}; 106};
106 107
107static int ipv4_connect(struct TCP_Server_Info *server); 108static int ipv4_connect(struct TCP_Server_Info *server);
@@ -135,7 +136,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
135 spin_unlock(&GlobalMid_Lock); 136 spin_unlock(&GlobalMid_Lock);
136 server->maxBuf = 0; 137 server->maxBuf = 0;
137 138
138 cFYI(1, ("Reconnecting tcp session")); 139 cFYI(1, "Reconnecting tcp session");
139 140
140 /* before reconnecting the tcp session, mark the smb session (uid) 141 /* before reconnecting the tcp session, mark the smb session (uid)
141 and the tid bad so they are not used until reconnected */ 142 and the tid bad so they are not used until reconnected */
@@ -153,12 +154,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
153 /* do not want to be sending data on a socket we are freeing */ 154 /* do not want to be sending data on a socket we are freeing */
154 mutex_lock(&server->srv_mutex); 155 mutex_lock(&server->srv_mutex);
155 if (server->ssocket) { 156 if (server->ssocket) {
156 cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state, 157 cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state,
157 server->ssocket->flags)); 158 server->ssocket->flags);
158 kernel_sock_shutdown(server->ssocket, SHUT_WR); 159 kernel_sock_shutdown(server->ssocket, SHUT_WR);
159 cFYI(1, ("Post shutdown state: 0x%x Flags: 0x%lx", 160 cFYI(1, "Post shutdown state: 0x%x Flags: 0x%lx",
160 server->ssocket->state, 161 server->ssocket->state,
161 server->ssocket->flags)); 162 server->ssocket->flags);
162 sock_release(server->ssocket); 163 sock_release(server->ssocket);
163 server->ssocket = NULL; 164 server->ssocket = NULL;
164 } 165 }
@@ -187,7 +188,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
187 else 188 else
188 rc = ipv4_connect(server); 189 rc = ipv4_connect(server);
189 if (rc) { 190 if (rc) {
190 cFYI(1, ("reconnect error %d", rc)); 191 cFYI(1, "reconnect error %d", rc);
191 msleep(3000); 192 msleep(3000);
192 } else { 193 } else {
193 atomic_inc(&tcpSesReconnectCount); 194 atomic_inc(&tcpSesReconnectCount);
@@ -223,7 +224,7 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
223 /* check for plausible wct, bcc and t2 data and parm sizes */ 224 /* check for plausible wct, bcc and t2 data and parm sizes */
224 /* check for parm and data offset going beyond end of smb */ 225 /* check for parm and data offset going beyond end of smb */
225 if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ 226 if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */
226 cFYI(1, ("invalid transact2 word count")); 227 cFYI(1, "invalid transact2 word count");
227 return -EINVAL; 228 return -EINVAL;
228 } 229 }
229 230
@@ -237,15 +238,15 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
237 if (remaining == 0) 238 if (remaining == 0)
238 return 0; 239 return 0;
239 else if (remaining < 0) { 240 else if (remaining < 0) {
240 cFYI(1, ("total data %d smaller than data in frame %d", 241 cFYI(1, "total data %d smaller than data in frame %d",
241 total_data_size, data_in_this_rsp)); 242 total_data_size, data_in_this_rsp);
242 return -EINVAL; 243 return -EINVAL;
243 } else { 244 } else {
244 cFYI(1, ("missing %d bytes from transact2, check next response", 245 cFYI(1, "missing %d bytes from transact2, check next response",
245 remaining)); 246 remaining);
246 if (total_data_size > maxBufSize) { 247 if (total_data_size > maxBufSize) {
247 cERROR(1, ("TotalDataSize %d is over maximum buffer %d", 248 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
248 total_data_size, maxBufSize)); 249 total_data_size, maxBufSize);
249 return -EINVAL; 250 return -EINVAL;
250 } 251 }
251 return remaining; 252 return remaining;
@@ -267,7 +268,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
267 total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); 268 total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount);
268 269
269 if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { 270 if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) {
270 cFYI(1, ("total data size of primary and secondary t2 differ")); 271 cFYI(1, "total data size of primary and secondary t2 differ");
271 } 272 }
272 273
273 total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount); 274 total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount);
@@ -282,7 +283,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
282 283
283 total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount); 284 total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount);
284 if (remaining < total_in_buf2) { 285 if (remaining < total_in_buf2) {
285 cFYI(1, ("transact2 2nd response contains too much data")); 286 cFYI(1, "transact2 2nd response contains too much data");
286 } 287 }
287 288
288 /* find end of first SMB data area */ 289 /* find end of first SMB data area */
@@ -311,7 +312,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
311 pTargetSMB->smb_buf_length = byte_count; 312 pTargetSMB->smb_buf_length = byte_count;
312 313
313 if (remaining == total_in_buf2) { 314 if (remaining == total_in_buf2) {
314 cFYI(1, ("found the last secondary response")); 315 cFYI(1, "found the last secondary response");
315 return 0; /* we are done */ 316 return 0; /* we are done */
316 } else /* more responses to go */ 317 } else /* more responses to go */
317 return 1; 318 return 1;
@@ -339,7 +340,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
339 int reconnect; 340 int reconnect;
340 341
341 current->flags |= PF_MEMALLOC; 342 current->flags |= PF_MEMALLOC;
342 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 343 cFYI(1, "Demultiplex PID: %d", task_pid_nr(current));
343 344
344 length = atomic_inc_return(&tcpSesAllocCount); 345 length = atomic_inc_return(&tcpSesAllocCount);
345 if (length > 1) 346 if (length > 1)
@@ -353,7 +354,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
353 if (bigbuf == NULL) { 354 if (bigbuf == NULL) {
354 bigbuf = cifs_buf_get(); 355 bigbuf = cifs_buf_get();
355 if (!bigbuf) { 356 if (!bigbuf) {
356 cERROR(1, ("No memory for large SMB response")); 357 cERROR(1, "No memory for large SMB response");
357 msleep(3000); 358 msleep(3000);
358 /* retry will check if exiting */ 359 /* retry will check if exiting */
359 continue; 360 continue;
@@ -366,7 +367,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
366 if (smallbuf == NULL) { 367 if (smallbuf == NULL) {
367 smallbuf = cifs_small_buf_get(); 368 smallbuf = cifs_small_buf_get();
368 if (!smallbuf) { 369 if (!smallbuf) {
369 cERROR(1, ("No memory for SMB response")); 370 cERROR(1, "No memory for SMB response");
370 msleep(1000); 371 msleep(1000);
371 /* retry will check if exiting */ 372 /* retry will check if exiting */
372 continue; 373 continue;
@@ -391,9 +392,9 @@ incomplete_rcv:
391 if (server->tcpStatus == CifsExiting) { 392 if (server->tcpStatus == CifsExiting) {
392 break; 393 break;
393 } else if (server->tcpStatus == CifsNeedReconnect) { 394 } else if (server->tcpStatus == CifsNeedReconnect) {
394 cFYI(1, ("Reconnect after server stopped responding")); 395 cFYI(1, "Reconnect after server stopped responding");
395 cifs_reconnect(server); 396 cifs_reconnect(server);
396 cFYI(1, ("call to reconnect done")); 397 cFYI(1, "call to reconnect done");
397 csocket = server->ssocket; 398 csocket = server->ssocket;
398 continue; 399 continue;
399 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 400 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
@@ -411,7 +412,7 @@ incomplete_rcv:
411 continue; 412 continue;
412 } else if (length <= 0) { 413 } else if (length <= 0) {
413 if (server->tcpStatus == CifsNew) { 414 if (server->tcpStatus == CifsNew) {
414 cFYI(1, ("tcp session abend after SMBnegprot")); 415 cFYI(1, "tcp session abend after SMBnegprot");
415 /* some servers kill the TCP session rather than 416 /* some servers kill the TCP session rather than
416 returning an SMB negprot error, in which 417 returning an SMB negprot error, in which
417 case reconnecting here is not going to help, 418 case reconnecting here is not going to help,
@@ -419,18 +420,18 @@ incomplete_rcv:
419 break; 420 break;
420 } 421 }
421 if (!try_to_freeze() && (length == -EINTR)) { 422 if (!try_to_freeze() && (length == -EINTR)) {
422 cFYI(1, ("cifsd thread killed")); 423 cFYI(1, "cifsd thread killed");
423 break; 424 break;
424 } 425 }
425 cFYI(1, ("Reconnect after unexpected peek error %d", 426 cFYI(1, "Reconnect after unexpected peek error %d",
426 length)); 427 length);
427 cifs_reconnect(server); 428 cifs_reconnect(server);
428 csocket = server->ssocket; 429 csocket = server->ssocket;
429 wake_up(&server->response_q); 430 wake_up(&server->response_q);
430 continue; 431 continue;
431 } else if (length < pdu_length) { 432 } else if (length < pdu_length) {
432 cFYI(1, ("requested %d bytes but only got %d bytes", 433 cFYI(1, "requested %d bytes but only got %d bytes",
433 pdu_length, length)); 434 pdu_length, length);
434 pdu_length -= length; 435 pdu_length -= length;
435 msleep(1); 436 msleep(1);
436 goto incomplete_rcv; 437 goto incomplete_rcv;
@@ -450,18 +451,18 @@ incomplete_rcv:
450 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 451 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
451 smb_buffer->smb_buf_length = pdu_length; 452 smb_buffer->smb_buf_length = pdu_length;
452 453
453 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); 454 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
454 455
455 if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { 456 if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) {
456 continue; 457 continue;
457 } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { 458 } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) {
458 cFYI(1, ("Good RFC 1002 session rsp")); 459 cFYI(1, "Good RFC 1002 session rsp");
459 continue; 460 continue;
460 } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { 461 } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) {
461 /* we get this from Windows 98 instead of 462 /* we get this from Windows 98 instead of
462 an error on SMB negprot response */ 463 an error on SMB negprot response */
463 cFYI(1, ("Negative RFC1002 Session Response Error 0x%x)", 464 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
464 pdu_length)); 465 pdu_length);
465 if (server->tcpStatus == CifsNew) { 466 if (server->tcpStatus == CifsNew) {
466 /* if nack on negprot (rather than 467 /* if nack on negprot (rather than
467 ret of smb negprot error) reconnecting 468 ret of smb negprot error) reconnecting
@@ -484,7 +485,7 @@ incomplete_rcv:
484 continue; 485 continue;
485 } 486 }
486 } else if (temp != (char) 0) { 487 } else if (temp != (char) 0) {
487 cERROR(1, ("Unknown RFC 1002 frame")); 488 cERROR(1, "Unknown RFC 1002 frame");
488 cifs_dump_mem(" Received Data: ", (char *)smb_buffer, 489 cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
489 length); 490 length);
490 cifs_reconnect(server); 491 cifs_reconnect(server);
@@ -495,8 +496,8 @@ incomplete_rcv:
495 /* else we have an SMB response */ 496 /* else we have an SMB response */
496 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || 497 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
497 (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { 498 (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
498 cERROR(1, ("Invalid size SMB length %d pdu_length %d", 499 cERROR(1, "Invalid size SMB length %d pdu_length %d",
499 length, pdu_length+4)); 500 length, pdu_length+4);
500 cifs_reconnect(server); 501 cifs_reconnect(server);
501 csocket = server->ssocket; 502 csocket = server->ssocket;
502 wake_up(&server->response_q); 503 wake_up(&server->response_q);
@@ -539,8 +540,8 @@ incomplete_rcv:
539 length = 0; 540 length = 0;
540 continue; 541 continue;
541 } else if (length <= 0) { 542 } else if (length <= 0) {
542 cERROR(1, ("Received no data, expecting %d", 543 cERROR(1, "Received no data, expecting %d",
543 pdu_length - total_read)); 544 pdu_length - total_read);
544 cifs_reconnect(server); 545 cifs_reconnect(server);
545 csocket = server->ssocket; 546 csocket = server->ssocket;
546 reconnect = 1; 547 reconnect = 1;
@@ -588,7 +589,7 @@ incomplete_rcv:
588 } 589 }
589 } else { 590 } else {
590 if (!isLargeBuf) { 591 if (!isLargeBuf) {
591 cERROR(1,("1st trans2 resp needs bigbuf")); 592 cERROR(1, "1st trans2 resp needs bigbuf");
592 /* BB maybe we can fix this up, switch 593 /* BB maybe we can fix this up, switch
593 to already allocated large buffer? */ 594 to already allocated large buffer? */
594 } else { 595 } else {
@@ -630,8 +631,8 @@ multi_t2_fnd:
630 wake_up_process(task_to_wake); 631 wake_up_process(task_to_wake);
631 } else if (!is_valid_oplock_break(smb_buffer, server) && 632 } else if (!is_valid_oplock_break(smb_buffer, server) &&
632 !isMultiRsp) { 633 !isMultiRsp) {
633 cERROR(1, ("No task to wake, unknown frame received! " 634 cERROR(1, "No task to wake, unknown frame received! "
634 "NumMids %d", midCount.counter)); 635 "NumMids %d", midCount.counter);
635 cifs_dump_mem("Received Data is: ", (char *)smb_buffer, 636 cifs_dump_mem("Received Data is: ", (char *)smb_buffer,
636 sizeof(struct smb_hdr)); 637 sizeof(struct smb_hdr));
637#ifdef CONFIG_CIFS_DEBUG2 638#ifdef CONFIG_CIFS_DEBUG2
@@ -708,8 +709,8 @@ multi_t2_fnd:
708 list_for_each(tmp, &server->pending_mid_q) { 709 list_for_each(tmp, &server->pending_mid_q) {
709 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 710 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
710 if (mid_entry->midState == MID_REQUEST_SUBMITTED) { 711 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
711 cFYI(1, ("Clearing Mid 0x%x - waking up ", 712 cFYI(1, "Clearing Mid 0x%x - waking up ",
712 mid_entry->mid)); 713 mid_entry->mid);
713 task_to_wake = mid_entry->tsk; 714 task_to_wake = mid_entry->tsk;
714 if (task_to_wake) 715 if (task_to_wake)
715 wake_up_process(task_to_wake); 716 wake_up_process(task_to_wake);
@@ -728,7 +729,7 @@ multi_t2_fnd:
728 to wait at least 45 seconds before giving up 729 to wait at least 45 seconds before giving up
729 on a request getting a response and going ahead 730 on a request getting a response and going ahead
730 and killing cifsd */ 731 and killing cifsd */
731 cFYI(1, ("Wait for exit from demultiplex thread")); 732 cFYI(1, "Wait for exit from demultiplex thread");
732 msleep(46000); 733 msleep(46000);
733 /* if threads still have not exited they are probably never 734 /* if threads still have not exited they are probably never
734 coming home not much else we can do but free the memory */ 735 coming home not much else we can do but free the memory */
@@ -849,7 +850,7 @@ cifs_parse_mount_options(char *options, const char *devname,
849 separator[0] = options[4]; 850 separator[0] = options[4];
850 options += 5; 851 options += 5;
851 } else { 852 } else {
852 cFYI(1, ("Null separator not allowed")); 853 cFYI(1, "Null separator not allowed");
853 } 854 }
854 } 855 }
855 856
@@ -974,7 +975,7 @@ cifs_parse_mount_options(char *options, const char *devname,
974 } 975 }
975 } else if (strnicmp(data, "sec", 3) == 0) { 976 } else if (strnicmp(data, "sec", 3) == 0) {
976 if (!value || !*value) { 977 if (!value || !*value) {
977 cERROR(1, ("no security value specified")); 978 cERROR(1, "no security value specified");
978 continue; 979 continue;
979 } else if (strnicmp(value, "krb5i", 5) == 0) { 980 } else if (strnicmp(value, "krb5i", 5) == 0) {
980 vol->secFlg |= CIFSSEC_MAY_KRB5 | 981 vol->secFlg |= CIFSSEC_MAY_KRB5 |
@@ -982,7 +983,7 @@ cifs_parse_mount_options(char *options, const char *devname,
982 } else if (strnicmp(value, "krb5p", 5) == 0) { 983 } else if (strnicmp(value, "krb5p", 5) == 0) {
983 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 984 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
984 CIFSSEC_MAY_KRB5; */ 985 CIFSSEC_MAY_KRB5; */
985 cERROR(1, ("Krb5 cifs privacy not supported")); 986 cERROR(1, "Krb5 cifs privacy not supported");
986 return 1; 987 return 1;
987 } else if (strnicmp(value, "krb5", 4) == 0) { 988 } else if (strnicmp(value, "krb5", 4) == 0) {
988 vol->secFlg |= CIFSSEC_MAY_KRB5; 989 vol->secFlg |= CIFSSEC_MAY_KRB5;
@@ -1014,7 +1015,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1014 } else if (strnicmp(value, "none", 4) == 0) { 1015 } else if (strnicmp(value, "none", 4) == 0) {
1015 vol->nullauth = 1; 1016 vol->nullauth = 1;
1016 } else { 1017 } else {
1017 cERROR(1, ("bad security option: %s", value)); 1018 cERROR(1, "bad security option: %s", value);
1018 return 1; 1019 return 1;
1019 } 1020 }
1020 } else if ((strnicmp(data, "unc", 3) == 0) 1021 } else if ((strnicmp(data, "unc", 3) == 0)
@@ -1053,7 +1054,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1053 a domain name and need special handling? */ 1054 a domain name and need special handling? */
1054 if (strnlen(value, 256) < 256) { 1055 if (strnlen(value, 256) < 256) {
1055 vol->domainname = value; 1056 vol->domainname = value;
1056 cFYI(1, ("Domain name set")); 1057 cFYI(1, "Domain name set");
1057 } else { 1058 } else {
1058 printk(KERN_WARNING "CIFS: domain name too " 1059 printk(KERN_WARNING "CIFS: domain name too "
1059 "long\n"); 1060 "long\n");
@@ -1076,7 +1077,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1076 strcpy(vol->prepath+1, value); 1077 strcpy(vol->prepath+1, value);
1077 } else 1078 } else
1078 strcpy(vol->prepath, value); 1079 strcpy(vol->prepath, value);
1079 cFYI(1, ("prefix path %s", vol->prepath)); 1080 cFYI(1, "prefix path %s", vol->prepath);
1080 } else { 1081 } else {
1081 printk(KERN_WARNING "CIFS: prefix too long\n"); 1082 printk(KERN_WARNING "CIFS: prefix too long\n");
1082 return 1; 1083 return 1;
@@ -1092,7 +1093,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1092 vol->iocharset = value; 1093 vol->iocharset = value;
1093 /* if iocharset not set then load_nls_default 1094 /* if iocharset not set then load_nls_default
1094 is used by caller */ 1095 is used by caller */
1095 cFYI(1, ("iocharset set to %s", value)); 1096 cFYI(1, "iocharset set to %s", value);
1096 } else { 1097 } else {
1097 printk(KERN_WARNING "CIFS: iocharset name " 1098 printk(KERN_WARNING "CIFS: iocharset name "
1098 "too long.\n"); 1099 "too long.\n");
@@ -1144,14 +1145,14 @@ cifs_parse_mount_options(char *options, const char *devname,
1144 } 1145 }
1145 } else if (strnicmp(data, "sockopt", 5) == 0) { 1146 } else if (strnicmp(data, "sockopt", 5) == 0) {
1146 if (!value || !*value) { 1147 if (!value || !*value) {
1147 cERROR(1, ("no socket option specified")); 1148 cERROR(1, "no socket option specified");
1148 continue; 1149 continue;
1149 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) { 1150 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
1150 vol->sockopt_tcp_nodelay = 1; 1151 vol->sockopt_tcp_nodelay = 1;
1151 } 1152 }
1152 } else if (strnicmp(data, "netbiosname", 4) == 0) { 1153 } else if (strnicmp(data, "netbiosname", 4) == 0) {
1153 if (!value || !*value || (*value == ' ')) { 1154 if (!value || !*value || (*value == ' ')) {
1154 cFYI(1, ("invalid (empty) netbiosname")); 1155 cFYI(1, "invalid (empty) netbiosname");
1155 } else { 1156 } else {
1156 memset(vol->source_rfc1001_name, 0x20, 15); 1157 memset(vol->source_rfc1001_name, 0x20, 15);
1157 for (i = 0; i < 15; i++) { 1158 for (i = 0; i < 15; i++) {
@@ -1175,7 +1176,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1175 } else if (strnicmp(data, "servern", 7) == 0) { 1176 } else if (strnicmp(data, "servern", 7) == 0) {
1176 /* servernetbiosname specified override *SMBSERVER */ 1177 /* servernetbiosname specified override *SMBSERVER */
1177 if (!value || !*value || (*value == ' ')) { 1178 if (!value || !*value || (*value == ' ')) {
1178 cFYI(1, ("empty server netbiosname specified")); 1179 cFYI(1, "empty server netbiosname specified");
1179 } else { 1180 } else {
1180 /* last byte, type, is 0x20 for servr type */ 1181 /* last byte, type, is 0x20 for servr type */
1181 memset(vol->target_rfc1001_name, 0x20, 16); 1182 memset(vol->target_rfc1001_name, 0x20, 16);
@@ -1434,7 +1435,7 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
1434 1435
1435 ++server->srv_count; 1436 ++server->srv_count;
1436 write_unlock(&cifs_tcp_ses_lock); 1437 write_unlock(&cifs_tcp_ses_lock);
1437 cFYI(1, ("Existing tcp session with server found")); 1438 cFYI(1, "Existing tcp session with server found");
1438 return server; 1439 return server;
1439 } 1440 }
1440 write_unlock(&cifs_tcp_ses_lock); 1441 write_unlock(&cifs_tcp_ses_lock);
@@ -1475,7 +1476,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1475 1476
1476 memset(&addr, 0, sizeof(struct sockaddr_storage)); 1477 memset(&addr, 0, sizeof(struct sockaddr_storage));
1477 1478
1478 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip)); 1479 cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
1479 1480
1480 if (volume_info->UNCip && volume_info->UNC) { 1481 if (volume_info->UNCip && volume_info->UNC) {
1481 rc = cifs_convert_address(volume_info->UNCip, &addr); 1482 rc = cifs_convert_address(volume_info->UNCip, &addr);
@@ -1487,13 +1488,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1487 } else if (volume_info->UNCip) { 1488 } else if (volume_info->UNCip) {
1488 /* BB using ip addr as tcp_ses name to connect to the 1489 /* BB using ip addr as tcp_ses name to connect to the
1489 DFS root below */ 1490 DFS root below */
1490 cERROR(1, ("Connecting to DFS root not implemented yet")); 1491 cERROR(1, "Connecting to DFS root not implemented yet");
1491 rc = -EINVAL; 1492 rc = -EINVAL;
1492 goto out_err; 1493 goto out_err;
1493 } else /* which tcp_sess DFS root would we conect to */ { 1494 } else /* which tcp_sess DFS root would we conect to */ {
1494 cERROR(1, 1495 cERROR(1, "CIFS mount error: No UNC path (e.g. -o "
1495 ("CIFS mount error: No UNC path (e.g. -o " 1496 "unc=//192.168.1.100/public) specified");
1496 "unc=//192.168.1.100/public) specified"));
1497 rc = -EINVAL; 1497 rc = -EINVAL;
1498 goto out_err; 1498 goto out_err;
1499 } 1499 }
@@ -1540,7 +1540,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1540 ++tcp_ses->srv_count; 1540 ++tcp_ses->srv_count;
1541 1541
1542 if (addr.ss_family == AF_INET6) { 1542 if (addr.ss_family == AF_INET6) {
1543 cFYI(1, ("attempting ipv6 connect")); 1543 cFYI(1, "attempting ipv6 connect");
1544 /* BB should we allow ipv6 on port 139? */ 1544 /* BB should we allow ipv6 on port 139? */
1545 /* other OS never observed in Wild doing 139 with v6 */ 1545 /* other OS never observed in Wild doing 139 with v6 */
1546 sin_server6->sin6_port = htons(volume_info->port); 1546 sin_server6->sin6_port = htons(volume_info->port);
@@ -1554,7 +1554,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1554 rc = ipv4_connect(tcp_ses); 1554 rc = ipv4_connect(tcp_ses);
1555 } 1555 }
1556 if (rc < 0) { 1556 if (rc < 0) {
1557 cERROR(1, ("Error connecting to socket. Aborting operation")); 1557 cERROR(1, "Error connecting to socket. Aborting operation");
1558 goto out_err; 1558 goto out_err;
1559 } 1559 }
1560 1560
@@ -1567,7 +1567,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1567 tcp_ses, "cifsd"); 1567 tcp_ses, "cifsd");
1568 if (IS_ERR(tcp_ses->tsk)) { 1568 if (IS_ERR(tcp_ses->tsk)) {
1569 rc = PTR_ERR(tcp_ses->tsk); 1569 rc = PTR_ERR(tcp_ses->tsk);
1570 cERROR(1, ("error %d create cifsd thread", rc)); 1570 cERROR(1, "error %d create cifsd thread", rc);
1571 module_put(THIS_MODULE); 1571 module_put(THIS_MODULE);
1572 goto out_err; 1572 goto out_err;
1573 } 1573 }
@@ -1616,6 +1616,7 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1616 int xid; 1616 int xid;
1617 struct TCP_Server_Info *server = ses->server; 1617 struct TCP_Server_Info *server = ses->server;
1618 1618
1619 cFYI(1, "%s: ses_count=%d\n", __func__, ses->ses_count);
1619 write_lock(&cifs_tcp_ses_lock); 1620 write_lock(&cifs_tcp_ses_lock);
1620 if (--ses->ses_count > 0) { 1621 if (--ses->ses_count > 0) {
1621 write_unlock(&cifs_tcp_ses_lock); 1622 write_unlock(&cifs_tcp_ses_lock);
@@ -1634,6 +1635,102 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1634 cifs_put_tcp_session(server); 1635 cifs_put_tcp_session(server);
1635} 1636}
1636 1637
1638static struct cifsSesInfo *
1639cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1640{
1641 int rc = -ENOMEM, xid;
1642 struct cifsSesInfo *ses;
1643
1644 xid = GetXid();
1645
1646 ses = cifs_find_smb_ses(server, volume_info->username);
1647 if (ses) {
1648 cFYI(1, "Existing smb sess found (status=%d)", ses->status);
1649
1650 /* existing SMB ses has a server reference already */
1651 cifs_put_tcp_session(server);
1652
1653 mutex_lock(&ses->session_mutex);
1654 rc = cifs_negotiate_protocol(xid, ses);
1655 if (rc) {
1656 mutex_unlock(&ses->session_mutex);
1657 /* problem -- put our ses reference */
1658 cifs_put_smb_ses(ses);
1659 FreeXid(xid);
1660 return ERR_PTR(rc);
1661 }
1662 if (ses->need_reconnect) {
1663 cFYI(1, "Session needs reconnect");
1664 rc = cifs_setup_session(xid, ses,
1665 volume_info->local_nls);
1666 if (rc) {
1667 mutex_unlock(&ses->session_mutex);
1668 /* problem -- put our reference */
1669 cifs_put_smb_ses(ses);
1670 FreeXid(xid);
1671 return ERR_PTR(rc);
1672 }
1673 }
1674 mutex_unlock(&ses->session_mutex);
1675 FreeXid(xid);
1676 return ses;
1677 }
1678
1679 cFYI(1, "Existing smb sess not found");
1680 ses = sesInfoAlloc();
1681 if (ses == NULL)
1682 goto get_ses_fail;
1683
1684 /* new SMB session uses our server ref */
1685 ses->server = server;
1686 if (server->addr.sockAddr6.sin6_family == AF_INET6)
1687 sprintf(ses->serverName, "%pI6",
1688 &server->addr.sockAddr6.sin6_addr);
1689 else
1690 sprintf(ses->serverName, "%pI4",
1691 &server->addr.sockAddr.sin_addr.s_addr);
1692
1693 if (volume_info->username)
1694 strncpy(ses->userName, volume_info->username,
1695 MAX_USERNAME_SIZE);
1696
1697 /* volume_info->password freed at unmount */
1698 if (volume_info->password) {
1699 ses->password = kstrdup(volume_info->password, GFP_KERNEL);
1700 if (!ses->password)
1701 goto get_ses_fail;
1702 }
1703 if (volume_info->domainname) {
1704 int len = strlen(volume_info->domainname);
1705 ses->domainName = kmalloc(len + 1, GFP_KERNEL);
1706 if (ses->domainName)
1707 strcpy(ses->domainName, volume_info->domainname);
1708 }
1709 ses->linux_uid = volume_info->linux_uid;
1710 ses->overrideSecFlg = volume_info->secFlg;
1711
1712 mutex_lock(&ses->session_mutex);
1713 rc = cifs_negotiate_protocol(xid, ses);
1714 if (!rc)
1715 rc = cifs_setup_session(xid, ses, volume_info->local_nls);
1716 mutex_unlock(&ses->session_mutex);
1717 if (rc)
1718 goto get_ses_fail;
1719
1720 /* success, put it on the list */
1721 write_lock(&cifs_tcp_ses_lock);
1722 list_add(&ses->smb_ses_list, &server->smb_ses_list);
1723 write_unlock(&cifs_tcp_ses_lock);
1724
1725 FreeXid(xid);
1726 return ses;
1727
1728get_ses_fail:
1729 sesInfoFree(ses);
1730 FreeXid(xid);
1731 return ERR_PTR(rc);
1732}
1733
1637static struct cifsTconInfo * 1734static struct cifsTconInfo *
1638cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) 1735cifs_find_tcon(struct cifsSesInfo *ses, const char *unc)
1639{ 1736{
@@ -1662,6 +1759,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
1662 int xid; 1759 int xid;
1663 struct cifsSesInfo *ses = tcon->ses; 1760 struct cifsSesInfo *ses = tcon->ses;
1664 1761
1762 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count);
1665 write_lock(&cifs_tcp_ses_lock); 1763 write_lock(&cifs_tcp_ses_lock);
1666 if (--tcon->tc_count > 0) { 1764 if (--tcon->tc_count > 0) {
1667 write_unlock(&cifs_tcp_ses_lock); 1765 write_unlock(&cifs_tcp_ses_lock);
@@ -1679,6 +1777,80 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
1679 cifs_put_smb_ses(ses); 1777 cifs_put_smb_ses(ses);
1680} 1778}
1681 1779
1780static struct cifsTconInfo *
1781cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
1782{
1783 int rc, xid;
1784 struct cifsTconInfo *tcon;
1785
1786 tcon = cifs_find_tcon(ses, volume_info->UNC);
1787 if (tcon) {
1788 cFYI(1, "Found match on UNC path");
1789 /* existing tcon already has a reference */
1790 cifs_put_smb_ses(ses);
1791 if (tcon->seal != volume_info->seal)
1792 cERROR(1, "transport encryption setting "
1793 "conflicts with existing tid");
1794 return tcon;
1795 }
1796
1797 tcon = tconInfoAlloc();
1798 if (tcon == NULL) {
1799 rc = -ENOMEM;
1800 goto out_fail;
1801 }
1802
1803 tcon->ses = ses;
1804 if (volume_info->password) {
1805 tcon->password = kstrdup(volume_info->password, GFP_KERNEL);
1806 if (!tcon->password) {
1807 rc = -ENOMEM;
1808 goto out_fail;
1809 }
1810 }
1811
1812 if (strchr(volume_info->UNC + 3, '\\') == NULL
1813 && strchr(volume_info->UNC + 3, '/') == NULL) {
1814 cERROR(1, "Missing share name");
1815 rc = -ENODEV;
1816 goto out_fail;
1817 }
1818
1819 /* BB Do we need to wrap session_mutex around
1820 * this TCon call and Unix SetFS as
1821 * we do on SessSetup and reconnect? */
1822 xid = GetXid();
1823 rc = CIFSTCon(xid, ses, volume_info->UNC, tcon, volume_info->local_nls);
1824 FreeXid(xid);
1825 cFYI(1, "CIFS Tcon rc = %d", rc);
1826 if (rc)
1827 goto out_fail;
1828
1829 if (volume_info->nodfs) {
1830 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
1831 cFYI(1, "DFS disabled (%d)", tcon->Flags);
1832 }
1833 tcon->seal = volume_info->seal;
1834 /* we can have only one retry value for a connection
1835 to a share so for resources mounted more than once
1836 to the same server share the last value passed in
1837 for the retry flag is used */
1838 tcon->retry = volume_info->retry;
1839 tcon->nocase = volume_info->nocase;
1840 tcon->local_lease = volume_info->local_lease;
1841
1842 write_lock(&cifs_tcp_ses_lock);
1843 list_add(&tcon->tcon_list, &ses->tcon_list);
1844 write_unlock(&cifs_tcp_ses_lock);
1845
1846 return tcon;
1847
1848out_fail:
1849 tconInfoFree(tcon);
1850 return ERR_PTR(rc);
1851}
1852
1853
1682int 1854int
1683get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, 1855get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1684 const struct nls_table *nls_codepage, unsigned int *pnum_referrals, 1856 const struct nls_table *nls_codepage, unsigned int *pnum_referrals,
@@ -1703,8 +1875,7 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1703 strcpy(temp_unc + 2, pSesInfo->serverName); 1875 strcpy(temp_unc + 2, pSesInfo->serverName);
1704 strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$"); 1876 strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$");
1705 rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage); 1877 rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage);
1706 cFYI(1, 1878 cFYI(1, "CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid);
1707 ("CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid));
1708 kfree(temp_unc); 1879 kfree(temp_unc);
1709 } 1880 }
1710 if (rc == 0) 1881 if (rc == 0)
@@ -1777,12 +1948,12 @@ ipv4_connect(struct TCP_Server_Info *server)
1777 rc = sock_create_kern(PF_INET, SOCK_STREAM, 1948 rc = sock_create_kern(PF_INET, SOCK_STREAM,
1778 IPPROTO_TCP, &socket); 1949 IPPROTO_TCP, &socket);
1779 if (rc < 0) { 1950 if (rc < 0) {
1780 cERROR(1, ("Error %d creating socket", rc)); 1951 cERROR(1, "Error %d creating socket", rc);
1781 return rc; 1952 return rc;
1782 } 1953 }
1783 1954
1784 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1955 /* BB other socket options to set KEEPALIVE, NODELAY? */
1785 cFYI(1, ("Socket created")); 1956 cFYI(1, "Socket created");
1786 server->ssocket = socket; 1957 server->ssocket = socket;
1787 socket->sk->sk_allocation = GFP_NOFS; 1958 socket->sk->sk_allocation = GFP_NOFS;
1788 cifs_reclassify_socket4(socket); 1959 cifs_reclassify_socket4(socket);
@@ -1827,7 +1998,7 @@ ipv4_connect(struct TCP_Server_Info *server)
1827 if (!connected) { 1998 if (!connected) {
1828 if (orig_port) 1999 if (orig_port)
1829 server->addr.sockAddr.sin_port = orig_port; 2000 server->addr.sockAddr.sin_port = orig_port;
1830 cFYI(1, ("Error %d connecting to server via ipv4", rc)); 2001 cFYI(1, "Error %d connecting to server via ipv4", rc);
1831 sock_release(socket); 2002 sock_release(socket);
1832 server->ssocket = NULL; 2003 server->ssocket = NULL;
1833 return rc; 2004 return rc;
@@ -1855,12 +2026,12 @@ ipv4_connect(struct TCP_Server_Info *server)
1855 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 2026 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
1856 (char *)&val, sizeof(val)); 2027 (char *)&val, sizeof(val));
1857 if (rc) 2028 if (rc)
1858 cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); 2029 cFYI(1, "set TCP_NODELAY socket option error %d", rc);
1859 } 2030 }
1860 2031
1861 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", 2032 cFYI(1, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
1862 socket->sk->sk_sndbuf, 2033 socket->sk->sk_sndbuf,
1863 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo)); 2034 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
1864 2035
1865 /* send RFC1001 sessinit */ 2036 /* send RFC1001 sessinit */
1866 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) { 2037 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) {
@@ -1938,13 +2109,13 @@ ipv6_connect(struct TCP_Server_Info *server)
1938 rc = sock_create_kern(PF_INET6, SOCK_STREAM, 2109 rc = sock_create_kern(PF_INET6, SOCK_STREAM,
1939 IPPROTO_TCP, &socket); 2110 IPPROTO_TCP, &socket);
1940 if (rc < 0) { 2111 if (rc < 0) {
1941 cERROR(1, ("Error %d creating ipv6 socket", rc)); 2112 cERROR(1, "Error %d creating ipv6 socket", rc);
1942 socket = NULL; 2113 socket = NULL;
1943 return rc; 2114 return rc;
1944 } 2115 }
1945 2116
1946 /* BB other socket options to set KEEPALIVE, NODELAY? */ 2117 /* BB other socket options to set KEEPALIVE, NODELAY? */
1947 cFYI(1, ("ipv6 Socket created")); 2118 cFYI(1, "ipv6 Socket created");
1948 server->ssocket = socket; 2119 server->ssocket = socket;
1949 socket->sk->sk_allocation = GFP_NOFS; 2120 socket->sk->sk_allocation = GFP_NOFS;
1950 cifs_reclassify_socket6(socket); 2121 cifs_reclassify_socket6(socket);
@@ -1988,7 +2159,7 @@ ipv6_connect(struct TCP_Server_Info *server)
1988 if (!connected) { 2159 if (!connected) {
1989 if (orig_port) 2160 if (orig_port)
1990 server->addr.sockAddr6.sin6_port = orig_port; 2161 server->addr.sockAddr6.sin6_port = orig_port;
1991 cFYI(1, ("Error %d connecting to server via ipv6", rc)); 2162 cFYI(1, "Error %d connecting to server via ipv6", rc);
1992 sock_release(socket); 2163 sock_release(socket);
1993 server->ssocket = NULL; 2164 server->ssocket = NULL;
1994 return rc; 2165 return rc;
@@ -2007,7 +2178,7 @@ ipv6_connect(struct TCP_Server_Info *server)
2007 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 2178 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2008 (char *)&val, sizeof(val)); 2179 (char *)&val, sizeof(val));
2009 if (rc) 2180 if (rc)
2010 cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); 2181 cFYI(1, "set TCP_NODELAY socket option error %d", rc);
2011 } 2182 }
2012 2183
2013 server->ssocket = socket; 2184 server->ssocket = socket;
@@ -2032,13 +2203,13 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2032 if (vol_info && vol_info->no_linux_ext) { 2203 if (vol_info && vol_info->no_linux_ext) {
2033 tcon->fsUnixInfo.Capability = 0; 2204 tcon->fsUnixInfo.Capability = 0;
2034 tcon->unix_ext = 0; /* Unix Extensions disabled */ 2205 tcon->unix_ext = 0; /* Unix Extensions disabled */
2035 cFYI(1, ("Linux protocol extensions disabled")); 2206 cFYI(1, "Linux protocol extensions disabled");
2036 return; 2207 return;
2037 } else if (vol_info) 2208 } else if (vol_info)
2038 tcon->unix_ext = 1; /* Unix Extensions supported */ 2209 tcon->unix_ext = 1; /* Unix Extensions supported */
2039 2210
2040 if (tcon->unix_ext == 0) { 2211 if (tcon->unix_ext == 0) {
2041 cFYI(1, ("Unix extensions disabled so not set on reconnect")); 2212 cFYI(1, "Unix extensions disabled so not set on reconnect");
2042 return; 2213 return;
2043 } 2214 }
2044 2215
@@ -2054,12 +2225,11 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2054 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2225 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
2055 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { 2226 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
2056 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) 2227 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
2057 cERROR(1, ("POSIXPATH support change")); 2228 cERROR(1, "POSIXPATH support change");
2058 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 2229 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
2059 } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { 2230 } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
2060 cERROR(1, ("possible reconnect error")); 2231 cERROR(1, "possible reconnect error");
2061 cERROR(1, 2232 cERROR(1, "server disabled POSIX path support");
2062 ("server disabled POSIX path support"));
2063 } 2233 }
2064 } 2234 }
2065 2235
@@ -2067,7 +2237,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2067 if (vol_info && vol_info->no_psx_acl) 2237 if (vol_info && vol_info->no_psx_acl)
2068 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2238 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
2069 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { 2239 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
2070 cFYI(1, ("negotiated posix acl support")); 2240 cFYI(1, "negotiated posix acl support");
2071 if (sb) 2241 if (sb)
2072 sb->s_flags |= MS_POSIXACL; 2242 sb->s_flags |= MS_POSIXACL;
2073 } 2243 }
@@ -2075,7 +2245,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2075 if (vol_info && vol_info->posix_paths == 0) 2245 if (vol_info && vol_info->posix_paths == 0)
2076 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 2246 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
2077 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { 2247 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
2078 cFYI(1, ("negotiate posix pathnames")); 2248 cFYI(1, "negotiate posix pathnames");
2079 if (sb) 2249 if (sb)
2080 CIFS_SB(sb)->mnt_cifs_flags |= 2250 CIFS_SB(sb)->mnt_cifs_flags |=
2081 CIFS_MOUNT_POSIX_PATHS; 2251 CIFS_MOUNT_POSIX_PATHS;
@@ -2090,39 +2260,38 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2090 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 2260 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
2091 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 2261 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
2092 CIFS_SB(sb)->rsize = 127 * 1024; 2262 CIFS_SB(sb)->rsize = 127 * 1024;
2093 cFYI(DBG2, 2263 cFYI(DBG2, "larger reads not supported by srv");
2094 ("larger reads not supported by srv"));
2095 } 2264 }
2096 } 2265 }
2097 2266
2098 2267
2099 cFYI(1, ("Negotiate caps 0x%x", (int)cap)); 2268 cFYI(1, "Negotiate caps 0x%x", (int)cap);
2100#ifdef CONFIG_CIFS_DEBUG2 2269#ifdef CONFIG_CIFS_DEBUG2
2101 if (cap & CIFS_UNIX_FCNTL_CAP) 2270 if (cap & CIFS_UNIX_FCNTL_CAP)
2102 cFYI(1, ("FCNTL cap")); 2271 cFYI(1, "FCNTL cap");
2103 if (cap & CIFS_UNIX_EXTATTR_CAP) 2272 if (cap & CIFS_UNIX_EXTATTR_CAP)
2104 cFYI(1, ("EXTATTR cap")); 2273 cFYI(1, "EXTATTR cap");
2105 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) 2274 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
2106 cFYI(1, ("POSIX path cap")); 2275 cFYI(1, "POSIX path cap");
2107 if (cap & CIFS_UNIX_XATTR_CAP) 2276 if (cap & CIFS_UNIX_XATTR_CAP)
2108 cFYI(1, ("XATTR cap")); 2277 cFYI(1, "XATTR cap");
2109 if (cap & CIFS_UNIX_POSIX_ACL_CAP) 2278 if (cap & CIFS_UNIX_POSIX_ACL_CAP)
2110 cFYI(1, ("POSIX ACL cap")); 2279 cFYI(1, "POSIX ACL cap");
2111 if (cap & CIFS_UNIX_LARGE_READ_CAP) 2280 if (cap & CIFS_UNIX_LARGE_READ_CAP)
2112 cFYI(1, ("very large read cap")); 2281 cFYI(1, "very large read cap");
2113 if (cap & CIFS_UNIX_LARGE_WRITE_CAP) 2282 if (cap & CIFS_UNIX_LARGE_WRITE_CAP)
2114 cFYI(1, ("very large write cap")); 2283 cFYI(1, "very large write cap");
2115#endif /* CIFS_DEBUG2 */ 2284#endif /* CIFS_DEBUG2 */
2116 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 2285 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
2117 if (vol_info == NULL) { 2286 if (vol_info == NULL) {
2118 cFYI(1, ("resetting capabilities failed")); 2287 cFYI(1, "resetting capabilities failed");
2119 } else 2288 } else
2120 cERROR(1, ("Negotiating Unix capabilities " 2289 cERROR(1, "Negotiating Unix capabilities "
2121 "with the server failed. Consider " 2290 "with the server failed. Consider "
2122 "mounting with the Unix Extensions\n" 2291 "mounting with the Unix Extensions\n"
2123 "disabled, if problems are found, " 2292 "disabled, if problems are found, "
2124 "by specifying the nounix mount " 2293 "by specifying the nounix mount "
2125 "option.")); 2294 "option.");
2126 2295
2127 } 2296 }
2128 } 2297 }
@@ -2152,8 +2321,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2152 struct cifs_sb_info *cifs_sb) 2321 struct cifs_sb_info *cifs_sb)
2153{ 2322{
2154 if (pvolume_info->rsize > CIFSMaxBufSize) { 2323 if (pvolume_info->rsize > CIFSMaxBufSize) {
2155 cERROR(1, ("rsize %d too large, using MaxBufSize", 2324 cERROR(1, "rsize %d too large, using MaxBufSize",
2156 pvolume_info->rsize)); 2325 pvolume_info->rsize);
2157 cifs_sb->rsize = CIFSMaxBufSize; 2326 cifs_sb->rsize = CIFSMaxBufSize;
2158 } else if ((pvolume_info->rsize) && 2327 } else if ((pvolume_info->rsize) &&
2159 (pvolume_info->rsize <= CIFSMaxBufSize)) 2328 (pvolume_info->rsize <= CIFSMaxBufSize))
@@ -2162,8 +2331,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2162 cifs_sb->rsize = CIFSMaxBufSize; 2331 cifs_sb->rsize = CIFSMaxBufSize;
2163 2332
2164 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { 2333 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
2165 cERROR(1, ("wsize %d too large, using 4096 instead", 2334 cERROR(1, "wsize %d too large, using 4096 instead",
2166 pvolume_info->wsize)); 2335 pvolume_info->wsize);
2167 cifs_sb->wsize = 4096; 2336 cifs_sb->wsize = 4096;
2168 } else if (pvolume_info->wsize) 2337 } else if (pvolume_info->wsize)
2169 cifs_sb->wsize = pvolume_info->wsize; 2338 cifs_sb->wsize = pvolume_info->wsize;
@@ -2181,7 +2350,7 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2181 if (cifs_sb->rsize < 2048) { 2350 if (cifs_sb->rsize < 2048) {
2182 cifs_sb->rsize = 2048; 2351 cifs_sb->rsize = 2048;
2183 /* Windows ME may prefer this */ 2352 /* Windows ME may prefer this */
2184 cFYI(1, ("readsize set to minimum: 2048")); 2353 cFYI(1, "readsize set to minimum: 2048");
2185 } 2354 }
2186 /* calculate prepath */ 2355 /* calculate prepath */
2187 cifs_sb->prepath = pvolume_info->prepath; 2356 cifs_sb->prepath = pvolume_info->prepath;
@@ -2199,8 +2368,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2199 cifs_sb->mnt_gid = pvolume_info->linux_gid; 2368 cifs_sb->mnt_gid = pvolume_info->linux_gid;
2200 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2369 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
2201 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; 2370 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
2202 cFYI(1, ("file mode: 0x%x dir mode: 0x%x", 2371 cFYI(1, "file mode: 0x%x dir mode: 0x%x",
2203 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode)); 2372 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2204 2373
2205 if (pvolume_info->noperm) 2374 if (pvolume_info->noperm)
2206 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2375 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -2229,13 +2398,13 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2229 if (pvolume_info->dynperm) 2398 if (pvolume_info->dynperm)
2230 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; 2399 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
2231 if (pvolume_info->direct_io) { 2400 if (pvolume_info->direct_io) {
2232 cFYI(1, ("mounting share using direct i/o")); 2401 cFYI(1, "mounting share using direct i/o");
2233 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2402 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
2234 } 2403 }
2235 2404
2236 if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) 2405 if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
2237 cERROR(1, ("mount option dynperm ignored if cifsacl " 2406 cERROR(1, "mount option dynperm ignored if cifsacl "
2238 "mount option supported")); 2407 "mount option supported");
2239} 2408}
2240 2409
2241static int 2410static int
@@ -2262,7 +2431,7 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2262{ 2431{
2263 struct smb_vol *volume_info; 2432 struct smb_vol *volume_info;
2264 2433
2265 if (!pvolume_info && !*pvolume_info) 2434 if (!pvolume_info || !*pvolume_info)
2266 return; 2435 return;
2267 2436
2268 volume_info = *pvolume_info; 2437 volume_info = *pvolume_info;
@@ -2344,11 +2513,11 @@ try_mount_again:
2344 } 2513 }
2345 2514
2346 if (volume_info->nullauth) { 2515 if (volume_info->nullauth) {
2347 cFYI(1, ("null user")); 2516 cFYI(1, "null user");
2348 volume_info->username = ""; 2517 volume_info->username = "";
2349 } else if (volume_info->username) { 2518 } else if (volume_info->username) {
2350 /* BB fixme parse for domain name here */ 2519 /* BB fixme parse for domain name here */
2351 cFYI(1, ("Username: %s", volume_info->username)); 2520 cFYI(1, "Username: %s", volume_info->username);
2352 } else { 2521 } else {
2353 cifserror("No username specified"); 2522 cifserror("No username specified");
2354 /* In userspace mount helper we can get user name from alternate 2523 /* In userspace mount helper we can get user name from alternate
@@ -2357,20 +2526,20 @@ try_mount_again:
2357 goto out; 2526 goto out;
2358 } 2527 }
2359 2528
2360
2361 /* this is needed for ASCII cp to Unicode converts */ 2529 /* this is needed for ASCII cp to Unicode converts */
2362 if (volume_info->iocharset == NULL) { 2530 if (volume_info->iocharset == NULL) {
2363 cifs_sb->local_nls = load_nls_default(); 2531 /* load_nls_default cannot return null */
2364 /* load_nls_default can not return null */ 2532 volume_info->local_nls = load_nls_default();
2365 } else { 2533 } else {
2366 cifs_sb->local_nls = load_nls(volume_info->iocharset); 2534 volume_info->local_nls = load_nls(volume_info->iocharset);
2367 if (cifs_sb->local_nls == NULL) { 2535 if (volume_info->local_nls == NULL) {
2368 cERROR(1, ("CIFS mount error: iocharset %s not found", 2536 cERROR(1, "CIFS mount error: iocharset %s not found",
2369 volume_info->iocharset)); 2537 volume_info->iocharset);
2370 rc = -ELIBACC; 2538 rc = -ELIBACC;
2371 goto out; 2539 goto out;
2372 } 2540 }
2373 } 2541 }
2542 cifs_sb->local_nls = volume_info->local_nls;
2374 2543
2375 /* get a reference to a tcp session */ 2544 /* get a reference to a tcp session */
2376 srvTcp = cifs_get_tcp_session(volume_info); 2545 srvTcp = cifs_get_tcp_session(volume_info);
@@ -2379,148 +2548,30 @@ try_mount_again:
2379 goto out; 2548 goto out;
2380 } 2549 }
2381 2550
2382 pSesInfo = cifs_find_smb_ses(srvTcp, volume_info->username); 2551 /* get a reference to a SMB session */
2383 if (pSesInfo) { 2552 pSesInfo = cifs_get_smb_ses(srvTcp, volume_info);
2384 cFYI(1, ("Existing smb sess found (status=%d)", 2553 if (IS_ERR(pSesInfo)) {
2385 pSesInfo->status)); 2554 rc = PTR_ERR(pSesInfo);
2386 /* 2555 pSesInfo = NULL;
2387 * The existing SMB session already has a reference to srvTcp, 2556 goto mount_fail_check;
2388 * so we can put back the extra one we got before
2389 */
2390 cifs_put_tcp_session(srvTcp);
2391
2392 mutex_lock(&pSesInfo->session_mutex);
2393 if (pSesInfo->need_reconnect) {
2394 cFYI(1, ("Session needs reconnect"));
2395 rc = cifs_setup_session(xid, pSesInfo,
2396 cifs_sb->local_nls);
2397 }
2398 mutex_unlock(&pSesInfo->session_mutex);
2399 } else if (!rc) {
2400 cFYI(1, ("Existing smb sess not found"));
2401 pSesInfo = sesInfoAlloc();
2402 if (pSesInfo == NULL) {
2403 rc = -ENOMEM;
2404 goto mount_fail_check;
2405 }
2406
2407 /* new SMB session uses our srvTcp ref */
2408 pSesInfo->server = srvTcp;
2409 if (srvTcp->addr.sockAddr6.sin6_family == AF_INET6)
2410 sprintf(pSesInfo->serverName, "%pI6",
2411 &srvTcp->addr.sockAddr6.sin6_addr);
2412 else
2413 sprintf(pSesInfo->serverName, "%pI4",
2414 &srvTcp->addr.sockAddr.sin_addr.s_addr);
2415
2416 write_lock(&cifs_tcp_ses_lock);
2417 list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list);
2418 write_unlock(&cifs_tcp_ses_lock);
2419
2420 /* volume_info->password freed at unmount */
2421 if (volume_info->password) {
2422 pSesInfo->password = kstrdup(volume_info->password,
2423 GFP_KERNEL);
2424 if (!pSesInfo->password) {
2425 rc = -ENOMEM;
2426 goto mount_fail_check;
2427 }
2428 }
2429 if (volume_info->username)
2430 strncpy(pSesInfo->userName, volume_info->username,
2431 MAX_USERNAME_SIZE);
2432 if (volume_info->domainname) {
2433 int len = strlen(volume_info->domainname);
2434 pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL);
2435 if (pSesInfo->domainName)
2436 strcpy(pSesInfo->domainName,
2437 volume_info->domainname);
2438 }
2439 pSesInfo->linux_uid = volume_info->linux_uid;
2440 pSesInfo->overrideSecFlg = volume_info->secFlg;
2441 mutex_lock(&pSesInfo->session_mutex);
2442
2443 /* BB FIXME need to pass vol->secFlgs BB */
2444 rc = cifs_setup_session(xid, pSesInfo,
2445 cifs_sb->local_nls);
2446 mutex_unlock(&pSesInfo->session_mutex);
2447 } 2557 }
2448 2558
2449 /* search for existing tcon to this server share */ 2559 setup_cifs_sb(volume_info, cifs_sb);
2450 if (!rc) { 2560 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2451 setup_cifs_sb(volume_info, cifs_sb); 2561 sb->s_maxbytes = MAX_LFS_FILESIZE;
2452 2562 else
2453 tcon = cifs_find_tcon(pSesInfo, volume_info->UNC); 2563 sb->s_maxbytes = MAX_NON_LFS;
2454 if (tcon) {
2455 cFYI(1, ("Found match on UNC path"));
2456 /* existing tcon already has a reference */
2457 cifs_put_smb_ses(pSesInfo);
2458 if (tcon->seal != volume_info->seal)
2459 cERROR(1, ("transport encryption setting "
2460 "conflicts with existing tid"));
2461 } else {
2462 tcon = tconInfoAlloc();
2463 if (tcon == NULL) {
2464 rc = -ENOMEM;
2465 goto mount_fail_check;
2466 }
2467
2468 tcon->ses = pSesInfo;
2469 if (volume_info->password) {
2470 tcon->password = kstrdup(volume_info->password,
2471 GFP_KERNEL);
2472 if (!tcon->password) {
2473 rc = -ENOMEM;
2474 goto mount_fail_check;
2475 }
2476 }
2477
2478 if ((strchr(volume_info->UNC + 3, '\\') == NULL)
2479 && (strchr(volume_info->UNC + 3, '/') == NULL)) {
2480 cERROR(1, ("Missing share name"));
2481 rc = -ENODEV;
2482 goto mount_fail_check;
2483 } else {
2484 /* BB Do we need to wrap sesSem around
2485 * this TCon call and Unix SetFS as
2486 * we do on SessSetup and reconnect? */
2487 rc = CIFSTCon(xid, pSesInfo, volume_info->UNC,
2488 tcon, cifs_sb->local_nls);
2489 cFYI(1, ("CIFS Tcon rc = %d", rc));
2490 if (volume_info->nodfs) {
2491 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
2492 cFYI(1, ("DFS disabled (%d)",
2493 tcon->Flags));
2494 }
2495 }
2496 if (rc)
2497 goto remote_path_check;
2498 tcon->seal = volume_info->seal;
2499 write_lock(&cifs_tcp_ses_lock);
2500 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
2501 write_unlock(&cifs_tcp_ses_lock);
2502 }
2503
2504 /* we can have only one retry value for a connection
2505 to a share so for resources mounted more than once
2506 to the same server share the last value passed in
2507 for the retry flag is used */
2508 tcon->retry = volume_info->retry;
2509 tcon->nocase = volume_info->nocase;
2510 tcon->local_lease = volume_info->local_lease;
2511 }
2512 if (pSesInfo) {
2513 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2514 sb->s_maxbytes = MAX_LFS_FILESIZE;
2515 else
2516 sb->s_maxbytes = MAX_NON_LFS;
2517 }
2518 2564
2519 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2565 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
2520 sb->s_time_gran = 100; 2566 sb->s_time_gran = 100;
2521 2567
2522 if (rc) 2568 /* search for existing tcon to this server share */
2569 tcon = cifs_get_tcon(pSesInfo, volume_info);
2570 if (IS_ERR(tcon)) {
2571 rc = PTR_ERR(tcon);
2572 tcon = NULL;
2523 goto remote_path_check; 2573 goto remote_path_check;
2574 }
2524 2575
2525 cifs_sb->tcon = tcon; 2576 cifs_sb->tcon = tcon;
2526 2577
@@ -2544,7 +2595,7 @@ try_mount_again:
2544 2595
2545 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 2596 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2546 cifs_sb->rsize = 1024 * 127; 2597 cifs_sb->rsize = 1024 * 127;
2547 cFYI(DBG2, ("no very large read support, rsize now 127K")); 2598 cFYI(DBG2, "no very large read support, rsize now 127K");
2548 } 2599 }
2549 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) 2600 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2550 cifs_sb->wsize = min(cifs_sb->wsize, 2601 cifs_sb->wsize = min(cifs_sb->wsize,
@@ -2593,7 +2644,7 @@ remote_path_check:
2593 goto mount_fail_check; 2644 goto mount_fail_check;
2594 } 2645 }
2595 2646
2596 cFYI(1, ("Getting referral for: %s", full_path)); 2647 cFYI(1, "Getting referral for: %s", full_path);
2597 rc = get_dfs_path(xid, pSesInfo , full_path + 1, 2648 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2598 cifs_sb->local_nls, &num_referrals, &referrals, 2649 cifs_sb->local_nls, &num_referrals, &referrals,
2599 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 2650 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -2707,7 +2758,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2707 by Samba (not sure whether other servers allow 2758 by Samba (not sure whether other servers allow
2708 NTLMv2 password here) */ 2759 NTLMv2 password here) */
2709#ifdef CONFIG_CIFS_WEAK_PW_HASH 2760#ifdef CONFIG_CIFS_WEAK_PW_HASH
2710 if ((extended_security & CIFSSEC_MAY_LANMAN) && 2761 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
2711 (ses->server->secType == LANMAN)) 2762 (ses->server->secType == LANMAN))
2712 calc_lanman_hash(tcon->password, ses->server->cryptKey, 2763 calc_lanman_hash(tcon->password, ses->server->cryptKey,
2713 ses->server->secMode & 2764 ses->server->secMode &
@@ -2778,13 +2829,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2778 if (length == 3) { 2829 if (length == 3) {
2779 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && 2830 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
2780 (bcc_ptr[2] == 'C')) { 2831 (bcc_ptr[2] == 'C')) {
2781 cFYI(1, ("IPC connection")); 2832 cFYI(1, "IPC connection");
2782 tcon->ipc = 1; 2833 tcon->ipc = 1;
2783 } 2834 }
2784 } else if (length == 2) { 2835 } else if (length == 2) {
2785 if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) { 2836 if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
2786 /* the most common case */ 2837 /* the most common case */
2787 cFYI(1, ("disk share connection")); 2838 cFYI(1, "disk share connection");
2788 } 2839 }
2789 } 2840 }
2790 bcc_ptr += length + 1; 2841 bcc_ptr += length + 1;
@@ -2797,7 +2848,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2797 bytes_left, is_unicode, 2848 bytes_left, is_unicode,
2798 nls_codepage); 2849 nls_codepage);
2799 2850
2800 cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem)); 2851 cFYI(1, "nativeFileSystem=%s", tcon->nativeFileSystem);
2801 2852
2802 if ((smb_buffer_response->WordCount == 3) || 2853 if ((smb_buffer_response->WordCount == 3) ||
2803 (smb_buffer_response->WordCount == 7)) 2854 (smb_buffer_response->WordCount == 7))
@@ -2805,7 +2856,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2805 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); 2856 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
2806 else 2857 else
2807 tcon->Flags = 0; 2858 tcon->Flags = 0;
2808 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags)); 2859 cFYI(1, "Tcon flags: 0x%x ", tcon->Flags);
2809 } else if ((rc == 0) && tcon == NULL) { 2860 } else if ((rc == 0) && tcon == NULL) {
2810 /* all we need to save for IPC$ connection */ 2861 /* all we need to save for IPC$ connection */
2811 ses->ipc_tid = smb_buffer_response->Tid; 2862 ses->ipc_tid = smb_buffer_response->Tid;
@@ -2833,57 +2884,61 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
2833 return rc; 2884 return rc;
2834} 2885}
2835 2886
2836int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, 2887int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses)
2837 struct nls_table *nls_info)
2838{ 2888{
2839 int rc = 0; 2889 int rc = 0;
2840 int first_time = 0; 2890 struct TCP_Server_Info *server = ses->server;
2841 struct TCP_Server_Info *server = pSesInfo->server; 2891
2842 2892 /* only send once per connect */
2843 /* what if server changes its buffer size after dropping the session? */ 2893 if (server->maxBuf != 0)
2844 if (server->maxBuf == 0) /* no need to send on reconnect */ { 2894 return 0;
2845 rc = CIFSSMBNegotiate(xid, pSesInfo); 2895
2846 if (rc == -EAGAIN) { 2896 rc = CIFSSMBNegotiate(xid, ses);
2847 /* retry only once on 1st time connection */ 2897 if (rc == -EAGAIN) {
2848 rc = CIFSSMBNegotiate(xid, pSesInfo); 2898 /* retry only once on 1st time connection */
2849 if (rc == -EAGAIN) 2899 rc = CIFSSMBNegotiate(xid, ses);
2850 rc = -EHOSTDOWN; 2900 if (rc == -EAGAIN)
2851 } 2901 rc = -EHOSTDOWN;
2852 if (rc == 0) { 2902 }
2853 spin_lock(&GlobalMid_Lock); 2903 if (rc == 0) {
2854 if (server->tcpStatus != CifsExiting) 2904 spin_lock(&GlobalMid_Lock);
2855 server->tcpStatus = CifsGood; 2905 if (server->tcpStatus != CifsExiting)
2856 else 2906 server->tcpStatus = CifsGood;
2857 rc = -EHOSTDOWN; 2907 else
2858 spin_unlock(&GlobalMid_Lock); 2908 rc = -EHOSTDOWN;
2909 spin_unlock(&GlobalMid_Lock);
2859 2910
2860 }
2861 first_time = 1;
2862 } 2911 }
2863 2912
2864 if (rc) 2913 return rc;
2865 goto ss_err_exit; 2914}
2915
2916
2917int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
2918 struct nls_table *nls_info)
2919{
2920 int rc = 0;
2921 struct TCP_Server_Info *server = ses->server;
2866 2922
2867 pSesInfo->flags = 0; 2923 ses->flags = 0;
2868 pSesInfo->capabilities = server->capabilities; 2924 ses->capabilities = server->capabilities;
2869 if (linuxExtEnabled == 0) 2925 if (linuxExtEnabled == 0)
2870 pSesInfo->capabilities &= (~CAP_UNIX); 2926 ses->capabilities &= (~CAP_UNIX);
2871 2927
2872 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 2928 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
2873 server->secMode, server->capabilities, server->timeAdj)); 2929 server->secMode, server->capabilities, server->timeAdj);
2874 2930
2875 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); 2931 rc = CIFS_SessSetup(xid, ses, nls_info);
2876 if (rc) { 2932 if (rc) {
2877 cERROR(1, ("Send error in SessSetup = %d", rc)); 2933 cERROR(1, "Send error in SessSetup = %d", rc);
2878 } else { 2934 } else {
2879 cFYI(1, ("CIFS Session Established successfully")); 2935 cFYI(1, "CIFS Session Established successfully");
2880 spin_lock(&GlobalMid_Lock); 2936 spin_lock(&GlobalMid_Lock);
2881 pSesInfo->status = CifsGood; 2937 ses->status = CifsGood;
2882 pSesInfo->need_reconnect = false; 2938 ses->need_reconnect = false;
2883 spin_unlock(&GlobalMid_Lock); 2939 spin_unlock(&GlobalMid_Lock);
2884 } 2940 }
2885 2941
2886ss_err_exit:
2887 return rc; 2942 return rc;
2888} 2943}
2889 2944
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e9f7ecc2714b..391816b461ca 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -73,7 +73,7 @@ cifs_bp_rename_retry:
73 namelen += (1 + temp->d_name.len); 73 namelen += (1 + temp->d_name.len);
74 temp = temp->d_parent; 74 temp = temp->d_parent;
75 if (temp == NULL) { 75 if (temp == NULL) {
76 cERROR(1, ("corrupt dentry")); 76 cERROR(1, "corrupt dentry");
77 return NULL; 77 return NULL;
78 } 78 }
79 } 79 }
@@ -90,19 +90,18 @@ cifs_bp_rename_retry:
90 full_path[namelen] = dirsep; 90 full_path[namelen] = dirsep;
91 strncpy(full_path + namelen + 1, temp->d_name.name, 91 strncpy(full_path + namelen + 1, temp->d_name.name,
92 temp->d_name.len); 92 temp->d_name.len);
93 cFYI(0, ("name: %s", full_path + namelen)); 93 cFYI(0, "name: %s", full_path + namelen);
94 } 94 }
95 temp = temp->d_parent; 95 temp = temp->d_parent;
96 if (temp == NULL) { 96 if (temp == NULL) {
97 cERROR(1, ("corrupt dentry")); 97 cERROR(1, "corrupt dentry");
98 kfree(full_path); 98 kfree(full_path);
99 return NULL; 99 return NULL;
100 } 100 }
101 } 101 }
102 if (namelen != pplen + dfsplen) { 102 if (namelen != pplen + dfsplen) {
103 cERROR(1, 103 cERROR(1, "did not end path lookup where expected namelen is %d",
104 ("did not end path lookup where expected namelen is %d", 104 namelen);
105 namelen));
106 /* presumably this is only possible if racing with a rename 105 /* presumably this is only possible if racing with a rename
107 of one of the parent directories (we can not lock the dentries 106 of one of the parent directories (we can not lock the dentries
108 above us to prevent this, but retrying should be harmless) */ 107 above us to prevent this, but retrying should be harmless) */
@@ -130,6 +129,12 @@ cifs_bp_rename_retry:
130 return full_path; 129 return full_path;
131} 130}
132 131
132/*
133 * When called with struct file pointer set to NULL, there is no way we could
134 * update file->private_data, but getting it stuck on openFileList provides a
135 * way to access it from cifs_fill_filedata and thereby set file->private_data
136 * from cifs_open.
137 */
133struct cifsFileInfo * 138struct cifsFileInfo *
134cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, 139cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
135 struct file *file, struct vfsmount *mnt, unsigned int oflags) 140 struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -173,7 +178,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
173 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 178 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
174 pCifsInode->clientCanCacheAll = true; 179 pCifsInode->clientCanCacheAll = true;
175 pCifsInode->clientCanCacheRead = true; 180 pCifsInode->clientCanCacheRead = true;
176 cFYI(1, ("Exclusive Oplock inode %p", newinode)); 181 cFYI(1, "Exclusive Oplock inode %p", newinode);
177 } else if ((oplock & 0xF) == OPLOCK_READ) 182 } else if ((oplock & 0xF) == OPLOCK_READ)
178 pCifsInode->clientCanCacheRead = true; 183 pCifsInode->clientCanCacheRead = true;
179 } 184 }
@@ -183,16 +188,17 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
183} 188}
184 189
185int cifs_posix_open(char *full_path, struct inode **pinode, 190int cifs_posix_open(char *full_path, struct inode **pinode,
186 struct vfsmount *mnt, int mode, int oflags, 191 struct vfsmount *mnt, struct super_block *sb,
187 __u32 *poplock, __u16 *pnetfid, int xid) 192 int mode, int oflags,
193 __u32 *poplock, __u16 *pnetfid, int xid)
188{ 194{
189 int rc; 195 int rc;
190 FILE_UNIX_BASIC_INFO *presp_data; 196 FILE_UNIX_BASIC_INFO *presp_data;
191 __u32 posix_flags = 0; 197 __u32 posix_flags = 0;
192 struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); 198 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
193 struct cifs_fattr fattr; 199 struct cifs_fattr fattr;
194 200
195 cFYI(1, ("posix open %s", full_path)); 201 cFYI(1, "posix open %s", full_path);
196 202
197 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 203 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
198 if (presp_data == NULL) 204 if (presp_data == NULL)
@@ -242,7 +248,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
242 248
243 /* get new inode and set it up */ 249 /* get new inode and set it up */
244 if (*pinode == NULL) { 250 if (*pinode == NULL) {
245 *pinode = cifs_iget(mnt->mnt_sb, &fattr); 251 cifs_fill_uniqueid(sb, &fattr);
252 *pinode = cifs_iget(sb, &fattr);
246 if (!*pinode) { 253 if (!*pinode) {
247 rc = -ENOMEM; 254 rc = -ENOMEM;
248 goto posix_open_ret; 255 goto posix_open_ret;
@@ -251,7 +258,18 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
251 cifs_fattr_to_inode(*pinode, &fattr); 258 cifs_fattr_to_inode(*pinode, &fattr);
252 } 259 }
253 260
254 cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); 261 /*
262 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
263 * file->private_data.
264 */
265 if (mnt) {
266 struct cifsFileInfo *pfile_info;
267
268 pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
269 oflags);
270 if (pfile_info == NULL)
271 rc = -ENOMEM;
272 }
255 273
256posix_open_ret: 274posix_open_ret:
257 kfree(presp_data); 275 kfree(presp_data);
@@ -315,13 +333,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
315 if (nd && (nd->flags & LOOKUP_OPEN)) 333 if (nd && (nd->flags & LOOKUP_OPEN))
316 oflags = nd->intent.open.flags; 334 oflags = nd->intent.open.flags;
317 else 335 else
318 oflags = FMODE_READ; 336 oflags = FMODE_READ | SMB_O_CREAT;
319 337
320 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && 338 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
321 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 339 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
322 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 340 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
323 rc = cifs_posix_open(full_path, &newinode, nd->path.mnt, 341 rc = cifs_posix_open(full_path, &newinode,
324 mode, oflags, &oplock, &fileHandle, xid); 342 nd ? nd->path.mnt : NULL,
343 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
325 /* EIO could indicate that (posix open) operation is not 344 /* EIO could indicate that (posix open) operation is not
326 supported, despite what server claimed in capability 345 supported, despite what server claimed in capability
327 negotation. EREMOTE indicates DFS junction, which is not 346 negotation. EREMOTE indicates DFS junction, which is not
@@ -358,7 +377,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
358 else if ((oflags & O_CREAT) == O_CREAT) 377 else if ((oflags & O_CREAT) == O_CREAT)
359 disposition = FILE_OPEN_IF; 378 disposition = FILE_OPEN_IF;
360 else 379 else
361 cFYI(1, ("Create flag not set in create function")); 380 cFYI(1, "Create flag not set in create function");
362 } 381 }
363 382
364 /* BB add processing to set equivalent of mode - e.g. via CreateX with 383 /* BB add processing to set equivalent of mode - e.g. via CreateX with
@@ -394,7 +413,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
394 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 413 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
395 } 414 }
396 if (rc) { 415 if (rc) {
397 cFYI(1, ("cifs_create returned 0x%x", rc)); 416 cFYI(1, "cifs_create returned 0x%x", rc);
398 goto cifs_create_out; 417 goto cifs_create_out;
399 } 418 }
400 419
@@ -457,15 +476,22 @@ cifs_create_set_dentry:
457 if (rc == 0) 476 if (rc == 0)
458 setup_cifs_dentry(tcon, direntry, newinode); 477 setup_cifs_dentry(tcon, direntry, newinode);
459 else 478 else
460 cFYI(1, ("Create worked, get_inode_info failed rc = %d", rc)); 479 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
461 480
462 /* nfsd case - nfs srv does not set nd */ 481 /* nfsd case - nfs srv does not set nd */
463 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { 482 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
464 /* mknod case - do not leave file open */ 483 /* mknod case - do not leave file open */
465 CIFSSMBClose(xid, tcon, fileHandle); 484 CIFSSMBClose(xid, tcon, fileHandle);
466 } else if (!(posix_create) && (newinode)) { 485 } else if (!(posix_create) && (newinode)) {
467 cifs_new_fileinfo(newinode, fileHandle, NULL, 486 struct cifsFileInfo *pfile_info;
468 nd->path.mnt, oflags); 487 /*
488 * cifs_fill_filedata() takes care of setting cifsFileInfo
489 * pointer to file->private_data.
490 */
491 pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
492 nd->path.mnt, oflags);
493 if (pfile_info == NULL)
494 rc = -ENOMEM;
469 } 495 }
470cifs_create_out: 496cifs_create_out:
471 kfree(buf); 497 kfree(buf);
@@ -531,7 +557,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
531 u16 fileHandle; 557 u16 fileHandle;
532 FILE_ALL_INFO *buf; 558 FILE_ALL_INFO *buf;
533 559
534 cFYI(1, ("sfu compat create special file")); 560 cFYI(1, "sfu compat create special file");
535 561
536 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 562 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
537 if (buf == NULL) { 563 if (buf == NULL) {
@@ -616,8 +642,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
616 642
617 xid = GetXid(); 643 xid = GetXid();
618 644
619 cFYI(1, ("parent inode = 0x%p name is: %s and dentry = 0x%p", 645 cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
620 parent_dir_inode, direntry->d_name.name, direntry)); 646 parent_dir_inode, direntry->d_name.name, direntry);
621 647
622 /* check whether path exists */ 648 /* check whether path exists */
623 649
@@ -632,7 +658,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
632 int i; 658 int i;
633 for (i = 0; i < direntry->d_name.len; i++) 659 for (i = 0; i < direntry->d_name.len; i++)
634 if (direntry->d_name.name[i] == '\\') { 660 if (direntry->d_name.name[i] == '\\') {
635 cFYI(1, ("Invalid file name")); 661 cFYI(1, "Invalid file name");
636 FreeXid(xid); 662 FreeXid(xid);
637 return ERR_PTR(-EINVAL); 663 return ERR_PTR(-EINVAL);
638 } 664 }
@@ -657,11 +683,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
657 } 683 }
658 684
659 if (direntry->d_inode != NULL) { 685 if (direntry->d_inode != NULL) {
660 cFYI(1, ("non-NULL inode in lookup")); 686 cFYI(1, "non-NULL inode in lookup");
661 } else { 687 } else {
662 cFYI(1, ("NULL inode in lookup")); 688 cFYI(1, "NULL inode in lookup");
663 } 689 }
664 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); 690 cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
665 691
666 /* Posix open is only called (at lookup time) for file create now. 692 /* Posix open is only called (at lookup time) for file create now.
667 * For opens (rather than creates), because we do not know if it 693 * For opens (rather than creates), because we do not know if it
@@ -678,6 +704,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
678 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 704 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
679 (nd->intent.open.flags & O_CREAT)) { 705 (nd->intent.open.flags & O_CREAT)) {
680 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, 706 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
707 parent_dir_inode->i_sb,
681 nd->intent.open.create_mode, 708 nd->intent.open.create_mode,
682 nd->intent.open.flags, &oplock, 709 nd->intent.open.flags, &oplock,
683 &fileHandle, xid); 710 &fileHandle, xid);
@@ -723,7 +750,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
723 /* if it was once a directory (but how can we tell?) we could do 750 /* if it was once a directory (but how can we tell?) we could do
724 shrink_dcache_parent(direntry); */ 751 shrink_dcache_parent(direntry); */
725 } else if (rc != -EACCES) { 752 } else if (rc != -EACCES) {
726 cERROR(1, ("Unexpected lookup error %d", rc)); 753 cERROR(1, "Unexpected lookup error %d", rc);
727 /* We special case check for Access Denied - since that 754 /* We special case check for Access Denied - since that
728 is a common return code */ 755 is a common return code */
729 } 756 }
@@ -742,8 +769,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
742 if (cifs_revalidate_dentry(direntry)) 769 if (cifs_revalidate_dentry(direntry))
743 return 0; 770 return 0;
744 } else { 771 } else {
745 cFYI(1, ("neg dentry 0x%p name = %s", 772 cFYI(1, "neg dentry 0x%p name = %s",
746 direntry, direntry->d_name.name)); 773 direntry, direntry->d_name.name);
747 if (time_after(jiffies, direntry->d_time + HZ) || 774 if (time_after(jiffies, direntry->d_time + HZ) ||
748 !lookupCacheEnabled) { 775 !lookupCacheEnabled) {
749 d_drop(direntry); 776 d_drop(direntry);
@@ -758,7 +785,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
758{ 785{
759 int rc = 0; 786 int rc = 0;
760 787
761 cFYI(1, ("In cifs d_delete, name = %s", direntry->d_name.name)); 788 cFYI(1, "In cifs d_delete, name = %s", direntry->d_name.name);
762 789
763 return rc; 790 return rc;
764} */ 791} */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 6f8a0e3fb25b..4db2c5e7283f 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -106,14 +106,14 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
106 /* search for server name delimiter */ 106 /* search for server name delimiter */
107 len = strlen(unc); 107 len = strlen(unc);
108 if (len < 3) { 108 if (len < 3) {
109 cFYI(1, ("%s: unc is too short: %s", __func__, unc)); 109 cFYI(1, "%s: unc is too short: %s", __func__, unc);
110 return -EINVAL; 110 return -EINVAL;
111 } 111 }
112 len -= 2; 112 len -= 2;
113 name = memchr(unc+2, '\\', len); 113 name = memchr(unc+2, '\\', len);
114 if (!name) { 114 if (!name) {
115 cFYI(1, ("%s: probably server name is whole unc: %s", 115 cFYI(1, "%s: probably server name is whole unc: %s",
116 __func__, unc)); 116 __func__, unc);
117 } else { 117 } else {
118 len = (name - unc) - 2/* leading // */; 118 len = (name - unc) - 2/* leading // */;
119 } 119 }
@@ -127,8 +127,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
127 name[len] = 0; 127 name[len] = 0;
128 128
129 if (is_ip(name)) { 129 if (is_ip(name)) {
130 cFYI(1, ("%s: it is IP, skipping dns upcall: %s", 130 cFYI(1, "%s: it is IP, skipping dns upcall: %s",
131 __func__, name)); 131 __func__, name);
132 data = name; 132 data = name;
133 goto skip_upcall; 133 goto skip_upcall;
134 } 134 }
@@ -138,7 +138,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
138 len = rkey->type_data.x[0]; 138 len = rkey->type_data.x[0];
139 data = rkey->payload.data; 139 data = rkey->payload.data;
140 } else { 140 } else {
141 cERROR(1, ("%s: unable to resolve: %s", __func__, name)); 141 cERROR(1, "%s: unable to resolve: %s", __func__, name);
142 goto out; 142 goto out;
143 } 143 }
144 144
@@ -148,10 +148,10 @@ skip_upcall:
148 if (*ip_addr) { 148 if (*ip_addr) {
149 memcpy(*ip_addr, data, len + 1); 149 memcpy(*ip_addr, data, len + 1);
150 if (!IS_ERR(rkey)) 150 if (!IS_ERR(rkey))
151 cFYI(1, ("%s: resolved: %s to %s", __func__, 151 cFYI(1, "%s: resolved: %s to %s", __func__,
152 name, 152 name,
153 *ip_addr 153 *ip_addr
154 )); 154 );
155 rc = 0; 155 rc = 0;
156 } else { 156 } else {
157 rc = -ENOMEM; 157 rc = -ENOMEM;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 6177f7cca16a..993f82045bf6 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -49,7 +49,7 @@
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
52 cFYI(1, ("get parent for %p", dentry)); 52 cFYI(1, "get parent for %p", dentry);
53 return ERR_PTR(-EACCES); 53 return ERR_PTR(-EACCES);
54} 54}
55 55
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9b11a8f56f3a..a83541ec9713 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with files 4 * vfs operations that deal with files
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2007 6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org) 8 * Jeremy Allison (jra@samba.org)
9 * 9 *
@@ -108,8 +108,7 @@ static inline int cifs_get_disposition(unsigned int flags)
108/* all arguments to this function must be checked for validity in caller */ 108/* all arguments to this function must be checked for validity in caller */
109static inline int 109static inline int
110cifs_posix_open_inode_helper(struct inode *inode, struct file *file, 110cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
111 struct cifsInodeInfo *pCifsInode, 111 struct cifsInodeInfo *pCifsInode, __u32 oplock,
112 struct cifsFileInfo *pCifsFile, __u32 oplock,
113 u16 netfid) 112 u16 netfid)
114{ 113{
115 114
@@ -136,15 +135,15 @@ cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
136 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 135 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
137 (file->f_path.dentry->d_inode->i_size == 136 (file->f_path.dentry->d_inode->i_size ==
138 (loff_t)le64_to_cpu(buf->EndOfFile))) { 137 (loff_t)le64_to_cpu(buf->EndOfFile))) {
139 cFYI(1, ("inode unchanged on server")); 138 cFYI(1, "inode unchanged on server");
140 } else { 139 } else {
141 if (file->f_path.dentry->d_inode->i_mapping) { 140 if (file->f_path.dentry->d_inode->i_mapping) {
142 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); 141 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
143 if (rc != 0) 142 if (rc != 0)
144 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 143 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145 } 144 }
146 cFYI(1, ("invalidating remote inode since open detected it " 145 cFYI(1, "invalidating remote inode since open detected it "
147 "changed")); 146 "changed");
148 invalidate_remote_inode(file->f_path.dentry->d_inode); 147 invalidate_remote_inode(file->f_path.dentry->d_inode);
149 } */ 148 } */
150 149
@@ -152,8 +151,8 @@ psx_client_can_cache:
152 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 151 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
153 pCifsInode->clientCanCacheAll = true; 152 pCifsInode->clientCanCacheAll = true;
154 pCifsInode->clientCanCacheRead = true; 153 pCifsInode->clientCanCacheRead = true;
155 cFYI(1, ("Exclusive Oplock granted on inode %p", 154 cFYI(1, "Exclusive Oplock granted on inode %p",
156 file->f_path.dentry->d_inode)); 155 file->f_path.dentry->d_inode);
157 } else if ((oplock & 0xF) == OPLOCK_READ) 156 } else if ((oplock & 0xF) == OPLOCK_READ)
158 pCifsInode->clientCanCacheRead = true; 157 pCifsInode->clientCanCacheRead = true;
159 158
@@ -190,8 +189,8 @@ cifs_fill_filedata(struct file *file)
190 if (file->private_data != NULL) { 189 if (file->private_data != NULL) {
191 return pCifsFile; 190 return pCifsFile;
192 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) 191 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
193 cERROR(1, ("could not find file instance for " 192 cERROR(1, "could not find file instance for "
194 "new file %p", file)); 193 "new file %p", file);
195 return NULL; 194 return NULL;
196} 195}
197 196
@@ -217,7 +216,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
217 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 216 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
218 (file->f_path.dentry->d_inode->i_size == 217 (file->f_path.dentry->d_inode->i_size ==
219 (loff_t)le64_to_cpu(buf->EndOfFile))) { 218 (loff_t)le64_to_cpu(buf->EndOfFile))) {
220 cFYI(1, ("inode unchanged on server")); 219 cFYI(1, "inode unchanged on server");
221 } else { 220 } else {
222 if (file->f_path.dentry->d_inode->i_mapping) { 221 if (file->f_path.dentry->d_inode->i_mapping) {
223 /* BB no need to lock inode until after invalidate 222 /* BB no need to lock inode until after invalidate
@@ -226,8 +225,8 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
226 if (rc != 0) 225 if (rc != 0)
227 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 226 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
228 } 227 }
229 cFYI(1, ("invalidating remote inode since open detected it " 228 cFYI(1, "invalidating remote inode since open detected it "
230 "changed")); 229 "changed");
231 invalidate_remote_inode(file->f_path.dentry->d_inode); 230 invalidate_remote_inode(file->f_path.dentry->d_inode);
232 } 231 }
233 232
@@ -242,8 +241,8 @@ client_can_cache:
242 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 241 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
243 pCifsInode->clientCanCacheAll = true; 242 pCifsInode->clientCanCacheAll = true;
244 pCifsInode->clientCanCacheRead = true; 243 pCifsInode->clientCanCacheRead = true;
245 cFYI(1, ("Exclusive Oplock granted on inode %p", 244 cFYI(1, "Exclusive Oplock granted on inode %p",
246 file->f_path.dentry->d_inode)); 245 file->f_path.dentry->d_inode);
247 } else if ((*oplock & 0xF) == OPLOCK_READ) 246 } else if ((*oplock & 0xF) == OPLOCK_READ)
248 pCifsInode->clientCanCacheRead = true; 247 pCifsInode->clientCanCacheRead = true;
249 248
@@ -285,8 +284,8 @@ int cifs_open(struct inode *inode, struct file *file)
285 return rc; 284 return rc;
286 } 285 }
287 286
288 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", 287 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
289 inode, file->f_flags, full_path)); 288 inode, file->f_flags, full_path);
290 289
291 if (oplockEnabled) 290 if (oplockEnabled)
292 oplock = REQ_OPLOCK; 291 oplock = REQ_OPLOCK;
@@ -298,27 +297,29 @@ int cifs_open(struct inode *inode, struct file *file)
298 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 297 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
299 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 298 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
300 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 299 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
300 oflags |= SMB_O_CREAT;
301 /* can not refresh inode info since size could be stale */ 301 /* can not refresh inode info since size could be stale */
302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, 302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
303 cifs_sb->mnt_file_mode /* ignored */, 303 inode->i_sb,
304 oflags, &oplock, &netfid, xid); 304 cifs_sb->mnt_file_mode /* ignored */,
305 oflags, &oplock, &netfid, xid);
305 if (rc == 0) { 306 if (rc == 0) {
306 cFYI(1, ("posix open succeeded")); 307 cFYI(1, "posix open succeeded");
307 /* no need for special case handling of setting mode 308 /* no need for special case handling of setting mode
308 on read only files needed here */ 309 on read only files needed here */
309 310
310 pCifsFile = cifs_fill_filedata(file); 311 pCifsFile = cifs_fill_filedata(file);
311 cifs_posix_open_inode_helper(inode, file, pCifsInode, 312 cifs_posix_open_inode_helper(inode, file, pCifsInode,
312 pCifsFile, oplock, netfid); 313 oplock, netfid);
313 goto out; 314 goto out;
314 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 315 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
315 if (tcon->ses->serverNOS) 316 if (tcon->ses->serverNOS)
316 cERROR(1, ("server %s of type %s returned" 317 cERROR(1, "server %s of type %s returned"
317 " unexpected error on SMB posix open" 318 " unexpected error on SMB posix open"
318 ", disabling posix open support." 319 ", disabling posix open support."
319 " Check if server update available.", 320 " Check if server update available.",
320 tcon->ses->serverName, 321 tcon->ses->serverName,
321 tcon->ses->serverNOS)); 322 tcon->ses->serverNOS);
322 tcon->broken_posix_open = true; 323 tcon->broken_posix_open = true;
323 } else if ((rc != -EIO) && (rc != -EREMOTE) && 324 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
324 (rc != -EOPNOTSUPP)) /* path not found or net err */ 325 (rc != -EOPNOTSUPP)) /* path not found or net err */
@@ -386,7 +387,7 @@ int cifs_open(struct inode *inode, struct file *file)
386 & CIFS_MOUNT_MAP_SPECIAL_CHR); 387 & CIFS_MOUNT_MAP_SPECIAL_CHR);
387 } 388 }
388 if (rc) { 389 if (rc) {
389 cFYI(1, ("cifs_open returned 0x%x", rc)); 390 cFYI(1, "cifs_open returned 0x%x", rc);
390 goto out; 391 goto out;
391 } 392 }
392 393
@@ -469,7 +470,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
469 } 470 }
470 471
471 if (file->f_path.dentry == NULL) { 472 if (file->f_path.dentry == NULL) {
472 cERROR(1, ("no valid name if dentry freed")); 473 cERROR(1, "no valid name if dentry freed");
473 dump_stack(); 474 dump_stack();
474 rc = -EBADF; 475 rc = -EBADF;
475 goto reopen_error_exit; 476 goto reopen_error_exit;
@@ -477,7 +478,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
477 478
478 inode = file->f_path.dentry->d_inode; 479 inode = file->f_path.dentry->d_inode;
479 if (inode == NULL) { 480 if (inode == NULL) {
480 cERROR(1, ("inode not valid")); 481 cERROR(1, "inode not valid");
481 dump_stack(); 482 dump_stack();
482 rc = -EBADF; 483 rc = -EBADF;
483 goto reopen_error_exit; 484 goto reopen_error_exit;
@@ -499,8 +500,8 @@ reopen_error_exit:
499 return rc; 500 return rc;
500 } 501 }
501 502
502 cFYI(1, ("inode = 0x%p file flags 0x%x for %s", 503 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
503 inode, file->f_flags, full_path)); 504 inode, file->f_flags, full_path);
504 505
505 if (oplockEnabled) 506 if (oplockEnabled)
506 oplock = REQ_OPLOCK; 507 oplock = REQ_OPLOCK;
@@ -513,10 +514,11 @@ reopen_error_exit:
513 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 514 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
514 /* can not refresh inode info since size could be stale */ 515 /* can not refresh inode info since size could be stale */
515 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, 516 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
516 cifs_sb->mnt_file_mode /* ignored */, 517 inode->i_sb,
517 oflags, &oplock, &netfid, xid); 518 cifs_sb->mnt_file_mode /* ignored */,
519 oflags, &oplock, &netfid, xid);
518 if (rc == 0) { 520 if (rc == 0) {
519 cFYI(1, ("posix reopen succeeded")); 521 cFYI(1, "posix reopen succeeded");
520 goto reopen_success; 522 goto reopen_success;
521 } 523 }
522 /* fallthrough to retry open the old way on errors, especially 524 /* fallthrough to retry open the old way on errors, especially
@@ -537,8 +539,8 @@ reopen_error_exit:
537 CIFS_MOUNT_MAP_SPECIAL_CHR); 539 CIFS_MOUNT_MAP_SPECIAL_CHR);
538 if (rc) { 540 if (rc) {
539 mutex_unlock(&pCifsFile->fh_mutex); 541 mutex_unlock(&pCifsFile->fh_mutex);
540 cFYI(1, ("cifs_open returned 0x%x", rc)); 542 cFYI(1, "cifs_open returned 0x%x", rc);
541 cFYI(1, ("oplock: %d", oplock)); 543 cFYI(1, "oplock: %d", oplock);
542 } else { 544 } else {
543reopen_success: 545reopen_success:
544 pCifsFile->netfid = netfid; 546 pCifsFile->netfid = netfid;
@@ -570,8 +572,8 @@ reopen_success:
570 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 572 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
571 pCifsInode->clientCanCacheAll = true; 573 pCifsInode->clientCanCacheAll = true;
572 pCifsInode->clientCanCacheRead = true; 574 pCifsInode->clientCanCacheRead = true;
573 cFYI(1, ("Exclusive Oplock granted on inode %p", 575 cFYI(1, "Exclusive Oplock granted on inode %p",
574 file->f_path.dentry->d_inode)); 576 file->f_path.dentry->d_inode);
575 } else if ((oplock & 0xF) == OPLOCK_READ) { 577 } else if ((oplock & 0xF) == OPLOCK_READ) {
576 pCifsInode->clientCanCacheRead = true; 578 pCifsInode->clientCanCacheRead = true;
577 pCifsInode->clientCanCacheAll = false; 579 pCifsInode->clientCanCacheAll = false;
@@ -619,8 +621,7 @@ int cifs_close(struct inode *inode, struct file *file)
619 the struct would be in each open file, 621 the struct would be in each open file,
620 but this should give enough time to 622 but this should give enough time to
621 clear the socket */ 623 clear the socket */
622 cFYI(DBG2, 624 cFYI(DBG2, "close delay, write pending");
623 ("close delay, write pending"));
624 msleep(timeout); 625 msleep(timeout);
625 timeout *= 4; 626 timeout *= 4;
626 } 627 }
@@ -653,7 +654,7 @@ int cifs_close(struct inode *inode, struct file *file)
653 654
654 read_lock(&GlobalSMBSeslock); 655 read_lock(&GlobalSMBSeslock);
655 if (list_empty(&(CIFS_I(inode)->openFileList))) { 656 if (list_empty(&(CIFS_I(inode)->openFileList))) {
656 cFYI(1, ("closing last open instance for inode %p", inode)); 657 cFYI(1, "closing last open instance for inode %p", inode);
657 /* if the file is not open we do not know if we can cache info 658 /* if the file is not open we do not know if we can cache info
658 on this inode, much less write behind and read ahead */ 659 on this inode, much less write behind and read ahead */
659 CIFS_I(inode)->clientCanCacheRead = false; 660 CIFS_I(inode)->clientCanCacheRead = false;
@@ -674,7 +675,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
674 (struct cifsFileInfo *)file->private_data; 675 (struct cifsFileInfo *)file->private_data;
675 char *ptmp; 676 char *ptmp;
676 677
677 cFYI(1, ("Closedir inode = 0x%p", inode)); 678 cFYI(1, "Closedir inode = 0x%p", inode);
678 679
679 xid = GetXid(); 680 xid = GetXid();
680 681
@@ -685,22 +686,22 @@ int cifs_closedir(struct inode *inode, struct file *file)
685 686
686 pTcon = cifs_sb->tcon; 687 pTcon = cifs_sb->tcon;
687 688
688 cFYI(1, ("Freeing private data in close dir")); 689 cFYI(1, "Freeing private data in close dir");
689 write_lock(&GlobalSMBSeslock); 690 write_lock(&GlobalSMBSeslock);
690 if (!pCFileStruct->srch_inf.endOfSearch && 691 if (!pCFileStruct->srch_inf.endOfSearch &&
691 !pCFileStruct->invalidHandle) { 692 !pCFileStruct->invalidHandle) {
692 pCFileStruct->invalidHandle = true; 693 pCFileStruct->invalidHandle = true;
693 write_unlock(&GlobalSMBSeslock); 694 write_unlock(&GlobalSMBSeslock);
694 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 695 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
695 cFYI(1, ("Closing uncompleted readdir with rc %d", 696 cFYI(1, "Closing uncompleted readdir with rc %d",
696 rc)); 697 rc);
697 /* not much we can do if it fails anyway, ignore rc */ 698 /* not much we can do if it fails anyway, ignore rc */
698 rc = 0; 699 rc = 0;
699 } else 700 } else
700 write_unlock(&GlobalSMBSeslock); 701 write_unlock(&GlobalSMBSeslock);
701 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; 702 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
702 if (ptmp) { 703 if (ptmp) {
703 cFYI(1, ("closedir free smb buf in srch struct")); 704 cFYI(1, "closedir free smb buf in srch struct");
704 pCFileStruct->srch_inf.ntwrk_buf_start = NULL; 705 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
705 if (pCFileStruct->srch_inf.smallBuf) 706 if (pCFileStruct->srch_inf.smallBuf)
706 cifs_small_buf_release(ptmp); 707 cifs_small_buf_release(ptmp);
@@ -748,49 +749,49 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
748 rc = -EACCES; 749 rc = -EACCES;
749 xid = GetXid(); 750 xid = GetXid();
750 751
751 cFYI(1, ("Lock parm: 0x%x flockflags: " 752 cFYI(1, "Lock parm: 0x%x flockflags: "
752 "0x%x flocktype: 0x%x start: %lld end: %lld", 753 "0x%x flocktype: 0x%x start: %lld end: %lld",
753 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start, 754 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
754 pfLock->fl_end)); 755 pfLock->fl_end);
755 756
756 if (pfLock->fl_flags & FL_POSIX) 757 if (pfLock->fl_flags & FL_POSIX)
757 cFYI(1, ("Posix")); 758 cFYI(1, "Posix");
758 if (pfLock->fl_flags & FL_FLOCK) 759 if (pfLock->fl_flags & FL_FLOCK)
759 cFYI(1, ("Flock")); 760 cFYI(1, "Flock");
760 if (pfLock->fl_flags & FL_SLEEP) { 761 if (pfLock->fl_flags & FL_SLEEP) {
761 cFYI(1, ("Blocking lock")); 762 cFYI(1, "Blocking lock");
762 wait_flag = true; 763 wait_flag = true;
763 } 764 }
764 if (pfLock->fl_flags & FL_ACCESS) 765 if (pfLock->fl_flags & FL_ACCESS)
765 cFYI(1, ("Process suspended by mandatory locking - " 766 cFYI(1, "Process suspended by mandatory locking - "
766 "not implemented yet")); 767 "not implemented yet");
767 if (pfLock->fl_flags & FL_LEASE) 768 if (pfLock->fl_flags & FL_LEASE)
768 cFYI(1, ("Lease on file - not implemented yet")); 769 cFYI(1, "Lease on file - not implemented yet");
769 if (pfLock->fl_flags & 770 if (pfLock->fl_flags &
770 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) 771 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
771 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags)); 772 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
772 773
773 if (pfLock->fl_type == F_WRLCK) { 774 if (pfLock->fl_type == F_WRLCK) {
774 cFYI(1, ("F_WRLCK ")); 775 cFYI(1, "F_WRLCK ");
775 numLock = 1; 776 numLock = 1;
776 } else if (pfLock->fl_type == F_UNLCK) { 777 } else if (pfLock->fl_type == F_UNLCK) {
777 cFYI(1, ("F_UNLCK")); 778 cFYI(1, "F_UNLCK");
778 numUnlock = 1; 779 numUnlock = 1;
779 /* Check if unlock includes more than 780 /* Check if unlock includes more than
780 one lock range */ 781 one lock range */
781 } else if (pfLock->fl_type == F_RDLCK) { 782 } else if (pfLock->fl_type == F_RDLCK) {
782 cFYI(1, ("F_RDLCK")); 783 cFYI(1, "F_RDLCK");
783 lockType |= LOCKING_ANDX_SHARED_LOCK; 784 lockType |= LOCKING_ANDX_SHARED_LOCK;
784 numLock = 1; 785 numLock = 1;
785 } else if (pfLock->fl_type == F_EXLCK) { 786 } else if (pfLock->fl_type == F_EXLCK) {
786 cFYI(1, ("F_EXLCK")); 787 cFYI(1, "F_EXLCK");
787 numLock = 1; 788 numLock = 1;
788 } else if (pfLock->fl_type == F_SHLCK) { 789 } else if (pfLock->fl_type == F_SHLCK) {
789 cFYI(1, ("F_SHLCK")); 790 cFYI(1, "F_SHLCK");
790 lockType |= LOCKING_ANDX_SHARED_LOCK; 791 lockType |= LOCKING_ANDX_SHARED_LOCK;
791 numLock = 1; 792 numLock = 1;
792 } else 793 } else
793 cFYI(1, ("Unknown type of lock")); 794 cFYI(1, "Unknown type of lock");
794 795
795 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 796 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
796 tcon = cifs_sb->tcon; 797 tcon = cifs_sb->tcon;
@@ -833,8 +834,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
833 0 /* wait flag */ ); 834 0 /* wait flag */ );
834 pfLock->fl_type = F_UNLCK; 835 pfLock->fl_type = F_UNLCK;
835 if (rc != 0) 836 if (rc != 0)
836 cERROR(1, ("Error unlocking previously locked " 837 cERROR(1, "Error unlocking previously locked "
837 "range %d during test of lock", rc)); 838 "range %d during test of lock", rc);
838 rc = 0; 839 rc = 0;
839 840
840 } else { 841 } else {
@@ -856,9 +857,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
856 0 /* wait flag */); 857 0 /* wait flag */);
857 pfLock->fl_type = F_RDLCK; 858 pfLock->fl_type = F_RDLCK;
858 if (rc != 0) 859 if (rc != 0)
859 cERROR(1, ("Error unlocking " 860 cERROR(1, "Error unlocking "
860 "previously locked range %d " 861 "previously locked range %d "
861 "during test of lock", rc)); 862 "during test of lock", rc);
862 rc = 0; 863 rc = 0;
863 } else { 864 } else {
864 pfLock->fl_type = F_WRLCK; 865 pfLock->fl_type = F_WRLCK;
@@ -923,9 +924,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
923 1, 0, li->type, false); 924 1, 0, li->type, false);
924 if (stored_rc) 925 if (stored_rc)
925 rc = stored_rc; 926 rc = stored_rc;
926 927 else {
927 list_del(&li->llist); 928 list_del(&li->llist);
928 kfree(li); 929 kfree(li);
930 }
929 } 931 }
930 } 932 }
931 mutex_unlock(&fid->lock_mutex); 933 mutex_unlock(&fid->lock_mutex);
@@ -988,9 +990,8 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
988 990
989 pTcon = cifs_sb->tcon; 991 pTcon = cifs_sb->tcon;
990 992
991 /* cFYI(1, 993 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
992 (" write %d bytes to offset %lld of %s", write_size, 994 *poffset, file->f_path.dentry->d_name.name); */
993 *poffset, file->f_path.dentry->d_name.name)); */
994 995
995 if (file->private_data == NULL) 996 if (file->private_data == NULL)
996 return -EBADF; 997 return -EBADF;
@@ -1091,8 +1092,8 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1091 1092
1092 pTcon = cifs_sb->tcon; 1093 pTcon = cifs_sb->tcon;
1093 1094
1094 cFYI(1, ("write %zd bytes to offset %lld of %s", write_size, 1095 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1095 *poffset, file->f_path.dentry->d_name.name)); 1096 *poffset, file->f_path.dentry->d_name.name);
1096 1097
1097 if (file->private_data == NULL) 1098 if (file->private_data == NULL)
1098 return -EBADF; 1099 return -EBADF;
@@ -1233,7 +1234,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1233 it being zero) during stress testcases so we need to check for it */ 1234 it being zero) during stress testcases so we need to check for it */
1234 1235
1235 if (cifs_inode == NULL) { 1236 if (cifs_inode == NULL) {
1236 cERROR(1, ("Null inode passed to cifs_writeable_file")); 1237 cERROR(1, "Null inode passed to cifs_writeable_file");
1237 dump_stack(); 1238 dump_stack();
1238 return NULL; 1239 return NULL;
1239 } 1240 }
@@ -1277,7 +1278,7 @@ refind_writable:
1277 again. Note that it would be bad 1278 again. Note that it would be bad
1278 to hold up writepages here (rather than 1279 to hold up writepages here (rather than
1279 in caller) with continuous retries */ 1280 in caller) with continuous retries */
1280 cFYI(1, ("wp failed on reopen file")); 1281 cFYI(1, "wp failed on reopen file");
1281 read_lock(&GlobalSMBSeslock); 1282 read_lock(&GlobalSMBSeslock);
1282 /* can not use this handle, no write 1283 /* can not use this handle, no write
1283 pending on this one after all */ 1284 pending on this one after all */
@@ -1353,7 +1354,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1353 else if (bytes_written < 0) 1354 else if (bytes_written < 0)
1354 rc = bytes_written; 1355 rc = bytes_written;
1355 } else { 1356 } else {
1356 cFYI(1, ("No writeable filehandles for inode")); 1357 cFYI(1, "No writeable filehandles for inode");
1357 rc = -EIO; 1358 rc = -EIO;
1358 } 1359 }
1359 1360
@@ -1525,7 +1526,7 @@ retry:
1525 */ 1526 */
1526 open_file = find_writable_file(CIFS_I(mapping->host)); 1527 open_file = find_writable_file(CIFS_I(mapping->host));
1527 if (!open_file) { 1528 if (!open_file) {
1528 cERROR(1, ("No writable handles for inode")); 1529 cERROR(1, "No writable handles for inode");
1529 rc = -EBADF; 1530 rc = -EBADF;
1530 } else { 1531 } else {
1531 long_op = cifs_write_timeout(cifsi, offset); 1532 long_op = cifs_write_timeout(cifsi, offset);
@@ -1538,8 +1539,8 @@ retry:
1538 cifs_update_eof(cifsi, offset, bytes_written); 1539 cifs_update_eof(cifsi, offset, bytes_written);
1539 1540
1540 if (rc || bytes_written < bytes_to_write) { 1541 if (rc || bytes_written < bytes_to_write) {
1541 cERROR(1, ("Write2 ret %d, wrote %d", 1542 cERROR(1, "Write2 ret %d, wrote %d",
1542 rc, bytes_written)); 1543 rc, bytes_written);
1543 /* BB what if continued retry is 1544 /* BB what if continued retry is
1544 requested via mount flags? */ 1545 requested via mount flags? */
1545 if (rc == -ENOSPC) 1546 if (rc == -ENOSPC)
@@ -1600,7 +1601,7 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1600/* BB add check for wbc flags */ 1601/* BB add check for wbc flags */
1601 page_cache_get(page); 1602 page_cache_get(page);
1602 if (!PageUptodate(page)) 1603 if (!PageUptodate(page))
1603 cFYI(1, ("ppw - page not up to date")); 1604 cFYI(1, "ppw - page not up to date");
1604 1605
1605 /* 1606 /*
1606 * Set the "writeback" flag, and clear "dirty" in the radix tree. 1607 * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -1629,8 +1630,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1629 int rc; 1630 int rc;
1630 struct inode *inode = mapping->host; 1631 struct inode *inode = mapping->host;
1631 1632
1632 cFYI(1, ("write_end for page %p from pos %lld with %d bytes", 1633 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1633 page, pos, copied)); 1634 page, pos, copied);
1634 1635
1635 if (PageChecked(page)) { 1636 if (PageChecked(page)) {
1636 if (copied == len) 1637 if (copied == len)
@@ -1686,8 +1687,8 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1686 1687
1687 xid = GetXid(); 1688 xid = GetXid();
1688 1689
1689 cFYI(1, ("Sync file - name: %s datasync: 0x%x", 1690 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1690 dentry->d_name.name, datasync)); 1691 dentry->d_name.name, datasync);
1691 1692
1692 rc = filemap_write_and_wait(inode->i_mapping); 1693 rc = filemap_write_and_wait(inode->i_mapping);
1693 if (rc == 0) { 1694 if (rc == 0) {
@@ -1711,7 +1712,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1711 unsigned int rpages = 0; 1712 unsigned int rpages = 0;
1712 int rc = 0; 1713 int rc = 0;
1713 1714
1714 cFYI(1, ("sync page %p",page)); 1715 cFYI(1, "sync page %p", page);
1715 mapping = page->mapping; 1716 mapping = page->mapping;
1716 if (!mapping) 1717 if (!mapping)
1717 return 0; 1718 return 0;
@@ -1722,7 +1723,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1722/* fill in rpages then 1723/* fill in rpages then
1723 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ 1724 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1724 1725
1725/* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index)); 1726/* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1726 1727
1727#if 0 1728#if 0
1728 if (rc < 0) 1729 if (rc < 0)
@@ -1756,7 +1757,7 @@ int cifs_flush(struct file *file, fl_owner_t id)
1756 CIFS_I(inode)->write_behind_rc = 0; 1757 CIFS_I(inode)->write_behind_rc = 0;
1757 } 1758 }
1758 1759
1759 cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc)); 1760 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1760 1761
1761 return rc; 1762 return rc;
1762} 1763}
@@ -1788,7 +1789,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1788 open_file = (struct cifsFileInfo *)file->private_data; 1789 open_file = (struct cifsFileInfo *)file->private_data;
1789 1790
1790 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1791 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1791 cFYI(1, ("attempting read on write only file instance")); 1792 cFYI(1, "attempting read on write only file instance");
1792 1793
1793 for (total_read = 0, current_offset = read_data; 1794 for (total_read = 0, current_offset = read_data;
1794 read_size > total_read; 1795 read_size > total_read;
@@ -1869,7 +1870,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1869 open_file = (struct cifsFileInfo *)file->private_data; 1870 open_file = (struct cifsFileInfo *)file->private_data;
1870 1871
1871 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1872 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1872 cFYI(1, ("attempting read on write only file instance")); 1873 cFYI(1, "attempting read on write only file instance");
1873 1874
1874 for (total_read = 0, current_offset = read_data; 1875 for (total_read = 0, current_offset = read_data;
1875 read_size > total_read; 1876 read_size > total_read;
@@ -1920,7 +1921,7 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1920 xid = GetXid(); 1921 xid = GetXid();
1921 rc = cifs_revalidate_file(file); 1922 rc = cifs_revalidate_file(file);
1922 if (rc) { 1923 if (rc) {
1923 cFYI(1, ("Validation prior to mmap failed, error=%d", rc)); 1924 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1924 FreeXid(xid); 1925 FreeXid(xid);
1925 return rc; 1926 return rc;
1926 } 1927 }
@@ -1931,8 +1932,7 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1931 1932
1932 1933
1933static void cifs_copy_cache_pages(struct address_space *mapping, 1934static void cifs_copy_cache_pages(struct address_space *mapping,
1934 struct list_head *pages, int bytes_read, char *data, 1935 struct list_head *pages, int bytes_read, char *data)
1935 struct pagevec *plru_pvec)
1936{ 1936{
1937 struct page *page; 1937 struct page *page;
1938 char *target; 1938 char *target;
@@ -1944,10 +1944,10 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1944 page = list_entry(pages->prev, struct page, lru); 1944 page = list_entry(pages->prev, struct page, lru);
1945 list_del(&page->lru); 1945 list_del(&page->lru);
1946 1946
1947 if (add_to_page_cache(page, mapping, page->index, 1947 if (add_to_page_cache_lru(page, mapping, page->index,
1948 GFP_KERNEL)) { 1948 GFP_KERNEL)) {
1949 page_cache_release(page); 1949 page_cache_release(page);
1950 cFYI(1, ("Add page cache failed")); 1950 cFYI(1, "Add page cache failed");
1951 data += PAGE_CACHE_SIZE; 1951 data += PAGE_CACHE_SIZE;
1952 bytes_read -= PAGE_CACHE_SIZE; 1952 bytes_read -= PAGE_CACHE_SIZE;
1953 continue; 1953 continue;
@@ -1970,8 +1970,6 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1970 flush_dcache_page(page); 1970 flush_dcache_page(page);
1971 SetPageUptodate(page); 1971 SetPageUptodate(page);
1972 unlock_page(page); 1972 unlock_page(page);
1973 if (!pagevec_add(plru_pvec, page))
1974 __pagevec_lru_add_file(plru_pvec);
1975 data += PAGE_CACHE_SIZE; 1973 data += PAGE_CACHE_SIZE;
1976 } 1974 }
1977 return; 1975 return;
@@ -1990,7 +1988,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1990 unsigned int read_size, i; 1988 unsigned int read_size, i;
1991 char *smb_read_data = NULL; 1989 char *smb_read_data = NULL;
1992 struct smb_com_read_rsp *pSMBr; 1990 struct smb_com_read_rsp *pSMBr;
1993 struct pagevec lru_pvec;
1994 struct cifsFileInfo *open_file; 1991 struct cifsFileInfo *open_file;
1995 int buf_type = CIFS_NO_BUFFER; 1992 int buf_type = CIFS_NO_BUFFER;
1996 1993
@@ -2004,8 +2001,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2004 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2001 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2005 pTcon = cifs_sb->tcon; 2002 pTcon = cifs_sb->tcon;
2006 2003
2007 pagevec_init(&lru_pvec, 0); 2004 cFYI(DBG2, "rpages: num pages %d", num_pages);
2008 cFYI(DBG2, ("rpages: num pages %d", num_pages));
2009 for (i = 0; i < num_pages; ) { 2005 for (i = 0; i < num_pages; ) {
2010 unsigned contig_pages; 2006 unsigned contig_pages;
2011 struct page *tmp_page; 2007 struct page *tmp_page;
@@ -2038,8 +2034,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2038 /* Read size needs to be in multiples of one page */ 2034 /* Read size needs to be in multiples of one page */
2039 read_size = min_t(const unsigned int, read_size, 2035 read_size = min_t(const unsigned int, read_size,
2040 cifs_sb->rsize & PAGE_CACHE_MASK); 2036 cifs_sb->rsize & PAGE_CACHE_MASK);
2041 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d", 2037 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2042 read_size, contig_pages)); 2038 read_size, contig_pages);
2043 rc = -EAGAIN; 2039 rc = -EAGAIN;
2044 while (rc == -EAGAIN) { 2040 while (rc == -EAGAIN) {
2045 if ((open_file->invalidHandle) && 2041 if ((open_file->invalidHandle) &&
@@ -2066,14 +2062,14 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2066 } 2062 }
2067 } 2063 }
2068 if ((rc < 0) || (smb_read_data == NULL)) { 2064 if ((rc < 0) || (smb_read_data == NULL)) {
2069 cFYI(1, ("Read error in readpages: %d", rc)); 2065 cFYI(1, "Read error in readpages: %d", rc);
2070 break; 2066 break;
2071 } else if (bytes_read > 0) { 2067 } else if (bytes_read > 0) {
2072 task_io_account_read(bytes_read); 2068 task_io_account_read(bytes_read);
2073 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 2069 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2074 cifs_copy_cache_pages(mapping, page_list, bytes_read, 2070 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2075 smb_read_data + 4 /* RFC1001 hdr */ + 2071 smb_read_data + 4 /* RFC1001 hdr */ +
2076 le16_to_cpu(pSMBr->DataOffset), &lru_pvec); 2072 le16_to_cpu(pSMBr->DataOffset));
2077 2073
2078 i += bytes_read >> PAGE_CACHE_SHIFT; 2074 i += bytes_read >> PAGE_CACHE_SHIFT;
2079 cifs_stats_bytes_read(pTcon, bytes_read); 2075 cifs_stats_bytes_read(pTcon, bytes_read);
@@ -2089,9 +2085,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2089 /* break; */ 2085 /* break; */
2090 } 2086 }
2091 } else { 2087 } else {
2092 cFYI(1, ("No bytes read (%d) at offset %lld . " 2088 cFYI(1, "No bytes read (%d) at offset %lld . "
2093 "Cleaning remaining pages from readahead list", 2089 "Cleaning remaining pages from readahead list",
2094 bytes_read, offset)); 2090 bytes_read, offset);
2095 /* BB turn off caching and do new lookup on 2091 /* BB turn off caching and do new lookup on
2096 file size at server? */ 2092 file size at server? */
2097 break; 2093 break;
@@ -2106,8 +2102,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2106 bytes_read = 0; 2102 bytes_read = 0;
2107 } 2103 }
2108 2104
2109 pagevec_lru_add_file(&lru_pvec);
2110
2111/* need to free smb_read_data buf before exit */ 2105/* need to free smb_read_data buf before exit */
2112 if (smb_read_data) { 2106 if (smb_read_data) {
2113 if (buf_type == CIFS_SMALL_BUFFER) 2107 if (buf_type == CIFS_SMALL_BUFFER)
@@ -2136,7 +2130,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
2136 if (rc < 0) 2130 if (rc < 0)
2137 goto io_error; 2131 goto io_error;
2138 else 2132 else
2139 cFYI(1, ("Bytes read %d", rc)); 2133 cFYI(1, "Bytes read %d", rc);
2140 2134
2141 file->f_path.dentry->d_inode->i_atime = 2135 file->f_path.dentry->d_inode->i_atime =
2142 current_fs_time(file->f_path.dentry->d_inode->i_sb); 2136 current_fs_time(file->f_path.dentry->d_inode->i_sb);
@@ -2168,8 +2162,8 @@ static int cifs_readpage(struct file *file, struct page *page)
2168 return rc; 2162 return rc;
2169 } 2163 }
2170 2164
2171 cFYI(1, ("readpage %p at offset %d 0x%x\n", 2165 cFYI(1, "readpage %p at offset %d 0x%x\n",
2172 page, (int)offset, (int)offset)); 2166 page, (int)offset, (int)offset);
2173 2167
2174 rc = cifs_readpage_worker(file, page, &offset); 2168 rc = cifs_readpage_worker(file, page, &offset);
2175 2169
@@ -2239,7 +2233,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
2239 struct page *page; 2233 struct page *page;
2240 int rc = 0; 2234 int rc = 0;
2241 2235
2242 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); 2236 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2243 2237
2244 page = grab_cache_page_write_begin(mapping, index, flags); 2238 page = grab_cache_page_write_begin(mapping, index, flags);
2245 if (!page) { 2239 if (!page) {
@@ -2311,12 +2305,10 @@ cifs_oplock_break(struct slow_work *work)
2311 int rc, waitrc = 0; 2305 int rc, waitrc = 0;
2312 2306
2313 if (inode && S_ISREG(inode->i_mode)) { 2307 if (inode && S_ISREG(inode->i_mode)) {
2314#ifdef CONFIG_CIFS_EXPERIMENTAL 2308 if (cinode->clientCanCacheRead)
2315 if (cinode->clientCanCacheAll == 0)
2316 break_lease(inode, O_RDONLY); 2309 break_lease(inode, O_RDONLY);
2317 else if (cinode->clientCanCacheRead == 0) 2310 else
2318 break_lease(inode, O_WRONLY); 2311 break_lease(inode, O_WRONLY);
2319#endif
2320 rc = filemap_fdatawrite(inode->i_mapping); 2312 rc = filemap_fdatawrite(inode->i_mapping);
2321 if (cinode->clientCanCacheRead == 0) { 2313 if (cinode->clientCanCacheRead == 0) {
2322 waitrc = filemap_fdatawait(inode->i_mapping); 2314 waitrc = filemap_fdatawait(inode->i_mapping);
@@ -2326,7 +2318,7 @@ cifs_oplock_break(struct slow_work *work)
2326 rc = waitrc; 2318 rc = waitrc;
2327 if (rc) 2319 if (rc)
2328 cinode->write_behind_rc = rc; 2320 cinode->write_behind_rc = rc;
2329 cFYI(1, ("Oplock flush inode %p rc %d", inode, rc)); 2321 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2330 } 2322 }
2331 2323
2332 /* 2324 /*
@@ -2338,7 +2330,7 @@ cifs_oplock_break(struct slow_work *work)
2338 if (!cfile->closePend && !cfile->oplock_break_cancelled) { 2330 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2339 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0, 2331 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2340 LOCKING_ANDX_OPLOCK_RELEASE, false); 2332 LOCKING_ANDX_OPLOCK_RELEASE, false);
2341 cFYI(1, ("Oplock release rc = %d", rc)); 2333 cFYI(1, "Oplock release rc = %d", rc);
2342 } 2334 }
2343} 2335}
2344 2336
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 29b9ea244c81..62b324f26a56 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/inode.c 2 * fs/cifs/inode.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2010
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -86,30 +86,30 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
86{ 86{
87 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 87 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
88 88
89 cFYI(1, ("%s: revalidating inode %llu", __func__, cifs_i->uniqueid)); 89 cFYI(1, "%s: revalidating inode %llu", __func__, cifs_i->uniqueid);
90 90
91 if (inode->i_state & I_NEW) { 91 if (inode->i_state & I_NEW) {
92 cFYI(1, ("%s: inode %llu is new", __func__, cifs_i->uniqueid)); 92 cFYI(1, "%s: inode %llu is new", __func__, cifs_i->uniqueid);
93 return; 93 return;
94 } 94 }
95 95
96 /* don't bother with revalidation if we have an oplock */ 96 /* don't bother with revalidation if we have an oplock */
97 if (cifs_i->clientCanCacheRead) { 97 if (cifs_i->clientCanCacheRead) {
98 cFYI(1, ("%s: inode %llu is oplocked", __func__, 98 cFYI(1, "%s: inode %llu is oplocked", __func__,
99 cifs_i->uniqueid)); 99 cifs_i->uniqueid);
100 return; 100 return;
101 } 101 }
102 102
103 /* revalidate if mtime or size have changed */ 103 /* revalidate if mtime or size have changed */
104 if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) && 104 if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) &&
105 cifs_i->server_eof == fattr->cf_eof) { 105 cifs_i->server_eof == fattr->cf_eof) {
106 cFYI(1, ("%s: inode %llu is unchanged", __func__, 106 cFYI(1, "%s: inode %llu is unchanged", __func__,
107 cifs_i->uniqueid)); 107 cifs_i->uniqueid);
108 return; 108 return;
109 } 109 }
110 110
111 cFYI(1, ("%s: invalidating inode %llu mapping", __func__, 111 cFYI(1, "%s: invalidating inode %llu mapping", __func__,
112 cifs_i->uniqueid)); 112 cifs_i->uniqueid);
113 cifs_i->invalid_mapping = true; 113 cifs_i->invalid_mapping = true;
114} 114}
115 115
@@ -137,15 +137,14 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
137 inode->i_mode = fattr->cf_mode; 137 inode->i_mode = fattr->cf_mode;
138 138
139 cifs_i->cifsAttrs = fattr->cf_cifsattrs; 139 cifs_i->cifsAttrs = fattr->cf_cifsattrs;
140 cifs_i->uniqueid = fattr->cf_uniqueid;
141 140
142 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) 141 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
143 cifs_i->time = 0; 142 cifs_i->time = 0;
144 else 143 else
145 cifs_i->time = jiffies; 144 cifs_i->time = jiffies;
146 145
147 cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode, 146 cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
148 oldtime, cifs_i->time)); 147 oldtime, cifs_i->time);
149 148
150 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; 149 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
151 150
@@ -170,6 +169,17 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
170 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL); 169 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
171} 170}
172 171
172void
173cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr)
174{
175 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
176
177 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
178 return;
179
180 fattr->cf_uniqueid = iunique(sb, ROOT_I);
181}
182
173/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */ 183/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
174void 184void
175cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, 185cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
@@ -227,7 +237,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
227 /* safest to call it a file if we do not know */ 237 /* safest to call it a file if we do not know */
228 fattr->cf_mode |= S_IFREG; 238 fattr->cf_mode |= S_IFREG;
229 fattr->cf_dtype = DT_REG; 239 fattr->cf_dtype = DT_REG;
230 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type))); 240 cFYI(1, "unknown type %d", le32_to_cpu(info->Type));
231 break; 241 break;
232 } 242 }
233 243
@@ -256,7 +266,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
256{ 266{
257 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 267 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
258 268
259 cFYI(1, ("creating fake fattr for DFS referral")); 269 cFYI(1, "creating fake fattr for DFS referral");
260 270
261 memset(fattr, 0, sizeof(*fattr)); 271 memset(fattr, 0, sizeof(*fattr));
262 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; 272 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
@@ -305,7 +315,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
305 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 315 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
306 316
307 tcon = cifs_sb->tcon; 317 tcon = cifs_sb->tcon;
308 cFYI(1, ("Getting info on %s", full_path)); 318 cFYI(1, "Getting info on %s", full_path);
309 319
310 /* could have done a find first instead but this returns more info */ 320 /* could have done a find first instead but this returns more info */
311 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, 321 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
@@ -323,6 +333,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
323 333
324 if (*pinode == NULL) { 334 if (*pinode == NULL) {
325 /* get new inode */ 335 /* get new inode */
336 cifs_fill_uniqueid(sb, &fattr);
326 *pinode = cifs_iget(sb, &fattr); 337 *pinode = cifs_iget(sb, &fattr);
327 if (!*pinode) 338 if (!*pinode)
328 rc = -ENOMEM; 339 rc = -ENOMEM;
@@ -373,7 +384,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
373 &bytes_read, &pbuf, &buf_type); 384 &bytes_read, &pbuf, &buf_type);
374 if ((rc == 0) && (bytes_read >= 8)) { 385 if ((rc == 0) && (bytes_read >= 8)) {
375 if (memcmp("IntxBLK", pbuf, 8) == 0) { 386 if (memcmp("IntxBLK", pbuf, 8) == 0) {
376 cFYI(1, ("Block device")); 387 cFYI(1, "Block device");
377 fattr->cf_mode |= S_IFBLK; 388 fattr->cf_mode |= S_IFBLK;
378 fattr->cf_dtype = DT_BLK; 389 fattr->cf_dtype = DT_BLK;
379 if (bytes_read == 24) { 390 if (bytes_read == 24) {
@@ -385,7 +396,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
385 fattr->cf_rdev = MKDEV(mjr, mnr); 396 fattr->cf_rdev = MKDEV(mjr, mnr);
386 } 397 }
387 } else if (memcmp("IntxCHR", pbuf, 8) == 0) { 398 } else if (memcmp("IntxCHR", pbuf, 8) == 0) {
388 cFYI(1, ("Char device")); 399 cFYI(1, "Char device");
389 fattr->cf_mode |= S_IFCHR; 400 fattr->cf_mode |= S_IFCHR;
390 fattr->cf_dtype = DT_CHR; 401 fattr->cf_dtype = DT_CHR;
391 if (bytes_read == 24) { 402 if (bytes_read == 24) {
@@ -397,7 +408,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
397 fattr->cf_rdev = MKDEV(mjr, mnr); 408 fattr->cf_rdev = MKDEV(mjr, mnr);
398 } 409 }
399 } else if (memcmp("IntxLNK", pbuf, 7) == 0) { 410 } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
400 cFYI(1, ("Symlink")); 411 cFYI(1, "Symlink");
401 fattr->cf_mode |= S_IFLNK; 412 fattr->cf_mode |= S_IFLNK;
402 fattr->cf_dtype = DT_LNK; 413 fattr->cf_dtype = DT_LNK;
403 } else { 414 } else {
@@ -439,10 +450,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
439 else if (rc > 3) { 450 else if (rc > 3) {
440 mode = le32_to_cpu(*((__le32 *)ea_value)); 451 mode = le32_to_cpu(*((__le32 *)ea_value));
441 fattr->cf_mode &= ~SFBITS_MASK; 452 fattr->cf_mode &= ~SFBITS_MASK;
442 cFYI(1, ("special bits 0%o org mode 0%o", mode, 453 cFYI(1, "special bits 0%o org mode 0%o", mode,
443 fattr->cf_mode)); 454 fattr->cf_mode);
444 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode; 455 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
445 cFYI(1, ("special mode bits 0%o", mode)); 456 cFYI(1, "special mode bits 0%o", mode);
446 } 457 }
447 458
448 return 0; 459 return 0;
@@ -548,11 +559,11 @@ int cifs_get_inode_info(struct inode **pinode,
548 struct cifs_fattr fattr; 559 struct cifs_fattr fattr;
549 560
550 pTcon = cifs_sb->tcon; 561 pTcon = cifs_sb->tcon;
551 cFYI(1, ("Getting info on %s", full_path)); 562 cFYI(1, "Getting info on %s", full_path);
552 563
553 if ((pfindData == NULL) && (*pinode != NULL)) { 564 if ((pfindData == NULL) && (*pinode != NULL)) {
554 if (CIFS_I(*pinode)->clientCanCacheRead) { 565 if (CIFS_I(*pinode)->clientCanCacheRead) {
555 cFYI(1, ("No need to revalidate cached inode sizes")); 566 cFYI(1, "No need to revalidate cached inode sizes");
556 return rc; 567 return rc;
557 } 568 }
558 } 569 }
@@ -618,7 +629,7 @@ int cifs_get_inode_info(struct inode **pinode,
618 cifs_sb->mnt_cifs_flags & 629 cifs_sb->mnt_cifs_flags &
619 CIFS_MOUNT_MAP_SPECIAL_CHR); 630 CIFS_MOUNT_MAP_SPECIAL_CHR);
620 if (rc1 || !fattr.cf_uniqueid) { 631 if (rc1 || !fattr.cf_uniqueid) {
621 cFYI(1, ("GetSrvInodeNum rc %d", rc1)); 632 cFYI(1, "GetSrvInodeNum rc %d", rc1);
622 fattr.cf_uniqueid = iunique(sb, ROOT_I); 633 fattr.cf_uniqueid = iunique(sb, ROOT_I);
623 cifs_autodisable_serverino(cifs_sb); 634 cifs_autodisable_serverino(cifs_sb);
624 } 635 }
@@ -634,13 +645,13 @@ int cifs_get_inode_info(struct inode **pinode,
634 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 645 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
635 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid); 646 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
636 if (tmprc) 647 if (tmprc)
637 cFYI(1, ("cifs_sfu_type failed: %d", tmprc)); 648 cFYI(1, "cifs_sfu_type failed: %d", tmprc);
638 } 649 }
639 650
640#ifdef CONFIG_CIFS_EXPERIMENTAL 651#ifdef CONFIG_CIFS_EXPERIMENTAL
641 /* fill in 0777 bits from ACL */ 652 /* fill in 0777 bits from ACL */
642 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 653 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
643 cFYI(1, ("Getting mode bits from ACL")); 654 cFYI(1, "Getting mode bits from ACL");
644 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); 655 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
645 } 656 }
646#endif 657#endif
@@ -745,7 +756,7 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
745 struct inode *inode; 756 struct inode *inode;
746 757
747retry_iget5_locked: 758retry_iget5_locked:
748 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); 759 cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid);
749 760
750 /* hash down to 32-bits on 32-bit arch */ 761 /* hash down to 32-bits on 32-bit arch */
751 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); 762 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
@@ -797,7 +808,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
797 return ERR_PTR(-ENOMEM); 808 return ERR_PTR(-ENOMEM);
798 809
799 if (rc && cifs_sb->tcon->ipc) { 810 if (rc && cifs_sb->tcon->ipc) {
800 cFYI(1, ("ipc connection - fake read inode")); 811 cFYI(1, "ipc connection - fake read inode");
801 inode->i_mode |= S_IFDIR; 812 inode->i_mode |= S_IFDIR;
802 inode->i_nlink = 2; 813 inode->i_nlink = 2;
803 inode->i_op = &cifs_ipc_inode_ops; 814 inode->i_op = &cifs_ipc_inode_ops;
@@ -859,7 +870,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
859 * server times. 870 * server times.
860 */ 871 */
861 if (set_time && (attrs->ia_valid & ATTR_CTIME)) { 872 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
862 cFYI(1, ("CIFS - CTIME changed")); 873 cFYI(1, "CIFS - CTIME changed");
863 info_buf.ChangeTime = 874 info_buf.ChangeTime =
864 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); 875 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
865 } else 876 } else
@@ -894,8 +905,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
894 goto out; 905 goto out;
895 } 906 }
896 907
897 cFYI(1, ("calling SetFileInfo since SetPathInfo for " 908 cFYI(1, "calling SetFileInfo since SetPathInfo for "
898 "times not supported by this server")); 909 "times not supported by this server");
899 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 910 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
900 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, 911 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
901 CREATE_NOT_DIR, &netfid, &oplock, 912 CREATE_NOT_DIR, &netfid, &oplock,
@@ -1053,7 +1064,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1053 struct iattr *attrs = NULL; 1064 struct iattr *attrs = NULL;
1054 __u32 dosattr = 0, origattr = 0; 1065 __u32 dosattr = 0, origattr = 0;
1055 1066
1056 cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); 1067 cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry);
1057 1068
1058 xid = GetXid(); 1069 xid = GetXid();
1059 1070
@@ -1072,7 +1083,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1072 rc = CIFSPOSIXDelFile(xid, tcon, full_path, 1083 rc = CIFSPOSIXDelFile(xid, tcon, full_path,
1073 SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, 1084 SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls,
1074 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1085 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
1075 cFYI(1, ("posix del rc %d", rc)); 1086 cFYI(1, "posix del rc %d", rc);
1076 if ((rc == 0) || (rc == -ENOENT)) 1087 if ((rc == 0) || (rc == -ENOENT))
1077 goto psx_del_no_retry; 1088 goto psx_del_no_retry;
1078 } 1089 }
@@ -1146,7 +1157,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1146 struct inode *newinode = NULL; 1157 struct inode *newinode = NULL;
1147 struct cifs_fattr fattr; 1158 struct cifs_fattr fattr;
1148 1159
1149 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode)); 1160 cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode);
1150 1161
1151 xid = GetXid(); 1162 xid = GetXid();
1152 1163
@@ -1181,7 +1192,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1181 kfree(pInfo); 1192 kfree(pInfo);
1182 goto mkdir_retry_old; 1193 goto mkdir_retry_old;
1183 } else if (rc) { 1194 } else if (rc) {
1184 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1195 cFYI(1, "posix mkdir returned 0x%x", rc);
1185 d_drop(direntry); 1196 d_drop(direntry);
1186 } else { 1197 } else {
1187 if (pInfo->Type == cpu_to_le32(-1)) { 1198 if (pInfo->Type == cpu_to_le32(-1)) {
@@ -1198,6 +1209,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1198 direntry->d_op = &cifs_dentry_ops; 1209 direntry->d_op = &cifs_dentry_ops;
1199 1210
1200 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1211 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1212 cifs_fill_uniqueid(inode->i_sb, &fattr);
1201 newinode = cifs_iget(inode->i_sb, &fattr); 1213 newinode = cifs_iget(inode->i_sb, &fattr);
1202 if (!newinode) { 1214 if (!newinode) {
1203 kfree(pInfo); 1215 kfree(pInfo);
@@ -1207,12 +1219,12 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1207 d_instantiate(direntry, newinode); 1219 d_instantiate(direntry, newinode);
1208 1220
1209#ifdef CONFIG_CIFS_DEBUG2 1221#ifdef CONFIG_CIFS_DEBUG2
1210 cFYI(1, ("instantiated dentry %p %s to inode %p", 1222 cFYI(1, "instantiated dentry %p %s to inode %p",
1211 direntry, direntry->d_name.name, newinode)); 1223 direntry, direntry->d_name.name, newinode);
1212 1224
1213 if (newinode->i_nlink != 2) 1225 if (newinode->i_nlink != 2)
1214 cFYI(1, ("unexpected number of links %d", 1226 cFYI(1, "unexpected number of links %d",
1215 newinode->i_nlink)); 1227 newinode->i_nlink);
1216#endif 1228#endif
1217 } 1229 }
1218 kfree(pInfo); 1230 kfree(pInfo);
@@ -1223,7 +1235,7 @@ mkdir_retry_old:
1223 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 1235 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
1224 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1236 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
1225 if (rc) { 1237 if (rc) {
1226 cFYI(1, ("cifs_mkdir returned 0x%x", rc)); 1238 cFYI(1, "cifs_mkdir returned 0x%x", rc);
1227 d_drop(direntry); 1239 d_drop(direntry);
1228 } else { 1240 } else {
1229mkdir_get_info: 1241mkdir_get_info:
@@ -1326,7 +1338,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1326 char *full_path = NULL; 1338 char *full_path = NULL;
1327 struct cifsInodeInfo *cifsInode; 1339 struct cifsInodeInfo *cifsInode;
1328 1340
1329 cFYI(1, ("cifs_rmdir, inode = 0x%p", inode)); 1341 cFYI(1, "cifs_rmdir, inode = 0x%p", inode);
1330 1342
1331 xid = GetXid(); 1343 xid = GetXid();
1332 1344
@@ -1528,6 +1540,11 @@ cifs_inode_needs_reval(struct inode *inode)
1528 if (time_after_eq(jiffies, cifs_i->time + HZ)) 1540 if (time_after_eq(jiffies, cifs_i->time + HZ))
1529 return true; 1541 return true;
1530 1542
1543 /* hardlinked files w/ noserverino get "special" treatment */
1544 if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
1545 S_ISREG(inode->i_mode) && inode->i_nlink != 1)
1546 return true;
1547
1531 return false; 1548 return false;
1532} 1549}
1533 1550
@@ -1594,9 +1611,9 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1594 goto check_inval; 1611 goto check_inval;
1595 } 1612 }
1596 1613
1597 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1614 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
1598 "jiffies %ld", full_path, inode, inode->i_count.counter, 1615 "jiffies %ld", full_path, inode, inode->i_count.counter,
1599 dentry, dentry->d_time, jiffies)); 1616 dentry, dentry->d_time, jiffies);
1600 1617
1601 if (CIFS_SB(sb)->tcon->unix_ext) 1618 if (CIFS_SB(sb)->tcon->unix_ext)
1602 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); 1619 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
@@ -1690,12 +1707,12 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1690 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, 1707 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
1691 npid, false); 1708 npid, false);
1692 cifsFileInfo_put(open_file); 1709 cifsFileInfo_put(open_file);
1693 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1710 cFYI(1, "SetFSize for attrs rc = %d", rc);
1694 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1711 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1695 unsigned int bytes_written; 1712 unsigned int bytes_written;
1696 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size, 1713 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
1697 &bytes_written, NULL, NULL, 1); 1714 &bytes_written, NULL, NULL, 1);
1698 cFYI(1, ("Wrt seteof rc %d", rc)); 1715 cFYI(1, "Wrt seteof rc %d", rc);
1699 } 1716 }
1700 } else 1717 } else
1701 rc = -EINVAL; 1718 rc = -EINVAL;
@@ -1709,7 +1726,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1709 false, cifs_sb->local_nls, 1726 false, cifs_sb->local_nls,
1710 cifs_sb->mnt_cifs_flags & 1727 cifs_sb->mnt_cifs_flags &
1711 CIFS_MOUNT_MAP_SPECIAL_CHR); 1728 CIFS_MOUNT_MAP_SPECIAL_CHR);
1712 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); 1729 cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
1713 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1730 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1714 __u16 netfid; 1731 __u16 netfid;
1715 int oplock = 0; 1732 int oplock = 0;
@@ -1726,7 +1743,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1726 attrs->ia_size, 1743 attrs->ia_size,
1727 &bytes_written, NULL, 1744 &bytes_written, NULL,
1728 NULL, 1); 1745 NULL, 1);
1729 cFYI(1, ("wrt seteof rc %d", rc)); 1746 cFYI(1, "wrt seteof rc %d", rc);
1730 CIFSSMBClose(xid, pTcon, netfid); 1747 CIFSSMBClose(xid, pTcon, netfid);
1731 } 1748 }
1732 } 1749 }
@@ -1754,8 +1771,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1754 struct cifs_unix_set_info_args *args = NULL; 1771 struct cifs_unix_set_info_args *args = NULL;
1755 struct cifsFileInfo *open_file; 1772 struct cifsFileInfo *open_file;
1756 1773
1757 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", 1774 cFYI(1, "setattr_unix on file %s attrs->ia_valid=0x%x",
1758 direntry->d_name.name, attrs->ia_valid)); 1775 direntry->d_name.name, attrs->ia_valid);
1759 1776
1760 xid = GetXid(); 1777 xid = GetXid();
1761 1778
@@ -1885,8 +1902,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1885 1902
1886 xid = GetXid(); 1903 xid = GetXid();
1887 1904
1888 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1905 cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
1889 direntry->d_name.name, attrs->ia_valid)); 1906 direntry->d_name.name, attrs->ia_valid);
1890 1907
1891 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1908 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1892 /* check if we have permission to change attrs */ 1909 /* check if we have permission to change attrs */
@@ -1943,7 +1960,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1943 attrs->ia_valid &= ~ATTR_MODE; 1960 attrs->ia_valid &= ~ATTR_MODE;
1944 1961
1945 if (attrs->ia_valid & ATTR_MODE) { 1962 if (attrs->ia_valid & ATTR_MODE) {
1946 cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); 1963 cFYI(1, "Mode changed to 0%o", attrs->ia_mode);
1947 mode = attrs->ia_mode; 1964 mode = attrs->ia_mode;
1948 } 1965 }
1949 1966
@@ -2029,7 +2046,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
2029#if 0 2046#if 0
2030void cifs_delete_inode(struct inode *inode) 2047void cifs_delete_inode(struct inode *inode)
2031{ 2048{
2032 cFYI(1, ("In cifs_delete_inode, inode = 0x%p", inode)); 2049 cFYI(1, "In cifs_delete_inode, inode = 0x%p", inode);
2033 /* may have to add back in if and when safe distributed caching of 2050 /* may have to add back in if and when safe distributed caching of
2034 directories added e.g. via FindNotify */ 2051 directories added e.g. via FindNotify */
2035} 2052}
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index f94650683a00..505926f1ee6b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -47,7 +47,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
47 47
48 xid = GetXid(); 48 xid = GetXid();
49 49
50 cFYI(1, ("ioctl file %p cmd %u arg %lu", filep, command, arg)); 50 cFYI(1, "ioctl file %p cmd %u arg %lu", filep, command, arg);
51 51
52 cifs_sb = CIFS_SB(inode->i_sb); 52 cifs_sb = CIFS_SB(inode->i_sb);
53 53
@@ -64,12 +64,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
64 64
65 switch (command) { 65 switch (command) {
66 case CIFS_IOC_CHECKUMOUNT: 66 case CIFS_IOC_CHECKUMOUNT:
67 cFYI(1, ("User unmount attempted")); 67 cFYI(1, "User unmount attempted");
68 if (cifs_sb->mnt_uid == current_uid()) 68 if (cifs_sb->mnt_uid == current_uid())
69 rc = 0; 69 rc = 0;
70 else { 70 else {
71 rc = -EACCES; 71 rc = -EACCES;
72 cFYI(1, ("uids do not match")); 72 cFYI(1, "uids do not match");
73 } 73 }
74 break; 74 break;
75#ifdef CONFIG_CIFS_POSIX 75#ifdef CONFIG_CIFS_POSIX
@@ -97,11 +97,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
98 extAttrBits, &ExtAttrMask);*/ 98 extAttrBits, &ExtAttrMask);*/
99 } 99 }
100 cFYI(1, ("set flags not implemented yet")); 100 cFYI(1, "set flags not implemented yet");
101 break; 101 break;
102#endif /* CONFIG_CIFS_POSIX */ 102#endif /* CONFIG_CIFS_POSIX */
103 default: 103 default:
104 cFYI(1, ("unsupported ioctl")); 104 cFYI(1, "unsupported ioctl");
105 break; 105 break;
106 } 106 }
107 107
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index c1a9d4236a8c..473ca8033656 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -139,7 +139,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
139 if (!full_path) 139 if (!full_path)
140 goto out; 140 goto out;
141 141
142 cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); 142 cFYI(1, "Full path: %s inode = 0x%p", full_path, inode);
143 143
144 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, 144 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
145 cifs_sb->local_nls); 145 cifs_sb->local_nls);
@@ -178,8 +178,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
178 return rc; 178 return rc;
179 } 179 }
180 180
181 cFYI(1, ("Full path: %s", full_path)); 181 cFYI(1, "Full path: %s", full_path);
182 cFYI(1, ("symname is %s", symname)); 182 cFYI(1, "symname is %s", symname);
183 183
184 /* BB what if DFS and this volume is on different share? BB */ 184 /* BB what if DFS and this volume is on different share? BB */
185 if (pTcon->unix_ext) 185 if (pTcon->unix_ext)
@@ -198,8 +198,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
198 inode->i_sb, xid, NULL); 198 inode->i_sb, xid, NULL);
199 199
200 if (rc != 0) { 200 if (rc != 0) {
201 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d", 201 cFYI(1, "Create symlink ok, getinodeinfo fail rc = %d",
202 rc)); 202 rc);
203 } else { 203 } else {
204 if (pTcon->nocase) 204 if (pTcon->nocase)
205 direntry->d_op = &cifs_ci_dentry_ops; 205 direntry->d_op = &cifs_ci_dentry_ops;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index d1474996a812..1394aa37f26c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -51,7 +51,7 @@ _GetXid(void)
51 if (GlobalTotalActiveXid > GlobalMaxActiveXid) 51 if (GlobalTotalActiveXid > GlobalMaxActiveXid)
52 GlobalMaxActiveXid = GlobalTotalActiveXid; 52 GlobalMaxActiveXid = GlobalTotalActiveXid;
53 if (GlobalTotalActiveXid > 65000) 53 if (GlobalTotalActiveXid > 65000)
54 cFYI(1, ("warning: more than 65000 requests active")); 54 cFYI(1, "warning: more than 65000 requests active");
55 xid = GlobalCurrentXid++; 55 xid = GlobalCurrentXid++;
56 spin_unlock(&GlobalMid_Lock); 56 spin_unlock(&GlobalMid_Lock);
57 return xid; 57 return xid;
@@ -88,7 +88,7 @@ void
88sesInfoFree(struct cifsSesInfo *buf_to_free) 88sesInfoFree(struct cifsSesInfo *buf_to_free)
89{ 89{
90 if (buf_to_free == NULL) { 90 if (buf_to_free == NULL) {
91 cFYI(1, ("Null buffer passed to sesInfoFree")); 91 cFYI(1, "Null buffer passed to sesInfoFree");
92 return; 92 return;
93 } 93 }
94 94
@@ -126,7 +126,7 @@ void
126tconInfoFree(struct cifsTconInfo *buf_to_free) 126tconInfoFree(struct cifsTconInfo *buf_to_free)
127{ 127{
128 if (buf_to_free == NULL) { 128 if (buf_to_free == NULL) {
129 cFYI(1, ("Null buffer passed to tconInfoFree")); 129 cFYI(1, "Null buffer passed to tconInfoFree");
130 return; 130 return;
131 } 131 }
132 atomic_dec(&tconInfoAllocCount); 132 atomic_dec(&tconInfoAllocCount);
@@ -166,7 +166,7 @@ void
166cifs_buf_release(void *buf_to_free) 166cifs_buf_release(void *buf_to_free)
167{ 167{
168 if (buf_to_free == NULL) { 168 if (buf_to_free == NULL) {
169 /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/ 169 /* cFYI(1, "Null buffer passed to cifs_buf_release");*/
170 return; 170 return;
171 } 171 }
172 mempool_free(buf_to_free, cifs_req_poolp); 172 mempool_free(buf_to_free, cifs_req_poolp);
@@ -202,7 +202,7 @@ cifs_small_buf_release(void *buf_to_free)
202{ 202{
203 203
204 if (buf_to_free == NULL) { 204 if (buf_to_free == NULL) {
205 cFYI(1, ("Null buffer passed to cifs_small_buf_release")); 205 cFYI(1, "Null buffer passed to cifs_small_buf_release");
206 return; 206 return;
207 } 207 }
208 mempool_free(buf_to_free, cifs_sm_req_poolp); 208 mempool_free(buf_to_free, cifs_sm_req_poolp);
@@ -345,19 +345,19 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
345 /* with userid/password pairs found on the smb session */ 345 /* with userid/password pairs found on the smb session */
346 /* for other target tcp/ip addresses BB */ 346 /* for other target tcp/ip addresses BB */
347 if (current_fsuid() != treeCon->ses->linux_uid) { 347 if (current_fsuid() != treeCon->ses->linux_uid) {
348 cFYI(1, ("Multiuser mode and UID " 348 cFYI(1, "Multiuser mode and UID "
349 "did not match tcon uid")); 349 "did not match tcon uid");
350 read_lock(&cifs_tcp_ses_lock); 350 read_lock(&cifs_tcp_ses_lock);
351 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { 351 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
352 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); 352 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list);
353 if (ses->linux_uid == current_fsuid()) { 353 if (ses->linux_uid == current_fsuid()) {
354 if (ses->server == treeCon->ses->server) { 354 if (ses->server == treeCon->ses->server) {
355 cFYI(1, ("found matching uid substitute right smb_uid")); 355 cFYI(1, "found matching uid substitute right smb_uid");
356 buffer->Uid = ses->Suid; 356 buffer->Uid = ses->Suid;
357 break; 357 break;
358 } else { 358 } else {
359 /* BB eventually call cifs_setup_session here */ 359 /* BB eventually call cifs_setup_session here */
360 cFYI(1, ("local UID found but no smb sess with this server exists")); 360 cFYI(1, "local UID found but no smb sess with this server exists");
361 } 361 }
362 } 362 }
363 } 363 }
@@ -394,17 +394,16 @@ checkSMBhdr(struct smb_hdr *smb, __u16 mid)
394 if (smb->Command == SMB_COM_LOCKING_ANDX) 394 if (smb->Command == SMB_COM_LOCKING_ANDX)
395 return 0; 395 return 0;
396 else 396 else
397 cERROR(1, ("Received Request not response")); 397 cERROR(1, "Received Request not response");
398 } 398 }
399 } else { /* bad signature or mid */ 399 } else { /* bad signature or mid */
400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) 400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
401 cERROR(1, 401 cERROR(1, "Bad protocol string signature header %x",
402 ("Bad protocol string signature header %x", 402 *(unsigned int *) smb->Protocol);
403 *(unsigned int *) smb->Protocol));
404 if (mid != smb->Mid) 403 if (mid != smb->Mid)
405 cERROR(1, ("Mids do not match")); 404 cERROR(1, "Mids do not match");
406 } 405 }
407 cERROR(1, ("bad smb detected. The Mid=%d", smb->Mid)); 406 cERROR(1, "bad smb detected. The Mid=%d", smb->Mid);
408 return 1; 407 return 1;
409} 408}
410 409
@@ -413,7 +412,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
413{ 412{
414 __u32 len = smb->smb_buf_length; 413 __u32 len = smb->smb_buf_length;
415 __u32 clc_len; /* calculated length */ 414 __u32 clc_len; /* calculated length */
416 cFYI(0, ("checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len)); 415 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
417 416
418 if (length < 2 + sizeof(struct smb_hdr)) { 417 if (length < 2 + sizeof(struct smb_hdr)) {
419 if ((length >= sizeof(struct smb_hdr) - 1) 418 if ((length >= sizeof(struct smb_hdr) - 1)
@@ -437,15 +436,15 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
437 tmp[sizeof(struct smb_hdr)+1] = 0; 436 tmp[sizeof(struct smb_hdr)+1] = 0;
438 return 0; 437 return 0;
439 } 438 }
440 cERROR(1, ("rcvd invalid byte count (bcc)")); 439 cERROR(1, "rcvd invalid byte count (bcc)");
441 } else { 440 } else {
442 cERROR(1, ("Length less than smb header size")); 441 cERROR(1, "Length less than smb header size");
443 } 442 }
444 return 1; 443 return 1;
445 } 444 }
446 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 445 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
447 cERROR(1, ("smb length greater than MaxBufSize, mid=%d", 446 cERROR(1, "smb length greater than MaxBufSize, mid=%d",
448 smb->Mid)); 447 smb->Mid);
449 return 1; 448 return 1;
450 } 449 }
451 450
@@ -454,8 +453,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
454 clc_len = smbCalcSize_LE(smb); 453 clc_len = smbCalcSize_LE(smb);
455 454
456 if (4 + len != length) { 455 if (4 + len != length) {
457 cERROR(1, ("Length read does not match RFC1001 length %d", 456 cERROR(1, "Length read does not match RFC1001 length %d",
458 len)); 457 len);
459 return 1; 458 return 1;
460 } 459 }
461 460
@@ -466,8 +465,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
466 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) 465 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
467 return 0; /* bcc wrapped */ 466 return 0; /* bcc wrapped */
468 } 467 }
469 cFYI(1, ("Calculated size %d vs length %d mismatch for mid %d", 468 cFYI(1, "Calculated size %d vs length %d mismatch for mid %d",
470 clc_len, 4 + len, smb->Mid)); 469 clc_len, 4 + len, smb->Mid);
471 /* Windows XP can return a few bytes too much, presumably 470 /* Windows XP can return a few bytes too much, presumably
472 an illegal pad, at the end of byte range lock responses 471 an illegal pad, at the end of byte range lock responses
473 so we allow for that three byte pad, as long as actual 472 so we allow for that three byte pad, as long as actual
@@ -482,8 +481,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
482 if ((4+len > clc_len) && (len <= clc_len + 512)) 481 if ((4+len > clc_len) && (len <= clc_len + 512))
483 return 0; 482 return 0;
484 else { 483 else {
485 cERROR(1, ("RFC1001 size %d bigger than SMB for Mid=%d", 484 cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
486 len, smb->Mid)); 485 len, smb->Mid);
487 return 1; 486 return 1;
488 } 487 }
489 } 488 }
@@ -501,7 +500,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
501 struct cifsFileInfo *netfile; 500 struct cifsFileInfo *netfile;
502 int rc; 501 int rc;
503 502
504 cFYI(1, ("Checking for oplock break or dnotify response")); 503 cFYI(1, "Checking for oplock break or dnotify response");
505 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && 504 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
506 (pSMB->hdr.Flags & SMBFLG_RESPONSE)) { 505 (pSMB->hdr.Flags & SMBFLG_RESPONSE)) {
507 struct smb_com_transaction_change_notify_rsp *pSMBr = 506 struct smb_com_transaction_change_notify_rsp *pSMBr =
@@ -513,15 +512,15 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
513 512
514 pnotify = (struct file_notify_information *) 513 pnotify = (struct file_notify_information *)
515 ((char *)&pSMBr->hdr.Protocol + data_offset); 514 ((char *)&pSMBr->hdr.Protocol + data_offset);
516 cFYI(1, ("dnotify on %s Action: 0x%x", 515 cFYI(1, "dnotify on %s Action: 0x%x",
517 pnotify->FileName, pnotify->Action)); 516 pnotify->FileName, pnotify->Action);
518 /* cifs_dump_mem("Rcvd notify Data: ",buf, 517 /* cifs_dump_mem("Rcvd notify Data: ",buf,
519 sizeof(struct smb_hdr)+60); */ 518 sizeof(struct smb_hdr)+60); */
520 return true; 519 return true;
521 } 520 }
522 if (pSMBr->hdr.Status.CifsError) { 521 if (pSMBr->hdr.Status.CifsError) {
523 cFYI(1, ("notify err 0x%d", 522 cFYI(1, "notify err 0x%d",
524 pSMBr->hdr.Status.CifsError)); 523 pSMBr->hdr.Status.CifsError);
525 return true; 524 return true;
526 } 525 }
527 return false; 526 return false;
@@ -535,7 +534,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
535 large dirty files cached on the client */ 534 large dirty files cached on the client */
536 if ((NT_STATUS_INVALID_HANDLE) == 535 if ((NT_STATUS_INVALID_HANDLE) ==
537 le32_to_cpu(pSMB->hdr.Status.CifsError)) { 536 le32_to_cpu(pSMB->hdr.Status.CifsError)) {
538 cFYI(1, ("invalid handle on oplock break")); 537 cFYI(1, "invalid handle on oplock break");
539 return true; 538 return true;
540 } else if (ERRbadfid == 539 } else if (ERRbadfid ==
541 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { 540 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) {
@@ -547,8 +546,8 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
547 if (pSMB->hdr.WordCount != 8) 546 if (pSMB->hdr.WordCount != 8)
548 return false; 547 return false;
549 548
550 cFYI(1, ("oplock type 0x%d level 0x%d", 549 cFYI(1, "oplock type 0x%d level 0x%d",
551 pSMB->LockType, pSMB->OplockLevel)); 550 pSMB->LockType, pSMB->OplockLevel);
552 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 551 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
553 return false; 552 return false;
554 553
@@ -579,15 +578,15 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
579 return true; 578 return true;
580 } 579 }
581 580
582 cFYI(1, ("file id match, oplock break")); 581 cFYI(1, "file id match, oplock break");
583 pCifsInode = CIFS_I(netfile->pInode); 582 pCifsInode = CIFS_I(netfile->pInode);
584 pCifsInode->clientCanCacheAll = false; 583 pCifsInode->clientCanCacheAll = false;
585 if (pSMB->OplockLevel == 0) 584 if (pSMB->OplockLevel == 0)
586 pCifsInode->clientCanCacheRead = false; 585 pCifsInode->clientCanCacheRead = false;
587 rc = slow_work_enqueue(&netfile->oplock_break); 586 rc = slow_work_enqueue(&netfile->oplock_break);
588 if (rc) { 587 if (rc) {
589 cERROR(1, ("failed to enqueue oplock " 588 cERROR(1, "failed to enqueue oplock "
590 "break: %d\n", rc)); 589 "break: %d\n", rc);
591 } else { 590 } else {
592 netfile->oplock_break_cancelled = false; 591 netfile->oplock_break_cancelled = false;
593 } 592 }
@@ -597,12 +596,12 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
597 } 596 }
598 read_unlock(&GlobalSMBSeslock); 597 read_unlock(&GlobalSMBSeslock);
599 read_unlock(&cifs_tcp_ses_lock); 598 read_unlock(&cifs_tcp_ses_lock);
600 cFYI(1, ("No matching file for oplock break")); 599 cFYI(1, "No matching file for oplock break");
601 return true; 600 return true;
602 } 601 }
603 } 602 }
604 read_unlock(&cifs_tcp_ses_lock); 603 read_unlock(&cifs_tcp_ses_lock);
605 cFYI(1, ("Can not process oplock break for non-existent connection")); 604 cFYI(1, "Can not process oplock break for non-existent connection");
606 return true; 605 return true;
607} 606}
608 607
@@ -721,11 +720,11 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
721{ 720{
722 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 721 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
723 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 722 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
724 cERROR(1, ("Autodisabling the use of server inode numbers on " 723 cERROR(1, "Autodisabling the use of server inode numbers on "
725 "%s. This server doesn't seem to support them " 724 "%s. This server doesn't seem to support them "
726 "properly. Hardlinks will not be recognized on this " 725 "properly. Hardlinks will not be recognized on this "
727 "mount. Consider mounting with the \"noserverino\" " 726 "mount. Consider mounting with the \"noserverino\" "
728 "option to silence this message.", 727 "option to silence this message.",
729 cifs_sb->tcon->treeName)); 728 cifs_sb->tcon->treeName);
730 } 729 }
731} 730}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index bd6d6895730d..d35d52889cb5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -149,7 +149,7 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
149 else if (address_family == AF_INET6) 149 else if (address_family == AF_INET6)
150 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 150 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
151 151
152 cFYI(DBG2, ("address conversion returned %d for %s", ret, cp)); 152 cFYI(DBG2, "address conversion returned %d for %s", ret, cp);
153 if (ret > 0) 153 if (ret > 0)
154 ret = 1; 154 ret = 1;
155 return ret; 155 return ret;
@@ -870,8 +870,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
870 } 870 }
871 /* else ERRHRD class errors or junk - return EIO */ 871 /* else ERRHRD class errors or junk - return EIO */
872 872
873 cFYI(1, ("Mapping smb error code %d to POSIX err %d", 873 cFYI(1, "Mapping smb error code %d to POSIX err %d",
874 smberrcode, rc)); 874 smberrcode, rc);
875 875
876 /* generic corrective action e.g. reconnect SMB session on 876 /* generic corrective action e.g. reconnect SMB session on
877 * ERRbaduid could be added */ 877 * ERRbaduid could be added */
@@ -940,20 +940,20 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
940 SMB_TIME *st = (SMB_TIME *)&time; 940 SMB_TIME *st = (SMB_TIME *)&time;
941 SMB_DATE *sd = (SMB_DATE *)&date; 941 SMB_DATE *sd = (SMB_DATE *)&date;
942 942
943 cFYI(1, ("date %d time %d", date, time)); 943 cFYI(1, "date %d time %d", date, time);
944 944
945 sec = 2 * st->TwoSeconds; 945 sec = 2 * st->TwoSeconds;
946 min = st->Minutes; 946 min = st->Minutes;
947 if ((sec > 59) || (min > 59)) 947 if ((sec > 59) || (min > 59))
948 cERROR(1, ("illegal time min %d sec %d", min, sec)); 948 cERROR(1, "illegal time min %d sec %d", min, sec);
949 sec += (min * 60); 949 sec += (min * 60);
950 sec += 60 * 60 * st->Hours; 950 sec += 60 * 60 * st->Hours;
951 if (st->Hours > 24) 951 if (st->Hours > 24)
952 cERROR(1, ("illegal hours %d", st->Hours)); 952 cERROR(1, "illegal hours %d", st->Hours);
953 days = sd->Day; 953 days = sd->Day;
954 month = sd->Month; 954 month = sd->Month;
955 if ((days > 31) || (month > 12)) { 955 if ((days > 31) || (month > 12)) {
956 cERROR(1, ("illegal date, month %d day: %d", month, days)); 956 cERROR(1, "illegal date, month %d day: %d", month, days);
957 if (month > 12) 957 if (month > 12)
958 month = 12; 958 month = 12;
959 } 959 }
@@ -979,7 +979,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
979 979
980 ts.tv_sec = sec + offset; 980 ts.tv_sec = sec + offset;
981 981
982 /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ 982 /* cFYI(1, "sec after cnvrt dos to unix time %d",sec); */
983 983
984 ts.tv_nsec = 0; 984 ts.tv_nsec = 0;
985 return ts; 985 return ts;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 18e0bc1fb593..daf1753af674 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -47,15 +47,15 @@ static void dump_cifs_file_struct(struct file *file, char *label)
47 if (file) { 47 if (file) {
48 cf = file->private_data; 48 cf = file->private_data;
49 if (cf == NULL) { 49 if (cf == NULL) {
50 cFYI(1, ("empty cifs private file data")); 50 cFYI(1, "empty cifs private file data");
51 return; 51 return;
52 } 52 }
53 if (cf->invalidHandle) 53 if (cf->invalidHandle)
54 cFYI(1, ("invalid handle")); 54 cFYI(1, "invalid handle");
55 if (cf->srch_inf.endOfSearch) 55 if (cf->srch_inf.endOfSearch)
56 cFYI(1, ("end of search")); 56 cFYI(1, "end of search");
57 if (cf->srch_inf.emptyDir) 57 if (cf->srch_inf.emptyDir)
58 cFYI(1, ("empty dir")); 58 cFYI(1, "empty dir");
59 } 59 }
60} 60}
61#else 61#else
@@ -76,7 +76,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
76 struct inode *inode; 76 struct inode *inode;
77 struct super_block *sb = parent->d_inode->i_sb; 77 struct super_block *sb = parent->d_inode->i_sb;
78 78
79 cFYI(1, ("For %s", name->name)); 79 cFYI(1, "For %s", name->name);
80 80
81 if (parent->d_op && parent->d_op->d_hash) 81 if (parent->d_op && parent->d_op->d_hash)
82 parent->d_op->d_hash(parent, name); 82 parent->d_op->d_hash(parent, name);
@@ -214,7 +214,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
214 fid, 214 fid,
215 cifs_sb->local_nls); 215 cifs_sb->local_nls);
216 if (CIFSSMBClose(xid, ptcon, fid)) { 216 if (CIFSSMBClose(xid, ptcon, fid)) {
217 cFYI(1, ("Error closing temporary reparsepoint open)")); 217 cFYI(1, "Error closing temporary reparsepoint open");
218 } 218 }
219 } 219 }
220} 220}
@@ -252,7 +252,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
252 if (full_path == NULL) 252 if (full_path == NULL)
253 return -ENOMEM; 253 return -ENOMEM;
254 254
255 cFYI(1, ("Full path: %s start at: %lld", full_path, file->f_pos)); 255 cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos);
256 256
257ffirst_retry: 257ffirst_retry:
258 /* test for Unix extensions */ 258 /* test for Unix extensions */
@@ -297,7 +297,7 @@ static int cifs_unicode_bytelen(char *str)
297 if (ustr[len] == 0) 297 if (ustr[len] == 0)
298 return len << 1; 298 return len << 1;
299 } 299 }
300 cFYI(1, ("Unicode string longer than PATH_MAX found")); 300 cFYI(1, "Unicode string longer than PATH_MAX found");
301 return len << 1; 301 return len << 1;
302} 302}
303 303
@@ -314,19 +314,18 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
314 pfData->FileNameLength; 314 pfData->FileNameLength;
315 } else 315 } else
316 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); 316 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
317 cFYI(1, ("new entry %p old entry %p", new_entry, old_entry)); 317 cFYI(1, "new entry %p old entry %p", new_entry, old_entry);
318 /* validate that new_entry is not past end of SMB */ 318 /* validate that new_entry is not past end of SMB */
319 if (new_entry >= end_of_smb) { 319 if (new_entry >= end_of_smb) {
320 cERROR(1, 320 cERROR(1, "search entry %p began after end of SMB %p old entry %p",
321 ("search entry %p began after end of SMB %p old entry %p", 321 new_entry, end_of_smb, old_entry);
322 new_entry, end_of_smb, old_entry));
323 return NULL; 322 return NULL;
324 } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && 323 } else if (((level == SMB_FIND_FILE_INFO_STANDARD) &&
325 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) 324 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb))
326 || ((level != SMB_FIND_FILE_INFO_STANDARD) && 325 || ((level != SMB_FIND_FILE_INFO_STANDARD) &&
327 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { 326 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) {
328 cERROR(1, ("search entry %p extends after end of SMB %p", 327 cERROR(1, "search entry %p extends after end of SMB %p",
329 new_entry, end_of_smb)); 328 new_entry, end_of_smb);
330 return NULL; 329 return NULL;
331 } else 330 } else
332 return new_entry; 331 return new_entry;
@@ -380,8 +379,8 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
380 filename = &pFindData->FileName[0]; 379 filename = &pFindData->FileName[0];
381 len = pFindData->FileNameLength; 380 len = pFindData->FileNameLength;
382 } else { 381 } else {
383 cFYI(1, ("Unknown findfirst level %d", 382 cFYI(1, "Unknown findfirst level %d",
384 cfile->srch_inf.info_level)); 383 cfile->srch_inf.info_level);
385 } 384 }
386 385
387 if (filename) { 386 if (filename) {
@@ -481,7 +480,7 @@ static int cifs_save_resume_key(const char *current_entry,
481 len = (unsigned int)pFindData->FileNameLength; 480 len = (unsigned int)pFindData->FileNameLength;
482 cifsFile->srch_inf.resume_key = pFindData->ResumeKey; 481 cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
483 } else { 482 } else {
484 cFYI(1, ("Unknown findfirst level %d", level)); 483 cFYI(1, "Unknown findfirst level %d", level);
485 return -EINVAL; 484 return -EINVAL;
486 } 485 }
487 cifsFile->srch_inf.resume_name_len = len; 486 cifsFile->srch_inf.resume_name_len = len;
@@ -525,7 +524,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
525 is_dir_changed(file)) || 524 is_dir_changed(file)) ||
526 (index_to_find < first_entry_in_buffer)) { 525 (index_to_find < first_entry_in_buffer)) {
527 /* close and restart search */ 526 /* close and restart search */
528 cFYI(1, ("search backing up - close and restart search")); 527 cFYI(1, "search backing up - close and restart search");
529 write_lock(&GlobalSMBSeslock); 528 write_lock(&GlobalSMBSeslock);
530 if (!cifsFile->srch_inf.endOfSearch && 529 if (!cifsFile->srch_inf.endOfSearch &&
531 !cifsFile->invalidHandle) { 530 !cifsFile->invalidHandle) {
@@ -535,7 +534,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
535 } else 534 } else
536 write_unlock(&GlobalSMBSeslock); 535 write_unlock(&GlobalSMBSeslock);
537 if (cifsFile->srch_inf.ntwrk_buf_start) { 536 if (cifsFile->srch_inf.ntwrk_buf_start) {
538 cFYI(1, ("freeing SMB ff cache buf on search rewind")); 537 cFYI(1, "freeing SMB ff cache buf on search rewind");
539 if (cifsFile->srch_inf.smallBuf) 538 if (cifsFile->srch_inf.smallBuf)
540 cifs_small_buf_release(cifsFile->srch_inf. 539 cifs_small_buf_release(cifsFile->srch_inf.
541 ntwrk_buf_start); 540 ntwrk_buf_start);
@@ -546,8 +545,8 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
546 } 545 }
547 rc = initiate_cifs_search(xid, file); 546 rc = initiate_cifs_search(xid, file);
548 if (rc) { 547 if (rc) {
549 cFYI(1, ("error %d reinitiating a search on rewind", 548 cFYI(1, "error %d reinitiating a search on rewind",
550 rc)); 549 rc);
551 return rc; 550 return rc;
552 } 551 }
553 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 552 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
@@ -555,7 +554,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
555 554
556 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 555 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
557 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 556 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
558 cFYI(1, ("calling findnext2")); 557 cFYI(1, "calling findnext2");
559 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 558 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
560 &cifsFile->srch_inf); 559 &cifsFile->srch_inf);
561 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 560 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
@@ -575,7 +574,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
575 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 574 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
576 - cifsFile->srch_inf.entries_in_buffer; 575 - cifsFile->srch_inf.entries_in_buffer;
577 pos_in_buf = index_to_find - first_entry_in_buffer; 576 pos_in_buf = index_to_find - first_entry_in_buffer;
578 cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf)); 577 cFYI(1, "found entry - pos_in_buf %d", pos_in_buf);
579 578
580 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { 579 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
581 /* go entry by entry figuring out which is first */ 580 /* go entry by entry figuring out which is first */
@@ -584,19 +583,19 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
584 } 583 }
585 if ((current_entry == NULL) && (i < pos_in_buf)) { 584 if ((current_entry == NULL) && (i < pos_in_buf)) {
586 /* BB fixme - check if we should flag this error */ 585 /* BB fixme - check if we should flag this error */
587 cERROR(1, ("reached end of buf searching for pos in buf" 586 cERROR(1, "reached end of buf searching for pos in buf"
588 " %d index to find %lld rc %d", 587 " %d index to find %lld rc %d",
589 pos_in_buf, index_to_find, rc)); 588 pos_in_buf, index_to_find, rc);
590 } 589 }
591 rc = 0; 590 rc = 0;
592 *ppCurrentEntry = current_entry; 591 *ppCurrentEntry = current_entry;
593 } else { 592 } else {
594 cFYI(1, ("index not in buffer - could not findnext into it")); 593 cFYI(1, "index not in buffer - could not findnext into it");
595 return 0; 594 return 0;
596 } 595 }
597 596
598 if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { 597 if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) {
599 cFYI(1, ("can not return entries pos_in_buf beyond last")); 598 cFYI(1, "can not return entries pos_in_buf beyond last");
600 *num_to_ret = 0; 599 *num_to_ret = 0;
601 } else 600 } else
602 *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; 601 *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf;
@@ -656,12 +655,12 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
656 /* one byte length, no name conversion */ 655 /* one byte length, no name conversion */
657 len = (unsigned int)pFindData->FileNameLength; 656 len = (unsigned int)pFindData->FileNameLength;
658 } else { 657 } else {
659 cFYI(1, ("Unknown findfirst level %d", level)); 658 cFYI(1, "Unknown findfirst level %d", level);
660 return -EINVAL; 659 return -EINVAL;
661 } 660 }
662 661
663 if (len > max_len) { 662 if (len > max_len) {
664 cERROR(1, ("bad search response length %d past smb end", len)); 663 cERROR(1, "bad search response length %d past smb end", len);
665 return -EINVAL; 664 return -EINVAL;
666 } 665 }
667 666
@@ -754,7 +753,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
754 * case already. Why should we be clobbering other errors from it? 753 * case already. Why should we be clobbering other errors from it?
755 */ 754 */
756 if (rc) { 755 if (rc) {
757 cFYI(1, ("filldir rc = %d", rc)); 756 cFYI(1, "filldir rc = %d", rc);
758 rc = -EOVERFLOW; 757 rc = -EOVERFLOW;
759 } 758 }
760 dput(tmp_dentry); 759 dput(tmp_dentry);
@@ -786,7 +785,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
786 case 0: 785 case 0:
787 if (filldir(direntry, ".", 1, file->f_pos, 786 if (filldir(direntry, ".", 1, file->f_pos,
788 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) { 787 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) {
789 cERROR(1, ("Filldir for current dir failed")); 788 cERROR(1, "Filldir for current dir failed");
790 rc = -ENOMEM; 789 rc = -ENOMEM;
791 break; 790 break;
792 } 791 }
@@ -794,7 +793,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
794 case 1: 793 case 1:
795 if (filldir(direntry, "..", 2, file->f_pos, 794 if (filldir(direntry, "..", 2, file->f_pos,
796 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 795 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
797 cERROR(1, ("Filldir for parent dir failed")); 796 cERROR(1, "Filldir for parent dir failed");
798 rc = -ENOMEM; 797 rc = -ENOMEM;
799 break; 798 break;
800 } 799 }
@@ -807,7 +806,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
807 806
808 if (file->private_data == NULL) { 807 if (file->private_data == NULL) {
809 rc = initiate_cifs_search(xid, file); 808 rc = initiate_cifs_search(xid, file);
810 cFYI(1, ("initiate cifs search rc %d", rc)); 809 cFYI(1, "initiate cifs search rc %d", rc);
811 if (rc) { 810 if (rc) {
812 FreeXid(xid); 811 FreeXid(xid);
813 return rc; 812 return rc;
@@ -821,7 +820,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
821 cifsFile = file->private_data; 820 cifsFile = file->private_data;
822 if (cifsFile->srch_inf.endOfSearch) { 821 if (cifsFile->srch_inf.endOfSearch) {
823 if (cifsFile->srch_inf.emptyDir) { 822 if (cifsFile->srch_inf.emptyDir) {
824 cFYI(1, ("End of search, empty dir")); 823 cFYI(1, "End of search, empty dir");
825 rc = 0; 824 rc = 0;
826 break; 825 break;
827 } 826 }
@@ -833,16 +832,16 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
833 rc = find_cifs_entry(xid, pTcon, file, 832 rc = find_cifs_entry(xid, pTcon, file,
834 &current_entry, &num_to_fill); 833 &current_entry, &num_to_fill);
835 if (rc) { 834 if (rc) {
836 cFYI(1, ("fce error %d", rc)); 835 cFYI(1, "fce error %d", rc);
837 goto rddir2_exit; 836 goto rddir2_exit;
838 } else if (current_entry != NULL) { 837 } else if (current_entry != NULL) {
839 cFYI(1, ("entry %lld found", file->f_pos)); 838 cFYI(1, "entry %lld found", file->f_pos);
840 } else { 839 } else {
841 cFYI(1, ("could not find entry")); 840 cFYI(1, "could not find entry");
842 goto rddir2_exit; 841 goto rddir2_exit;
843 } 842 }
844 cFYI(1, ("loop through %d times filling dir for net buf %p", 843 cFYI(1, "loop through %d times filling dir for net buf %p",
845 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start)); 844 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
846 max_len = smbCalcSize((struct smb_hdr *) 845 max_len = smbCalcSize((struct smb_hdr *)
847 cifsFile->srch_inf.ntwrk_buf_start); 846 cifsFile->srch_inf.ntwrk_buf_start);
848 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 847 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
@@ -851,8 +850,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
851 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 850 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
852 if (current_entry == NULL) { 851 if (current_entry == NULL) {
853 /* evaluate whether this case is an error */ 852 /* evaluate whether this case is an error */
854 cERROR(1, ("past SMB end, num to fill %d i %d", 853 cERROR(1, "past SMB end, num to fill %d i %d",
855 num_to_fill, i)); 854 num_to_fill, i);
856 break; 855 break;
857 } 856 }
858 /* if buggy server returns . and .. late do 857 /* if buggy server returns . and .. late do
@@ -867,8 +866,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
867 file->f_pos++; 866 file->f_pos++;
868 if (file->f_pos == 867 if (file->f_pos ==
869 cifsFile->srch_inf.index_of_last_entry) { 868 cifsFile->srch_inf.index_of_last_entry) {
870 cFYI(1, ("last entry in buf at pos %lld %s", 869 cFYI(1, "last entry in buf at pos %lld %s",
871 file->f_pos, tmp_buf)); 870 file->f_pos, tmp_buf);
872 cifs_save_resume_key(current_entry, cifsFile); 871 cifs_save_resume_key(current_entry, cifsFile);
873 break; 872 break;
874 } else 873 } else
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7c3fd7463f44..7707389bdf2c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -35,9 +35,11 @@
35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
36 unsigned char *p24); 36 unsigned char *p24);
37 37
38/* Checks if this is the first smb session to be reconnected after 38/*
39 the socket has been reestablished (so we know whether to use vc 0). 39 * Checks if this is the first smb session to be reconnected after
40 Called while holding the cifs_tcp_ses_lock, so do not block */ 40 * the socket has been reestablished (so we know whether to use vc 0).
41 * Called while holding the cifs_tcp_ses_lock, so do not block
42 */
41static bool is_first_ses_reconnect(struct cifsSesInfo *ses) 43static bool is_first_ses_reconnect(struct cifsSesInfo *ses)
42{ 44{
43 struct list_head *tmp; 45 struct list_head *tmp;
@@ -284,7 +286,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
284 int len; 286 int len;
285 char *data = *pbcc_area; 287 char *data = *pbcc_area;
286 288
287 cFYI(1, ("bleft %d", bleft)); 289 cFYI(1, "bleft %d", bleft);
288 290
289 /* 291 /*
290 * Windows servers do not always double null terminate their final 292 * Windows servers do not always double null terminate their final
@@ -301,7 +303,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
301 303
302 kfree(ses->serverOS); 304 kfree(ses->serverOS);
303 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 305 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
304 cFYI(1, ("serverOS=%s", ses->serverOS)); 306 cFYI(1, "serverOS=%s", ses->serverOS);
305 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; 307 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
306 data += len; 308 data += len;
307 bleft -= len; 309 bleft -= len;
@@ -310,7 +312,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
310 312
311 kfree(ses->serverNOS); 313 kfree(ses->serverNOS);
312 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 314 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
313 cFYI(1, ("serverNOS=%s", ses->serverNOS)); 315 cFYI(1, "serverNOS=%s", ses->serverNOS);
314 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; 316 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
315 data += len; 317 data += len;
316 bleft -= len; 318 bleft -= len;
@@ -319,7 +321,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
319 321
320 kfree(ses->serverDomain); 322 kfree(ses->serverDomain);
321 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 323 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
322 cFYI(1, ("serverDomain=%s", ses->serverDomain)); 324 cFYI(1, "serverDomain=%s", ses->serverDomain);
323 325
324 return; 326 return;
325} 327}
@@ -332,7 +334,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
332 int len; 334 int len;
333 char *bcc_ptr = *pbcc_area; 335 char *bcc_ptr = *pbcc_area;
334 336
335 cFYI(1, ("decode sessetup ascii. bleft %d", bleft)); 337 cFYI(1, "decode sessetup ascii. bleft %d", bleft);
336 338
337 len = strnlen(bcc_ptr, bleft); 339 len = strnlen(bcc_ptr, bleft);
338 if (len >= bleft) 340 if (len >= bleft)
@@ -344,7 +346,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
344 if (ses->serverOS) 346 if (ses->serverOS)
345 strncpy(ses->serverOS, bcc_ptr, len); 347 strncpy(ses->serverOS, bcc_ptr, len);
346 if (strncmp(ses->serverOS, "OS/2", 4) == 0) { 348 if (strncmp(ses->serverOS, "OS/2", 4) == 0) {
347 cFYI(1, ("OS/2 server")); 349 cFYI(1, "OS/2 server");
348 ses->flags |= CIFS_SES_OS2; 350 ses->flags |= CIFS_SES_OS2;
349 } 351 }
350 352
@@ -373,7 +375,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
373 /* BB For newer servers which do not support Unicode, 375 /* BB For newer servers which do not support Unicode,
374 but thus do return domain here we could add parsing 376 but thus do return domain here we could add parsing
375 for it later, but it is not very important */ 377 for it later, but it is not very important */
376 cFYI(1, ("ascii: bytes left %d", bleft)); 378 cFYI(1, "ascii: bytes left %d", bleft);
377 379
378 return rc; 380 return rc;
379} 381}
@@ -384,16 +386,16 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
384 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; 386 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
385 387
386 if (blob_len < sizeof(CHALLENGE_MESSAGE)) { 388 if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
387 cERROR(1, ("challenge blob len %d too small", blob_len)); 389 cERROR(1, "challenge blob len %d too small", blob_len);
388 return -EINVAL; 390 return -EINVAL;
389 } 391 }
390 392
391 if (memcmp(pblob->Signature, "NTLMSSP", 8)) { 393 if (memcmp(pblob->Signature, "NTLMSSP", 8)) {
392 cERROR(1, ("blob signature incorrect %s", pblob->Signature)); 394 cERROR(1, "blob signature incorrect %s", pblob->Signature);
393 return -EINVAL; 395 return -EINVAL;
394 } 396 }
395 if (pblob->MessageType != NtLmChallenge) { 397 if (pblob->MessageType != NtLmChallenge) {
396 cERROR(1, ("Incorrect message type %d", pblob->MessageType)); 398 cERROR(1, "Incorrect message type %d", pblob->MessageType);
397 return -EINVAL; 399 return -EINVAL;
398 } 400 }
399 401
@@ -447,7 +449,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
447 This function returns the length of the data in the blob */ 449 This function returns the length of the data in the blob */
448static int build_ntlmssp_auth_blob(unsigned char *pbuffer, 450static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
449 struct cifsSesInfo *ses, 451 struct cifsSesInfo *ses,
450 const struct nls_table *nls_cp, int first) 452 const struct nls_table *nls_cp, bool first)
451{ 453{
452 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 454 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
453 __u32 flags; 455 __u32 flags;
@@ -546,7 +548,7 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
546 548
547static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, 549static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
548 struct cifsSesInfo *ses, 550 struct cifsSesInfo *ses,
549 const struct nls_table *nls, int first_time) 551 const struct nls_table *nls, bool first_time)
550{ 552{
551 int bloblen; 553 int bloblen;
552 554
@@ -559,8 +561,8 @@ static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
559#endif 561#endif
560 562
561int 563int
562CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, 564CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
563 const struct nls_table *nls_cp) 565 const struct nls_table *nls_cp)
564{ 566{
565 int rc = 0; 567 int rc = 0;
566 int wct; 568 int wct;
@@ -577,13 +579,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
577 int bytes_remaining; 579 int bytes_remaining;
578 struct key *spnego_key = NULL; 580 struct key *spnego_key = NULL;
579 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ 581 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
582 bool first_time;
580 583
581 if (ses == NULL) 584 if (ses == NULL)
582 return -EINVAL; 585 return -EINVAL;
583 586
587 read_lock(&cifs_tcp_ses_lock);
588 first_time = is_first_ses_reconnect(ses);
589 read_unlock(&cifs_tcp_ses_lock);
590
584 type = ses->server->secType; 591 type = ses->server->secType;
585 592
586 cFYI(1, ("sess setup type %d", type)); 593 cFYI(1, "sess setup type %d", type);
587ssetup_ntlmssp_authenticate: 594ssetup_ntlmssp_authenticate:
588 if (phase == NtLmChallenge) 595 if (phase == NtLmChallenge)
589 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 596 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -664,7 +671,7 @@ ssetup_ntlmssp_authenticate:
664 changed to do higher than lanman dialect and 671 changed to do higher than lanman dialect and
665 we reconnected would we ever calc signing_key? */ 672 we reconnected would we ever calc signing_key? */
666 673
667 cFYI(1, ("Negotiating LANMAN setting up strings")); 674 cFYI(1, "Negotiating LANMAN setting up strings");
668 /* Unicode not allowed for LANMAN dialects */ 675 /* Unicode not allowed for LANMAN dialects */
669 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 676 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
670#endif 677#endif
@@ -744,7 +751,7 @@ ssetup_ntlmssp_authenticate:
744 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); 751 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
745 } else 752 } else
746 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 753 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
747 } else if (type == Kerberos || type == MSKerberos) { 754 } else if (type == Kerberos) {
748#ifdef CONFIG_CIFS_UPCALL 755#ifdef CONFIG_CIFS_UPCALL
749 struct cifs_spnego_msg *msg; 756 struct cifs_spnego_msg *msg;
750 spnego_key = cifs_get_spnego_key(ses); 757 spnego_key = cifs_get_spnego_key(ses);
@@ -758,17 +765,17 @@ ssetup_ntlmssp_authenticate:
758 /* check version field to make sure that cifs.upcall is 765 /* check version field to make sure that cifs.upcall is
759 sending us a response in an expected form */ 766 sending us a response in an expected form */
760 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { 767 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
761 cERROR(1, ("incorrect version of cifs.upcall (expected" 768 cERROR(1, "incorrect version of cifs.upcall (expected"
762 " %d but got %d)", 769 " %d but got %d)",
763 CIFS_SPNEGO_UPCALL_VERSION, msg->version)); 770 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
764 rc = -EKEYREJECTED; 771 rc = -EKEYREJECTED;
765 goto ssetup_exit; 772 goto ssetup_exit;
766 } 773 }
767 /* bail out if key is too long */ 774 /* bail out if key is too long */
768 if (msg->sesskey_len > 775 if (msg->sesskey_len >
769 sizeof(ses->server->mac_signing_key.data.krb5)) { 776 sizeof(ses->server->mac_signing_key.data.krb5)) {
770 cERROR(1, ("Kerberos signing key too long (%u bytes)", 777 cERROR(1, "Kerberos signing key too long (%u bytes)",
771 msg->sesskey_len)); 778 msg->sesskey_len);
772 rc = -EOVERFLOW; 779 rc = -EOVERFLOW;
773 goto ssetup_exit; 780 goto ssetup_exit;
774 } 781 }
@@ -796,7 +803,7 @@ ssetup_ntlmssp_authenticate:
796 /* BB: is this right? */ 803 /* BB: is this right? */
797 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 804 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
798#else /* ! CONFIG_CIFS_UPCALL */ 805#else /* ! CONFIG_CIFS_UPCALL */
799 cERROR(1, ("Kerberos negotiated but upcall support disabled!")); 806 cERROR(1, "Kerberos negotiated but upcall support disabled!");
800 rc = -ENOSYS; 807 rc = -ENOSYS;
801 goto ssetup_exit; 808 goto ssetup_exit;
802#endif /* CONFIG_CIFS_UPCALL */ 809#endif /* CONFIG_CIFS_UPCALL */
@@ -804,12 +811,12 @@ ssetup_ntlmssp_authenticate:
804#ifdef CONFIG_CIFS_EXPERIMENTAL 811#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if (type == RawNTLMSSP) { 812 if (type == RawNTLMSSP) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 813 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support")); 814 cERROR(1, "NTLMSSP requires Unicode support");
808 rc = -ENOSYS; 815 rc = -ENOSYS;
809 goto ssetup_exit; 816 goto ssetup_exit;
810 } 817 }
811 818
812 cFYI(1, ("ntlmssp session setup phase %d", phase)); 819 cFYI(1, "ntlmssp session setup phase %d", phase);
813 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 820 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
814 capabilities |= CAP_EXTENDED_SECURITY; 821 capabilities |= CAP_EXTENDED_SECURITY;
815 pSMB->req.Capabilities |= cpu_to_le32(capabilities); 822 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
@@ -827,7 +834,7 @@ ssetup_ntlmssp_authenticate:
827 on the response (challenge) */ 834 on the response (challenge) */
828 smb_buf->Uid = ses->Suid; 835 smb_buf->Uid = ses->Suid;
829 } else { 836 } else {
830 cERROR(1, ("invalid phase %d", phase)); 837 cERROR(1, "invalid phase %d", phase);
831 rc = -ENOSYS; 838 rc = -ENOSYS;
832 goto ssetup_exit; 839 goto ssetup_exit;
833 } 840 }
@@ -839,12 +846,12 @@ ssetup_ntlmssp_authenticate:
839 } 846 }
840 unicode_oslm_strings(&bcc_ptr, nls_cp); 847 unicode_oslm_strings(&bcc_ptr, nls_cp);
841 } else { 848 } else {
842 cERROR(1, ("secType %d not supported!", type)); 849 cERROR(1, "secType %d not supported!", type);
843 rc = -ENOSYS; 850 rc = -ENOSYS;
844 goto ssetup_exit; 851 goto ssetup_exit;
845 } 852 }
846#else 853#else
847 cERROR(1, ("secType %d not supported!", type)); 854 cERROR(1, "secType %d not supported!", type);
848 rc = -ENOSYS; 855 rc = -ENOSYS;
849 goto ssetup_exit; 856 goto ssetup_exit;
850#endif 857#endif
@@ -862,7 +869,7 @@ ssetup_ntlmssp_authenticate:
862 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); 869 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
863 /* SMB request buf freed in SendReceive2 */ 870 /* SMB request buf freed in SendReceive2 */
864 871
865 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); 872 cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
866 873
867 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 874 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
868 smb_buf = (struct smb_hdr *)iov[0].iov_base; 875 smb_buf = (struct smb_hdr *)iov[0].iov_base;
@@ -870,7 +877,7 @@ ssetup_ntlmssp_authenticate:
870 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == 877 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError ==
871 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { 878 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
872 if (phase != NtLmNegotiate) { 879 if (phase != NtLmNegotiate) {
873 cERROR(1, ("Unexpected more processing error")); 880 cERROR(1, "Unexpected more processing error");
874 goto ssetup_exit; 881 goto ssetup_exit;
875 } 882 }
876 /* NTLMSSP Negotiate sent now processing challenge (response) */ 883 /* NTLMSSP Negotiate sent now processing challenge (response) */
@@ -882,14 +889,14 @@ ssetup_ntlmssp_authenticate:
882 889
883 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 890 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
884 rc = -EIO; 891 rc = -EIO;
885 cERROR(1, ("bad word count %d", smb_buf->WordCount)); 892 cERROR(1, "bad word count %d", smb_buf->WordCount);
886 goto ssetup_exit; 893 goto ssetup_exit;
887 } 894 }
888 action = le16_to_cpu(pSMB->resp.Action); 895 action = le16_to_cpu(pSMB->resp.Action);
889 if (action & GUEST_LOGIN) 896 if (action & GUEST_LOGIN)
890 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */ 897 cFYI(1, "Guest login"); /* BB mark SesInfo struct? */
891 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ 898 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
892 cFYI(1, ("UID = %d ", ses->Suid)); 899 cFYI(1, "UID = %d ", ses->Suid);
893 /* response can have either 3 or 4 word count - Samba sends 3 */ 900 /* response can have either 3 or 4 word count - Samba sends 3 */
894 /* and lanman response is 3 */ 901 /* and lanman response is 3 */
895 bytes_remaining = BCC(smb_buf); 902 bytes_remaining = BCC(smb_buf);
@@ -899,7 +906,7 @@ ssetup_ntlmssp_authenticate:
899 __u16 blob_len; 906 __u16 blob_len;
900 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 907 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
901 if (blob_len > bytes_remaining) { 908 if (blob_len > bytes_remaining) {
902 cERROR(1, ("bad security blob length %d", blob_len)); 909 cERROR(1, "bad security blob length %d", blob_len);
903 rc = -EINVAL; 910 rc = -EINVAL;
904 goto ssetup_exit; 911 goto ssetup_exit;
905 } 912 }
@@ -933,7 +940,7 @@ ssetup_exit:
933 } 940 }
934 kfree(str_area); 941 kfree(str_area);
935 if (resp_buf_type == CIFS_SMALL_BUFFER) { 942 if (resp_buf_type == CIFS_SMALL_BUFFER) {
936 cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); 943 cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base);
937 cifs_small_buf_release(iov[0].iov_base); 944 cifs_small_buf_release(iov[0].iov_base);
938 } else if (resp_buf_type == CIFS_LARGE_BUFFER) 945 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
939 cifs_buf_release(iov[0].iov_base); 946 cifs_buf_release(iov[0].iov_base);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index ad081fe7eb18..82f78c4d6978 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -35,7 +35,6 @@
35#include "cifs_debug.h" 35#include "cifs_debug.h"
36 36
37extern mempool_t *cifs_mid_poolp; 37extern mempool_t *cifs_mid_poolp;
38extern struct kmem_cache *cifs_oplock_cachep;
39 38
40static struct mid_q_entry * 39static struct mid_q_entry *
41AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) 40AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
@@ -43,7 +42,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
43 struct mid_q_entry *temp; 42 struct mid_q_entry *temp;
44 43
45 if (server == NULL) { 44 if (server == NULL) {
46 cERROR(1, ("Null TCP session in AllocMidQEntry")); 45 cERROR(1, "Null TCP session in AllocMidQEntry");
47 return NULL; 46 return NULL;
48 } 47 }
49 48
@@ -55,7 +54,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
55 temp->mid = smb_buffer->Mid; /* always LE */ 54 temp->mid = smb_buffer->Mid; /* always LE */
56 temp->pid = current->pid; 55 temp->pid = current->pid;
57 temp->command = smb_buffer->Command; 56 temp->command = smb_buffer->Command;
58 cFYI(1, ("For smb_command %d", temp->command)); 57 cFYI(1, "For smb_command %d", temp->command);
59 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ 58 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
60 /* when mid allocated can be before when sent */ 59 /* when mid allocated can be before when sent */
61 temp->when_alloc = jiffies; 60 temp->when_alloc = jiffies;
@@ -140,7 +139,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
140 total_len += iov[i].iov_len; 139 total_len += iov[i].iov_len;
141 140
142 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length); 141 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
143 cFYI(1, ("Sending smb: total_len %d", total_len)); 142 cFYI(1, "Sending smb: total_len %d", total_len);
144 dump_smb(smb_buffer, len); 143 dump_smb(smb_buffer, len);
145 144
146 i = 0; 145 i = 0;
@@ -168,9 +167,8 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
168 reconnect which may clear the network problem. 167 reconnect which may clear the network problem.
169 */ 168 */
170 if ((i >= 14) || (!server->noblocksnd && (i > 2))) { 169 if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
171 cERROR(1, 170 cERROR(1, "sends on sock %p stuck for 15 seconds",
172 ("sends on sock %p stuck for 15 seconds", 171 ssocket);
173 ssocket));
174 rc = -EAGAIN; 172 rc = -EAGAIN;
175 break; 173 break;
176 } 174 }
@@ -184,13 +182,13 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
184 total_len = 0; 182 total_len = 0;
185 break; 183 break;
186 } else if (rc > total_len) { 184 } else if (rc > total_len) {
187 cERROR(1, ("sent %d requested %d", rc, total_len)); 185 cERROR(1, "sent %d requested %d", rc, total_len);
188 break; 186 break;
189 } 187 }
190 if (rc == 0) { 188 if (rc == 0) {
191 /* should never happen, letting socket clear before 189 /* should never happen, letting socket clear before
192 retrying is our only obvious option here */ 190 retrying is our only obvious option here */
193 cERROR(1, ("tcp sent no data")); 191 cERROR(1, "tcp sent no data");
194 msleep(500); 192 msleep(500);
195 continue; 193 continue;
196 } 194 }
@@ -213,8 +211,8 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
213 } 211 }
214 212
215 if ((total_len > 0) && (total_len != smb_buf_length + 4)) { 213 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
216 cFYI(1, ("partial send (%d remaining), terminating session", 214 cFYI(1, "partial send (%d remaining), terminating session",
217 total_len)); 215 total_len);
218 /* If we have only sent part of an SMB then the next SMB 216 /* If we have only sent part of an SMB then the next SMB
219 could be taken as the remainder of this one. We need 217 could be taken as the remainder of this one. We need
220 to kill the socket so the server throws away the partial 218 to kill the socket so the server throws away the partial
@@ -223,7 +221,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
223 } 221 }
224 222
225 if (rc < 0) { 223 if (rc < 0) {
226 cERROR(1, ("Error %d sending data on socket to server", rc)); 224 cERROR(1, "Error %d sending data on socket to server", rc);
227 } else 225 } else
228 rc = 0; 226 rc = 0;
229 227
@@ -296,7 +294,7 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
296 } 294 }
297 295
298 if (ses->server->tcpStatus == CifsNeedReconnect) { 296 if (ses->server->tcpStatus == CifsNeedReconnect) {
299 cFYI(1, ("tcp session dead - return to caller to retry")); 297 cFYI(1, "tcp session dead - return to caller to retry");
300 return -EAGAIN; 298 return -EAGAIN;
301 } 299 }
302 300
@@ -348,7 +346,7 @@ static int wait_for_response(struct cifsSesInfo *ses,
348 lrt += time_to_wait; 346 lrt += time_to_wait;
349 if (time_after(jiffies, lrt)) { 347 if (time_after(jiffies, lrt)) {
350 /* No replies for time_to_wait. */ 348 /* No replies for time_to_wait. */
351 cERROR(1, ("server not responding")); 349 cERROR(1, "server not responding");
352 return -1; 350 return -1;
353 } 351 }
354 } else { 352 } else {
@@ -379,7 +377,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
379 iov[0].iov_len = in_buf->smb_buf_length + 4; 377 iov[0].iov_len = in_buf->smb_buf_length + 4;
380 flags |= CIFS_NO_RESP; 378 flags |= CIFS_NO_RESP;
381 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 379 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
382 cFYI(DBG2, ("SendRcvNoRsp flags %d rc %d", flags, rc)); 380 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
383 381
384 return rc; 382 return rc;
385} 383}
@@ -402,7 +400,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
402 400
403 if ((ses == NULL) || (ses->server == NULL)) { 401 if ((ses == NULL) || (ses->server == NULL)) {
404 cifs_small_buf_release(in_buf); 402 cifs_small_buf_release(in_buf);
405 cERROR(1, ("Null session")); 403 cERROR(1, "Null session");
406 return -EIO; 404 return -EIO;
407 } 405 }
408 406
@@ -471,7 +469,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
471 else if (long_op == CIFS_BLOCKING_OP) 469 else if (long_op == CIFS_BLOCKING_OP)
472 timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */ 470 timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */
473 else { 471 else {
474 cERROR(1, ("unknown timeout flag %d", long_op)); 472 cERROR(1, "unknown timeout flag %d", long_op);
475 rc = -EIO; 473 rc = -EIO;
476 goto out; 474 goto out;
477 } 475 }
@@ -490,8 +488,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
490 spin_lock(&GlobalMid_Lock); 488 spin_lock(&GlobalMid_Lock);
491 489
492 if (midQ->resp_buf == NULL) { 490 if (midQ->resp_buf == NULL) {
493 cERROR(1, ("No response to cmd %d mid %d", 491 cERROR(1, "No response to cmd %d mid %d",
494 midQ->command, midQ->mid)); 492 midQ->command, midQ->mid);
495 if (midQ->midState == MID_REQUEST_SUBMITTED) { 493 if (midQ->midState == MID_REQUEST_SUBMITTED) {
496 if (ses->server->tcpStatus == CifsExiting) 494 if (ses->server->tcpStatus == CifsExiting)
497 rc = -EHOSTDOWN; 495 rc = -EHOSTDOWN;
@@ -504,7 +502,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
504 if (rc != -EHOSTDOWN) { 502 if (rc != -EHOSTDOWN) {
505 if (midQ->midState == MID_RETRY_NEEDED) { 503 if (midQ->midState == MID_RETRY_NEEDED) {
506 rc = -EAGAIN; 504 rc = -EAGAIN;
507 cFYI(1, ("marking request for retry")); 505 cFYI(1, "marking request for retry");
508 } else { 506 } else {
509 rc = -EIO; 507 rc = -EIO;
510 } 508 }
@@ -521,8 +519,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
521 receive_len = midQ->resp_buf->smb_buf_length; 519 receive_len = midQ->resp_buf->smb_buf_length;
522 520
523 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 521 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
524 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 522 cERROR(1, "Frame too large received. Length: %d Xid: %d",
525 receive_len, xid)); 523 receive_len, xid);
526 rc = -EIO; 524 rc = -EIO;
527 goto out; 525 goto out;
528 } 526 }
@@ -548,7 +546,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
548 &ses->server->mac_signing_key, 546 &ses->server->mac_signing_key,
549 midQ->sequence_number+1); 547 midQ->sequence_number+1);
550 if (rc) { 548 if (rc) {
551 cERROR(1, ("Unexpected SMB signature")); 549 cERROR(1, "Unexpected SMB signature");
552 /* BB FIXME add code to kill session */ 550 /* BB FIXME add code to kill session */
553 } 551 }
554 } 552 }
@@ -569,7 +567,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
569 DeleteMidQEntry */ 567 DeleteMidQEntry */
570 } else { 568 } else {
571 rc = -EIO; 569 rc = -EIO;
572 cFYI(1, ("Bad MID state?")); 570 cFYI(1, "Bad MID state?");
573 } 571 }
574 572
575out: 573out:
@@ -591,11 +589,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
591 struct mid_q_entry *midQ; 589 struct mid_q_entry *midQ;
592 590
593 if (ses == NULL) { 591 if (ses == NULL) {
594 cERROR(1, ("Null smb session")); 592 cERROR(1, "Null smb session");
595 return -EIO; 593 return -EIO;
596 } 594 }
597 if (ses->server == NULL) { 595 if (ses->server == NULL) {
598 cERROR(1, ("Null tcp session")); 596 cERROR(1, "Null tcp session");
599 return -EIO; 597 return -EIO;
600 } 598 }
601 599
@@ -607,8 +605,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
607 use ses->maxReq */ 605 use ses->maxReq */
608 606
609 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 607 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
610 cERROR(1, ("Illegal length, greater than maximum frame, %d", 608 cERROR(1, "Illegal length, greater than maximum frame, %d",
611 in_buf->smb_buf_length)); 609 in_buf->smb_buf_length);
612 return -EIO; 610 return -EIO;
613 } 611 }
614 612
@@ -665,7 +663,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
665 else if (long_op == CIFS_BLOCKING_OP) 663 else if (long_op == CIFS_BLOCKING_OP)
666 timeout = 0x7FFFFFFF; /* large but no so large as to wrap */ 664 timeout = 0x7FFFFFFF; /* large but no so large as to wrap */
667 else { 665 else {
668 cERROR(1, ("unknown timeout flag %d", long_op)); 666 cERROR(1, "unknown timeout flag %d", long_op);
669 rc = -EIO; 667 rc = -EIO;
670 goto out; 668 goto out;
671 } 669 }
@@ -681,8 +679,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
681 679
682 spin_lock(&GlobalMid_Lock); 680 spin_lock(&GlobalMid_Lock);
683 if (midQ->resp_buf == NULL) { 681 if (midQ->resp_buf == NULL) {
684 cERROR(1, ("No response for cmd %d mid %d", 682 cERROR(1, "No response for cmd %d mid %d",
685 midQ->command, midQ->mid)); 683 midQ->command, midQ->mid);
686 if (midQ->midState == MID_REQUEST_SUBMITTED) { 684 if (midQ->midState == MID_REQUEST_SUBMITTED) {
687 if (ses->server->tcpStatus == CifsExiting) 685 if (ses->server->tcpStatus == CifsExiting)
688 rc = -EHOSTDOWN; 686 rc = -EHOSTDOWN;
@@ -695,7 +693,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
695 if (rc != -EHOSTDOWN) { 693 if (rc != -EHOSTDOWN) {
696 if (midQ->midState == MID_RETRY_NEEDED) { 694 if (midQ->midState == MID_RETRY_NEEDED) {
697 rc = -EAGAIN; 695 rc = -EAGAIN;
698 cFYI(1, ("marking request for retry")); 696 cFYI(1, "marking request for retry");
699 } else { 697 } else {
700 rc = -EIO; 698 rc = -EIO;
701 } 699 }
@@ -712,8 +710,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
712 receive_len = midQ->resp_buf->smb_buf_length; 710 receive_len = midQ->resp_buf->smb_buf_length;
713 711
714 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 712 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
715 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 713 cERROR(1, "Frame too large received. Length: %d Xid: %d",
716 receive_len, xid)); 714 receive_len, xid);
717 rc = -EIO; 715 rc = -EIO;
718 goto out; 716 goto out;
719 } 717 }
@@ -736,7 +734,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
736 &ses->server->mac_signing_key, 734 &ses->server->mac_signing_key,
737 midQ->sequence_number+1); 735 midQ->sequence_number+1);
738 if (rc) { 736 if (rc) {
739 cERROR(1, ("Unexpected SMB signature")); 737 cERROR(1, "Unexpected SMB signature");
740 /* BB FIXME add code to kill session */ 738 /* BB FIXME add code to kill session */
741 } 739 }
742 } 740 }
@@ -753,7 +751,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
753 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 751 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
754 } else { 752 } else {
755 rc = -EIO; 753 rc = -EIO;
756 cERROR(1, ("Bad MID state?")); 754 cERROR(1, "Bad MID state?");
757 } 755 }
758 756
759out: 757out:
@@ -824,13 +822,13 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
824 struct cifsSesInfo *ses; 822 struct cifsSesInfo *ses;
825 823
826 if (tcon == NULL || tcon->ses == NULL) { 824 if (tcon == NULL || tcon->ses == NULL) {
827 cERROR(1, ("Null smb session")); 825 cERROR(1, "Null smb session");
828 return -EIO; 826 return -EIO;
829 } 827 }
830 ses = tcon->ses; 828 ses = tcon->ses;
831 829
832 if (ses->server == NULL) { 830 if (ses->server == NULL) {
833 cERROR(1, ("Null tcp session")); 831 cERROR(1, "Null tcp session");
834 return -EIO; 832 return -EIO;
835 } 833 }
836 834
@@ -842,8 +840,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
842 use ses->maxReq */ 840 use ses->maxReq */
843 841
844 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 842 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
845 cERROR(1, ("Illegal length, greater than maximum frame, %d", 843 cERROR(1, "Illegal length, greater than maximum frame, %d",
846 in_buf->smb_buf_length)); 844 in_buf->smb_buf_length);
847 return -EIO; 845 return -EIO;
848 } 846 }
849 847
@@ -933,8 +931,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
933 spin_unlock(&GlobalMid_Lock); 931 spin_unlock(&GlobalMid_Lock);
934 receive_len = midQ->resp_buf->smb_buf_length; 932 receive_len = midQ->resp_buf->smb_buf_length;
935 } else { 933 } else {
936 cERROR(1, ("No response for cmd %d mid %d", 934 cERROR(1, "No response for cmd %d mid %d",
937 midQ->command, midQ->mid)); 935 midQ->command, midQ->mid);
938 if (midQ->midState == MID_REQUEST_SUBMITTED) { 936 if (midQ->midState == MID_REQUEST_SUBMITTED) {
939 if (ses->server->tcpStatus == CifsExiting) 937 if (ses->server->tcpStatus == CifsExiting)
940 rc = -EHOSTDOWN; 938 rc = -EHOSTDOWN;
@@ -947,7 +945,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
947 if (rc != -EHOSTDOWN) { 945 if (rc != -EHOSTDOWN) {
948 if (midQ->midState == MID_RETRY_NEEDED) { 946 if (midQ->midState == MID_RETRY_NEEDED) {
949 rc = -EAGAIN; 947 rc = -EAGAIN;
950 cFYI(1, ("marking request for retry")); 948 cFYI(1, "marking request for retry");
951 } else { 949 } else {
952 rc = -EIO; 950 rc = -EIO;
953 } 951 }
@@ -958,8 +956,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
958 } 956 }
959 957
960 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 958 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
961 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 959 cERROR(1, "Frame too large received. Length: %d Xid: %d",
962 receive_len, xid)); 960 receive_len, xid);
963 rc = -EIO; 961 rc = -EIO;
964 goto out; 962 goto out;
965 } 963 }
@@ -968,7 +966,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
968 966
969 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) { 967 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) {
970 rc = -EIO; 968 rc = -EIO;
971 cERROR(1, ("Bad MID state?")); 969 cERROR(1, "Bad MID state?");
972 goto out; 970 goto out;
973 } 971 }
974 972
@@ -986,7 +984,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
986 &ses->server->mac_signing_key, 984 &ses->server->mac_signing_key,
987 midQ->sequence_number+1); 985 midQ->sequence_number+1);
988 if (rc) { 986 if (rc) {
989 cERROR(1, ("Unexpected SMB signature")); 987 cERROR(1, "Unexpected SMB signature");
990 /* BB FIXME add code to kill session */ 988 /* BB FIXME add code to kill session */
991 } 989 }
992 } 990 }
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f555ce077d4f..a1509207bfa6 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -70,12 +70,12 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
70 return rc; 70 return rc;
71 } 71 }
72 if (ea_name == NULL) { 72 if (ea_name == NULL) {
73 cFYI(1, ("Null xattr names not supported")); 73 cFYI(1, "Null xattr names not supported");
74 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) 74 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5)
75 && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) { 75 && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) {
76 cFYI(1, 76 cFYI(1,
77 ("illegal xattr request %s (only user namespace supported)", 77 "illegal xattr request %s (only user namespace supported)",
78 ea_name)); 78 ea_name);
79 /* BB what if no namespace prefix? */ 79 /* BB what if no namespace prefix? */
80 /* Should we just pass them to server, except for 80 /* Should we just pass them to server, except for
81 system and perhaps security prefixes? */ 81 system and perhaps security prefixes? */
@@ -131,19 +131,19 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
131 search server for EAs or streams to 131 search server for EAs or streams to
132 returns as xattrs */ 132 returns as xattrs */
133 if (value_size > MAX_EA_VALUE_SIZE) { 133 if (value_size > MAX_EA_VALUE_SIZE) {
134 cFYI(1, ("size of EA value too large")); 134 cFYI(1, "size of EA value too large");
135 kfree(full_path); 135 kfree(full_path);
136 FreeXid(xid); 136 FreeXid(xid);
137 return -EOPNOTSUPP; 137 return -EOPNOTSUPP;
138 } 138 }
139 139
140 if (ea_name == NULL) { 140 if (ea_name == NULL) {
141 cFYI(1, ("Null xattr names not supported")); 141 cFYI(1, "Null xattr names not supported");
142 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 142 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
144 goto set_ea_exit; 144 goto set_ea_exit;
145 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) 145 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
146 cFYI(1, ("attempt to set cifs inode metadata")); 146 cFYI(1, "attempt to set cifs inode metadata");
147 147
148 ea_name += 5; /* skip past user. prefix */ 148 ea_name += 5; /* skip past user. prefix */
149 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 149 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
@@ -169,9 +169,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
169 ACL_TYPE_ACCESS, cifs_sb->local_nls, 169 ACL_TYPE_ACCESS, cifs_sb->local_nls,
170 cifs_sb->mnt_cifs_flags & 170 cifs_sb->mnt_cifs_flags &
171 CIFS_MOUNT_MAP_SPECIAL_CHR); 171 CIFS_MOUNT_MAP_SPECIAL_CHR);
172 cFYI(1, ("set POSIX ACL rc %d", rc)); 172 cFYI(1, "set POSIX ACL rc %d", rc);
173#else 173#else
174 cFYI(1, ("set POSIX ACL not supported")); 174 cFYI(1, "set POSIX ACL not supported");
175#endif 175#endif
176 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, 176 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
177 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 177 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -182,13 +182,13 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
182 ACL_TYPE_DEFAULT, cifs_sb->local_nls, 182 ACL_TYPE_DEFAULT, cifs_sb->local_nls,
183 cifs_sb->mnt_cifs_flags & 183 cifs_sb->mnt_cifs_flags &
184 CIFS_MOUNT_MAP_SPECIAL_CHR); 184 CIFS_MOUNT_MAP_SPECIAL_CHR);
185 cFYI(1, ("set POSIX default ACL rc %d", rc)); 185 cFYI(1, "set POSIX default ACL rc %d", rc);
186#else 186#else
187 cFYI(1, ("set default POSIX ACL not supported")); 187 cFYI(1, "set default POSIX ACL not supported");
188#endif 188#endif
189 } else { 189 } else {
190 cFYI(1, ("illegal xattr request %s (only user namespace" 190 cFYI(1, "illegal xattr request %s (only user namespace"
191 " supported)", ea_name)); 191 " supported)", ea_name);
192 /* BB what if no namespace prefix? */ 192 /* BB what if no namespace prefix? */
193 /* Should we just pass them to server, except for 193 /* Should we just pass them to server, except for
194 system and perhaps security prefixes? */ 194 system and perhaps security prefixes? */
@@ -235,13 +235,13 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
235 /* return dos attributes as pseudo xattr */ 235 /* return dos attributes as pseudo xattr */
236 /* return alt name if available as pseudo attr */ 236 /* return alt name if available as pseudo attr */
237 if (ea_name == NULL) { 237 if (ea_name == NULL) {
238 cFYI(1, ("Null xattr names not supported")); 238 cFYI(1, "Null xattr names not supported");
239 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 239 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
240 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 240 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
241 goto get_ea_exit; 241 goto get_ea_exit;
242 242
243 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { 243 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) {
244 cFYI(1, ("attempt to query cifs inode metadata")); 244 cFYI(1, "attempt to query cifs inode metadata");
245 /* revalidate/getattr then populate from inode */ 245 /* revalidate/getattr then populate from inode */
246 } /* BB add else when above is implemented */ 246 } /* BB add else when above is implemented */
247 ea_name += 5; /* skip past user. prefix */ 247 ea_name += 5; /* skip past user. prefix */
@@ -287,7 +287,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
287 } 287 }
288#endif /* EXPERIMENTAL */ 288#endif /* EXPERIMENTAL */
289#else 289#else
290 cFYI(1, ("query POSIX ACL not supported yet")); 290 cFYI(1, "query POSIX ACL not supported yet");
291#endif /* CONFIG_CIFS_POSIX */ 291#endif /* CONFIG_CIFS_POSIX */
292 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, 292 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
293 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 293 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -299,18 +299,18 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
299 cifs_sb->mnt_cifs_flags & 299 cifs_sb->mnt_cifs_flags &
300 CIFS_MOUNT_MAP_SPECIAL_CHR); 300 CIFS_MOUNT_MAP_SPECIAL_CHR);
301#else 301#else
302 cFYI(1, ("query POSIX default ACL not supported yet")); 302 cFYI(1, "query POSIX default ACL not supported yet");
303#endif 303#endif
304 } else if (strncmp(ea_name, 304 } else if (strncmp(ea_name,
305 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { 305 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
306 cFYI(1, ("Trusted xattr namespace not supported yet")); 306 cFYI(1, "Trusted xattr namespace not supported yet");
307 } else if (strncmp(ea_name, 307 } else if (strncmp(ea_name,
308 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { 308 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
309 cFYI(1, ("Security xattr namespace not supported yet")); 309 cFYI(1, "Security xattr namespace not supported yet");
310 } else 310 } else
311 cFYI(1, 311 cFYI(1,
312 ("illegal xattr request %s (only user namespace supported)", 312 "illegal xattr request %s (only user namespace supported)",
313 ea_name)); 313 ea_name);
314 314
315 /* We could add an additional check for streams ie 315 /* We could add an additional check for streams ie
316 if proc/fs/cifs/streamstoxattr is set then 316 if proc/fs/cifs/streamstoxattr is set then
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 4c813f2cdc52..7196077b1688 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
217 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 217 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
218 host_file = cfi->cfi_container; 218 host_file = cfi->cfi_container;
219 219
220 err = vfs_fsync(host_file, host_file->f_path.dentry, datasync); 220 err = vfs_fsync(host_file, datasync);
221 if ( !err && !datasync ) { 221 if ( !err && !datasync ) {
222 lock_kernel(); 222 lock_kernel();
223 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); 223 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 773f2ce9aa06..ca25d96d45c9 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Pioctl operations for Coda. 2 * Pioctl operations for Coda.
3 * Original version: (C) 1996 Peter Braam 3 * Original version: (C) 1996 Peter Braam
4 * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University 4 * Rewritten for Linux 2.1: (C) 1997 Carnegie Mellon University
5 * 5 *
6 * Carnegie Mellon encourages users of this code to contribute improvements 6 * Carnegie Mellon encourages users of this code to contribute improvements
@@ -23,21 +23,22 @@
23#include <linux/coda_fs_i.h> 23#include <linux/coda_fs_i.h>
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26#include <linux/smp_lock.h>
27
26/* pioctl ops */ 28/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask); 29static int coda_ioctl_permission(struct inode *inode, int mask);
28static int coda_pioctl(struct inode * inode, struct file * filp, 30static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned int cmd, unsigned long user_data); 31 unsigned long user_data);
30 32
31/* exported from this file */ 33/* exported from this file */
32const struct inode_operations coda_ioctl_inode_operations = 34const struct inode_operations coda_ioctl_inode_operations = {
33{
34 .permission = coda_ioctl_permission, 35 .permission = coda_ioctl_permission,
35 .setattr = coda_setattr, 36 .setattr = coda_setattr,
36}; 37};
37 38
38const struct file_operations coda_ioctl_operations = { 39const struct file_operations coda_ioctl_operations = {
39 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
40 .ioctl = coda_pioctl, 41 .unlocked_ioctl = coda_pioctl,
41}; 42};
42 43
43/* the coda pioctl inode ops */ 44/* the coda pioctl inode ops */
@@ -46,48 +47,53 @@ static int coda_ioctl_permission(struct inode *inode, int mask)
46 return (mask & MAY_EXEC) ? -EACCES : 0; 47 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 48}
48 49
49static int coda_pioctl(struct inode * inode, struct file * filp, 50static long coda_pioctl(struct file *filp, unsigned int cmd,
50 unsigned int cmd, unsigned long user_data) 51 unsigned long user_data)
51{ 52{
52 struct path path; 53 struct path path;
53 int error; 54 int error;
54 struct PioctlData data; 55 struct PioctlData data;
55 struct inode *target_inode = NULL; 56 struct inode *inode = filp->f_dentry->d_inode;
56 struct coda_inode_info *cnp; 57 struct inode *target_inode = NULL;
58 struct coda_inode_info *cnp;
57 59
58 /* get the Pioctl data arguments from user space */ 60 lock_kernel();
59 if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { 61
60 return -EINVAL; 62 /* get the Pioctl data arguments from user space */
61 } 63 if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) {
62 64 error = -EINVAL;
63 /* 65 goto out;
64 * Look up the pathname. Note that the pathname is in
65 * user memory, and namei takes care of this
66 */
67 if (data.follow) {
68 error = user_path(data.path, &path);
69 } else {
70 error = user_lpath(data.path, &path);
71 } 66 }
72 67
73 if ( error ) { 68 /*
74 return error; 69 * Look up the pathname. Note that the pathname is in
75 } else { 70 * user memory, and namei takes care of this
71 */
72 if (data.follow)
73 error = user_path(data.path, &path);
74 else
75 error = user_lpath(data.path, &path);
76
77 if (error)
78 goto out;
79 else
76 target_inode = path.dentry->d_inode; 80 target_inode = path.dentry->d_inode;
77 } 81
78
79 /* return if it is not a Coda inode */ 82 /* return if it is not a Coda inode */
80 if ( target_inode->i_sb != inode->i_sb ) { 83 if (target_inode->i_sb != inode->i_sb) {
81 path_put(&path); 84 path_put(&path);
82 return -EINVAL; 85 error = -EINVAL;
86 goto out;
83 } 87 }
84 88
85 /* now proceed to make the upcall */ 89 /* now proceed to make the upcall */
86 cnp = ITOC(target_inode); 90 cnp = ITOC(target_inode);
87 91
88 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); 92 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
89 93
90 path_put(&path); 94 path_put(&path);
91 return error;
92}
93 95
96out:
97 unlock_kernel();
98 return error;
99}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index be4392ca2098..66b9cf79c5ba 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -73,8 +73,7 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait)
73 return mask; 73 return mask;
74} 74}
75 75
76static int coda_psdev_ioctl(struct inode * inode, struct file * filp, 76static long coda_psdev_ioctl(struct file * filp, unsigned int cmd, unsigned long arg)
77 unsigned int cmd, unsigned long arg)
78{ 77{
79 unsigned int data; 78 unsigned int data;
80 79
@@ -344,7 +343,7 @@ static const struct file_operations coda_psdev_fops = {
344 .read = coda_psdev_read, 343 .read = coda_psdev_read,
345 .write = coda_psdev_write, 344 .write = coda_psdev_write,
346 .poll = coda_psdev_poll, 345 .poll = coda_psdev_poll,
347 .ioctl = coda_psdev_ioctl, 346 .unlocked_ioctl = coda_psdev_ioctl,
348 .open = coda_psdev_open, 347 .open = coda_psdev_open,
349 .release = coda_psdev_release, 348 .release = coda_psdev_release,
350}; 349};
diff --git a/fs/dcache.c b/fs/dcache.c
index f1358e5c3a59..d96047b4a633 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
536 */ 536 */
537static void prune_dcache(int count) 537static void prune_dcache(int count)
538{ 538{
539 struct super_block *sb; 539 struct super_block *sb, *n;
540 int w_count; 540 int w_count;
541 int unused = dentry_stat.nr_unused; 541 int unused = dentry_stat.nr_unused;
542 int prune_ratio; 542 int prune_ratio;
@@ -545,13 +545,14 @@ static void prune_dcache(int count)
545 if (unused == 0 || count == 0) 545 if (unused == 0 || count == 0)
546 return; 546 return;
547 spin_lock(&dcache_lock); 547 spin_lock(&dcache_lock);
548restart:
549 if (count >= unused) 548 if (count >= unused)
550 prune_ratio = 1; 549 prune_ratio = 1;
551 else 550 else
552 prune_ratio = unused / count; 551 prune_ratio = unused / count;
553 spin_lock(&sb_lock); 552 spin_lock(&sb_lock);
554 list_for_each_entry(sb, &super_blocks, s_list) { 553 list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
554 if (list_empty(&sb->s_instances))
555 continue;
555 if (sb->s_nr_dentry_unused == 0) 556 if (sb->s_nr_dentry_unused == 0)
556 continue; 557 continue;
557 sb->s_count++; 558 sb->s_count++;
@@ -590,14 +591,10 @@ restart:
590 } 591 }
591 spin_lock(&sb_lock); 592 spin_lock(&sb_lock);
592 count -= pruned; 593 count -= pruned;
593 /* 594 __put_super(sb);
594 * restart only when sb is no longer on the list and 595 /* more work left to do? */
595 * we have more work to do. 596 if (count <= 0)
596 */ 597 break;
597 if (__put_super_and_need_restart(sb) && count > 0) {
598 spin_unlock(&sb_lock);
599 goto restart;
600 }
601 } 598 }
602 spin_unlock(&sb_lock); 599 spin_unlock(&sb_lock);
603 spin_unlock(&dcache_lock); 600 spin_unlock(&dcache_lock);
@@ -1529,6 +1526,7 @@ void d_delete(struct dentry * dentry)
1529 spin_lock(&dentry->d_lock); 1526 spin_lock(&dentry->d_lock);
1530 isdir = S_ISDIR(dentry->d_inode->i_mode); 1527 isdir = S_ISDIR(dentry->d_inode->i_mode);
1531 if (atomic_read(&dentry->d_count) == 1) { 1528 if (atomic_read(&dentry->d_count) == 1) {
1529 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1532 dentry_iput(dentry); 1530 dentry_iput(dentry);
1533 fsnotify_nameremove(dentry, isdir); 1531 fsnotify_nameremove(dentry, isdir);
1534 return; 1532 return;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0120247b41c0..8b3ffd5b5235 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -384,18 +384,15 @@ static int devpts_get_sb(struct file_system_type *fs_type,
384 s->s_flags |= MS_ACTIVE; 384 s->s_flags |= MS_ACTIVE;
385 } 385 }
386 386
387 simple_set_mnt(mnt, s);
388
389 memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts)); 387 memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
390 388
391 error = mknod_ptmx(s); 389 error = mknod_ptmx(s);
392 if (error) 390 if (error)
393 goto out_dput; 391 goto out_undo_sget;
394 392
395 return 0; 393 simple_set_mnt(mnt, s);
396 394
397out_dput: 395 return 0;
398 dput(s->s_root); /* undo dget() in simple_set_mnt() */
399 396
400out_undo_sget: 397out_undo_sget:
401 deactivate_locked_super(s); 398 deactivate_locked_super(s);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 17903b491298..031dbe3a15ca 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -733,10 +733,7 @@ static void lkb_add_ordered(struct list_head *new, struct list_head *head,
733 if (lkb->lkb_rqmode < mode) 733 if (lkb->lkb_rqmode < mode)
734 break; 734 break;
735 735
736 if (!lkb) 736 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
737 list_add_tail(new, head);
738 else
739 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
740} 737}
741 738
742/* add/remove lkb to rsb's grant/convert/wait queue */ 739/* add/remove lkb to rsb's grant/convert/wait queue */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 8b6e73c47435..b6272853130c 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -215,6 +215,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
215 if (!ast_type) { 215 if (!ast_type) {
216 kref_get(&lkb->lkb_ref); 216 kref_get(&lkb->lkb_ref);
217 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 217 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
218 lkb->lkb_ast_first = type;
218 wake_up_interruptible(&proc->wait); 219 wake_up_interruptible(&proc->wait);
219 } 220 }
220 if (type == AST_COMP && (ast_type & AST_COMP)) 221 if (type == AST_COMP && (ast_type & AST_COMP))
@@ -223,7 +224,6 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
223 224
224 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type); 225 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
225 if (eol) { 226 if (eol) {
226 lkb->lkb_ast_type &= ~AST_BAST;
227 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; 227 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
228 } 228 }
229 229
@@ -706,7 +706,7 @@ static int device_close(struct inode *inode, struct file *file)
706} 706}
707 707
708static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, 708static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
709 int bmode, char __user *buf, size_t count) 709 int mode, char __user *buf, size_t count)
710{ 710{
711#ifdef CONFIG_COMPAT 711#ifdef CONFIG_COMPAT
712 struct dlm_lock_result32 result32; 712 struct dlm_lock_result32 result32;
@@ -733,7 +733,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
733 if (type == AST_BAST) { 733 if (type == AST_BAST) {
734 result.user_astaddr = ua->bastaddr; 734 result.user_astaddr = ua->bastaddr;
735 result.user_astparam = ua->bastparam; 735 result.user_astparam = ua->bastparam;
736 result.bast_mode = bmode; 736 result.bast_mode = mode;
737 } else { 737 } else {
738 result.user_astaddr = ua->castaddr; 738 result.user_astaddr = ua->castaddr;
739 result.user_astparam = ua->castparam; 739 result.user_astparam = ua->castparam;
@@ -801,7 +801,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
801 struct dlm_user_proc *proc = file->private_data; 801 struct dlm_user_proc *proc = file->private_data;
802 struct dlm_lkb *lkb; 802 struct dlm_lkb *lkb;
803 DECLARE_WAITQUEUE(wait, current); 803 DECLARE_WAITQUEUE(wait, current);
804 int error, type=0, bmode=0, removed = 0; 804 int error = 0, removed;
805 int ret_type, ret_mode;
806 int bastmode, castmode, do_bast, do_cast;
805 807
806 if (count == sizeof(struct dlm_device_version)) { 808 if (count == sizeof(struct dlm_device_version)) {
807 error = copy_version_to_user(buf, count); 809 error = copy_version_to_user(buf, count);
@@ -820,6 +822,8 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
820#endif 822#endif
821 return -EINVAL; 823 return -EINVAL;
822 824
825 try_another:
826
823 /* do we really need this? can a read happen after a close? */ 827 /* do we really need this? can a read happen after a close? */
824 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) 828 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
825 return -EINVAL; 829 return -EINVAL;
@@ -855,13 +859,55 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
855 859
856 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); 860 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
857 861
858 if (lkb->lkb_ast_type & AST_COMP) { 862 removed = 0;
859 lkb->lkb_ast_type &= ~AST_COMP; 863 ret_type = 0;
860 type = AST_COMP; 864 ret_mode = 0;
861 } else if (lkb->lkb_ast_type & AST_BAST) { 865 do_bast = lkb->lkb_ast_type & AST_BAST;
862 lkb->lkb_ast_type &= ~AST_BAST; 866 do_cast = lkb->lkb_ast_type & AST_COMP;
863 type = AST_BAST; 867 bastmode = lkb->lkb_bastmode;
864 bmode = lkb->lkb_bastmode; 868 castmode = lkb->lkb_castmode;
869
870 /* when both are queued figure out which to do first and
871 switch first so the other goes in the next read */
872
873 if (do_cast && do_bast) {
874 if (lkb->lkb_ast_first == AST_COMP) {
875 ret_type = AST_COMP;
876 ret_mode = castmode;
877 lkb->lkb_ast_type &= ~AST_COMP;
878 lkb->lkb_ast_first = AST_BAST;
879 } else {
880 ret_type = AST_BAST;
881 ret_mode = bastmode;
882 lkb->lkb_ast_type &= ~AST_BAST;
883 lkb->lkb_ast_first = AST_COMP;
884 }
885 } else {
886 ret_type = lkb->lkb_ast_first;
887 ret_mode = (ret_type == AST_COMP) ? castmode : bastmode;
888 lkb->lkb_ast_type &= ~ret_type;
889 lkb->lkb_ast_first = 0;
890 }
891
892 /* if we're doing a bast but the bast is unnecessary, then
893 switch to do nothing or do a cast if that was needed next */
894
895 if ((ret_type == AST_BAST) &&
896 dlm_modes_compat(bastmode, lkb->lkb_castmode_done)) {
897 ret_type = 0;
898 ret_mode = 0;
899
900 if (do_cast) {
901 ret_type = AST_COMP;
902 ret_mode = castmode;
903 lkb->lkb_ast_type &= ~AST_COMP;
904 lkb->lkb_ast_first = 0;
905 }
906 }
907
908 if (lkb->lkb_ast_first != lkb->lkb_ast_type) {
909 log_print("device_read %x ast_first %x ast_type %x",
910 lkb->lkb_id, lkb->lkb_ast_first, lkb->lkb_ast_type);
865 } 911 }
866 912
867 if (!lkb->lkb_ast_type) { 913 if (!lkb->lkb_ast_type) {
@@ -870,15 +916,29 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
870 } 916 }
871 spin_unlock(&proc->asts_spin); 917 spin_unlock(&proc->asts_spin);
872 918
873 error = copy_result_to_user(lkb->lkb_ua, 919 if (ret_type) {
874 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), 920 error = copy_result_to_user(lkb->lkb_ua,
875 type, bmode, buf, count); 921 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
922 ret_type, ret_mode, buf, count);
923
924 if (ret_type == AST_COMP)
925 lkb->lkb_castmode_done = castmode;
926 if (ret_type == AST_BAST)
927 lkb->lkb_bastmode_done = bastmode;
928 }
876 929
877 /* removes reference for the proc->asts lists added by 930 /* removes reference for the proc->asts lists added by
878 dlm_user_add_ast() and may result in the lkb being freed */ 931 dlm_user_add_ast() and may result in the lkb being freed */
932
879 if (removed) 933 if (removed)
880 dlm_put_lkb(lkb); 934 dlm_put_lkb(lkb);
881 935
936 /* the bast that was queued was eliminated (see unnecessary above),
937 leaving nothing to return */
938
939 if (!ret_type)
940 goto try_another;
941
882 return error; 942 return error;
883} 943}
884 944
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 31f4b0e6d72c..83c4f600786a 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -12,7 +12,7 @@
12/* A global variable is a bit ugly, but it keeps the code simple */ 12/* A global variable is a bit ugly, but it keeps the code simple */
13int sysctl_drop_caches; 13int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb, void *unused)
16{ 16{
17 struct inode *inode, *toput_inode = NULL; 17 struct inode *inode, *toput_inode = NULL;
18 18
@@ -33,26 +33,6 @@ static void drop_pagecache_sb(struct super_block *sb)
33 iput(toput_inode); 33 iput(toput_inode);
34} 34}
35 35
36static void drop_pagecache(void)
37{
38 struct super_block *sb;
39
40 spin_lock(&sb_lock);
41restart:
42 list_for_each_entry(sb, &super_blocks, s_list) {
43 sb->s_count++;
44 spin_unlock(&sb_lock);
45 down_read(&sb->s_umount);
46 if (sb->s_root)
47 drop_pagecache_sb(sb);
48 up_read(&sb->s_umount);
49 spin_lock(&sb_lock);
50 if (__put_super_and_need_restart(sb))
51 goto restart;
52 }
53 spin_unlock(&sb_lock);
54}
55
56static void drop_slab(void) 36static void drop_slab(void)
57{ 37{
58 int nr_objects; 38 int nr_objects;
@@ -68,7 +48,7 @@ int drop_caches_sysctl_handler(ctl_table *table, int write,
68 proc_dointvec_minmax(table, write, buffer, length, ppos); 48 proc_dointvec_minmax(table, write, buffer, length, ppos);
69 if (write) { 49 if (write) {
70 if (sysctl_drop_caches & 1) 50 if (sysctl_drop_caches & 1)
71 drop_pagecache(); 51 iterate_supers(drop_pagecache_sb, NULL);
72 if (sysctl_drop_caches & 2) 52 if (sysctl_drop_caches & 2)
73 drop_slab(); 53 drop_slab();
74 } 54 }
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bfc2e0f78f00..0032a9f5a3a9 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -731,15 +731,14 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
731int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, 731int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
732 struct page *page_for_lower, 732 struct page *page_for_lower,
733 size_t offset_in_page, size_t size); 733 size_t offset_in_page, size_t size);
734int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, 734int ecryptfs_write(struct inode *inode, char *data, loff_t offset, size_t size);
735 size_t size);
736int ecryptfs_read_lower(char *data, loff_t offset, size_t size, 735int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
737 struct inode *ecryptfs_inode); 736 struct inode *ecryptfs_inode);
738int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, 737int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
739 pgoff_t page_index, 738 pgoff_t page_index,
740 size_t offset_in_page, size_t size, 739 size_t offset_in_page, size_t size,
741 struct inode *ecryptfs_inode); 740 struct inode *ecryptfs_inode);
742struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 741struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index);
743int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); 742int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
744int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, 743int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
745 struct user_namespace *user_ns); 744 struct user_namespace *user_ns);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e7440a6f5ebf..3bdddbcc785f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
276static int 276static int
277ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) 277ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
278{ 278{
279 return vfs_fsync(ecryptfs_file_to_lower(file), 279 return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
280 ecryptfs_dentry_to_lower(dentry),
281 datasync);
282} 280}
283 281
284static int ecryptfs_fasync(int fd, struct file *file, int flag) 282static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e2d4418affac..65dee2f336ae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -142,19 +142,10 @@ out:
142static int grow_file(struct dentry *ecryptfs_dentry) 142static int grow_file(struct dentry *ecryptfs_dentry)
143{ 143{
144 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; 144 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
145 struct file fake_file;
146 struct ecryptfs_file_info tmp_file_info;
147 char zero_virt[] = { 0x00 }; 145 char zero_virt[] = { 0x00 };
148 int rc = 0; 146 int rc = 0;
149 147
150 memset(&fake_file, 0, sizeof(fake_file)); 148 rc = ecryptfs_write(ecryptfs_inode, zero_virt, 0, 1);
151 fake_file.f_path.dentry = ecryptfs_dentry;
152 memset(&tmp_file_info, 0, sizeof(tmp_file_info));
153 ecryptfs_set_file_private(&fake_file, &tmp_file_info);
154 ecryptfs_set_file_lower(
155 &fake_file,
156 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file);
157 rc = ecryptfs_write(&fake_file, zero_virt, 0, 1);
158 i_size_write(ecryptfs_inode, 0); 149 i_size_write(ecryptfs_inode, 0);
159 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); 150 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
160 ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= 151 ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |=
@@ -784,8 +775,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
784{ 775{
785 int rc = 0; 776 int rc = 0;
786 struct inode *inode = dentry->d_inode; 777 struct inode *inode = dentry->d_inode;
787 struct dentry *lower_dentry;
788 struct file fake_ecryptfs_file;
789 struct ecryptfs_crypt_stat *crypt_stat; 778 struct ecryptfs_crypt_stat *crypt_stat;
790 loff_t i_size = i_size_read(inode); 779 loff_t i_size = i_size_read(inode);
791 loff_t lower_size_before_truncate; 780 loff_t lower_size_before_truncate;
@@ -796,23 +785,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
796 goto out; 785 goto out;
797 } 786 }
798 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 787 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
799 /* Set up a fake ecryptfs file, this is used to interface with
800 * the file in the underlying filesystem so that the
801 * truncation has an effect there as well. */
802 memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
803 fake_ecryptfs_file.f_path.dentry = dentry;
804 /* Released at out_free: label */
805 ecryptfs_set_file_private(&fake_ecryptfs_file,
806 kmem_cache_alloc(ecryptfs_file_info_cache,
807 GFP_KERNEL));
808 if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
809 rc = -ENOMEM;
810 goto out;
811 }
812 lower_dentry = ecryptfs_dentry_to_lower(dentry);
813 ecryptfs_set_file_lower(
814 &fake_ecryptfs_file,
815 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
816 /* Switch on growing or shrinking file */ 788 /* Switch on growing or shrinking file */
817 if (ia->ia_size > i_size) { 789 if (ia->ia_size > i_size) {
818 char zero[] = { 0x00 }; 790 char zero[] = { 0x00 };
@@ -822,7 +794,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
822 * this triggers code that will fill in 0's throughout 794 * this triggers code that will fill in 0's throughout
823 * the intermediate portion of the previous end of the 795 * the intermediate portion of the previous end of the
824 * file and the new and of the file */ 796 * file and the new and of the file */
825 rc = ecryptfs_write(&fake_ecryptfs_file, zero, 797 rc = ecryptfs_write(inode, zero,
826 (ia->ia_size - 1), 1); 798 (ia->ia_size - 1), 1);
827 } else { /* ia->ia_size < i_size_read(inode) */ 799 } else { /* ia->ia_size < i_size_read(inode) */
828 /* We're chopping off all the pages down to the page 800 /* We're chopping off all the pages down to the page
@@ -835,10 +807,10 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
835 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 807 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
836 rc = vmtruncate(inode, ia->ia_size); 808 rc = vmtruncate(inode, ia->ia_size);
837 if (rc) 809 if (rc)
838 goto out_free; 810 goto out;
839 lower_ia->ia_size = ia->ia_size; 811 lower_ia->ia_size = ia->ia_size;
840 lower_ia->ia_valid |= ATTR_SIZE; 812 lower_ia->ia_valid |= ATTR_SIZE;
841 goto out_free; 813 goto out;
842 } 814 }
843 if (num_zeros) { 815 if (num_zeros) {
844 char *zeros_virt; 816 char *zeros_virt;
@@ -846,16 +818,16 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
846 zeros_virt = kzalloc(num_zeros, GFP_KERNEL); 818 zeros_virt = kzalloc(num_zeros, GFP_KERNEL);
847 if (!zeros_virt) { 819 if (!zeros_virt) {
848 rc = -ENOMEM; 820 rc = -ENOMEM;
849 goto out_free; 821 goto out;
850 } 822 }
851 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, 823 rc = ecryptfs_write(inode, zeros_virt,
852 ia->ia_size, num_zeros); 824 ia->ia_size, num_zeros);
853 kfree(zeros_virt); 825 kfree(zeros_virt);
854 if (rc) { 826 if (rc) {
855 printk(KERN_ERR "Error attempting to zero out " 827 printk(KERN_ERR "Error attempting to zero out "
856 "the remainder of the end page on " 828 "the remainder of the end page on "
857 "reducing truncate; rc = [%d]\n", rc); 829 "reducing truncate; rc = [%d]\n", rc);
858 goto out_free; 830 goto out;
859 } 831 }
860 } 832 }
861 vmtruncate(inode, ia->ia_size); 833 vmtruncate(inode, ia->ia_size);
@@ -864,7 +836,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
864 printk(KERN_ERR "Problem with " 836 printk(KERN_ERR "Problem with "
865 "ecryptfs_write_inode_size_to_metadata; " 837 "ecryptfs_write_inode_size_to_metadata; "
866 "rc = [%d]\n", rc); 838 "rc = [%d]\n", rc);
867 goto out_free; 839 goto out;
868 } 840 }
869 /* We are reducing the size of the ecryptfs file, and need to 841 /* We are reducing the size of the ecryptfs file, and need to
870 * know if we need to reduce the size of the lower file. */ 842 * know if we need to reduce the size of the lower file. */
@@ -878,10 +850,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
878 } else 850 } else
879 lower_ia->ia_valid &= ~ATTR_SIZE; 851 lower_ia->ia_valid &= ~ATTR_SIZE;
880 } 852 }
881out_free:
882 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
883 kmem_cache_free(ecryptfs_file_info_cache,
884 ecryptfs_file_to_private(&fake_ecryptfs_file));
885out: 853out:
886 return rc; 854 return rc;
887} 855}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 760983d0f25e..cbd4e18adb20 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -281,7 +281,7 @@ static void ecryptfs_init_mount_crypt_stat(
281 * 281 *
282 * Returns zero on success; non-zero on error 282 * Returns zero on success; non-zero on error
283 */ 283 */
284static int ecryptfs_parse_options(struct super_block *sb, char *options) 284static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
285{ 285{
286 char *p; 286 char *p;
287 int rc = 0; 287 int rc = 0;
@@ -293,7 +293,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
293 int fn_cipher_key_bytes; 293 int fn_cipher_key_bytes;
294 int fn_cipher_key_bytes_set = 0; 294 int fn_cipher_key_bytes_set = 0;
295 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 295 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
296 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 296 &sbi->mount_crypt_stat;
297 substring_t args[MAX_OPT_ARGS]; 297 substring_t args[MAX_OPT_ARGS];
298 int token; 298 int token;
299 char *sig_src; 299 char *sig_src;
@@ -483,68 +483,7 @@ out:
483} 483}
484 484
485struct kmem_cache *ecryptfs_sb_info_cache; 485struct kmem_cache *ecryptfs_sb_info_cache;
486 486static struct file_system_type ecryptfs_fs_type;
487/**
488 * ecryptfs_fill_super
489 * @sb: The ecryptfs super block
490 * @raw_data: The options passed to mount
491 * @silent: Not used but required by function prototype
492 *
493 * Sets up what we can of the sb, rest is done in ecryptfs_read_super
494 *
495 * Returns zero on success; non-zero otherwise
496 */
497static int
498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
499{
500 struct ecryptfs_sb_info *esi;
501 int rc = 0;
502
503 /* Released in ecryptfs_put_super() */
504 ecryptfs_set_superblock_private(sb,
505 kmem_cache_zalloc(ecryptfs_sb_info_cache,
506 GFP_KERNEL));
507 esi = ecryptfs_superblock_to_private(sb);
508 if (!esi) {
509 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
510 rc = -ENOMEM;
511 goto out;
512 }
513
514 rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
515 if (rc)
516 goto out;
517
518 sb->s_bdi = &esi->bdi;
519 sb->s_op = &ecryptfs_sops;
520 /* Released through deactivate_super(sb) from get_sb_nodev */
521 sb->s_root = d_alloc(NULL, &(const struct qstr) {
522 .hash = 0,.name = "/",.len = 1});
523 if (!sb->s_root) {
524 ecryptfs_printk(KERN_ERR, "d_alloc failed\n");
525 rc = -ENOMEM;
526 goto out;
527 }
528 sb->s_root->d_op = &ecryptfs_dops;
529 sb->s_root->d_sb = sb;
530 sb->s_root->d_parent = sb->s_root;
531 /* Released in d_release when dput(sb->s_root) is called */
532 /* through deactivate_super(sb) from get_sb_nodev() */
533 ecryptfs_set_dentry_private(sb->s_root,
534 kmem_cache_zalloc(ecryptfs_dentry_info_cache,
535 GFP_KERNEL));
536 if (!ecryptfs_dentry_to_private(sb->s_root)) {
537 ecryptfs_printk(KERN_ERR,
538 "dentry_info_cache alloc failed\n");
539 rc = -ENOMEM;
540 goto out;
541 }
542 rc = 0;
543out:
544 /* Should be able to rely on deactivate_super called from
545 * get_sb_nodev */
546 return rc;
547}
548 487
549/** 488/**
550 * ecryptfs_read_super 489 * ecryptfs_read_super
@@ -565,6 +504,13 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
565 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); 504 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
566 goto out; 505 goto out;
567 } 506 }
507 if (path.dentry->d_sb->s_type == &ecryptfs_fs_type) {
508 rc = -EINVAL;
509 printk(KERN_ERR "Mount on filesystem of type "
510 "eCryptfs explicitly disallowed due to "
511 "known incompatibilities\n");
512 goto out_free;
513 }
568 ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); 514 ecryptfs_set_superblock_lower(sb, path.dentry->d_sb);
569 sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; 515 sb->s_maxbytes = path.dentry->d_sb->s_maxbytes;
570 sb->s_blocksize = path.dentry->d_sb->s_blocksize; 516 sb->s_blocksize = path.dentry->d_sb->s_blocksize;
@@ -588,11 +534,8 @@ out:
588 * @dev_name: The path to mount over 534 * @dev_name: The path to mount over
589 * @raw_data: The options passed into the kernel 535 * @raw_data: The options passed into the kernel
590 * 536 *
591 * The whole ecryptfs_get_sb process is broken into 4 functions: 537 * The whole ecryptfs_get_sb process is broken into 3 functions:
592 * ecryptfs_parse_options(): handle options passed to ecryptfs, if any 538 * ecryptfs_parse_options(): handle options passed to ecryptfs, if any
593 * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block
594 * with as much information as it can before needing
595 * the lower filesystem.
596 * ecryptfs_read_super(): this accesses the lower filesystem and uses 539 * ecryptfs_read_super(): this accesses the lower filesystem and uses
597 * ecryptfs_interpose to perform most of the linking 540 * ecryptfs_interpose to perform most of the linking
598 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) 541 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
@@ -601,30 +544,78 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
601 const char *dev_name, void *raw_data, 544 const char *dev_name, void *raw_data,
602 struct vfsmount *mnt) 545 struct vfsmount *mnt)
603{ 546{
547 struct super_block *s;
548 struct ecryptfs_sb_info *sbi;
549 struct ecryptfs_dentry_info *root_info;
550 const char *err = "Getting sb failed";
604 int rc; 551 int rc;
605 struct super_block *sb;
606 552
607 rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); 553 sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL);
608 if (rc < 0) { 554 if (!sbi) {
609 printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); 555 rc = -ENOMEM;
610 goto out; 556 goto out;
611 } 557 }
612 sb = mnt->mnt_sb; 558
613 rc = ecryptfs_parse_options(sb, raw_data); 559 rc = ecryptfs_parse_options(sbi, raw_data);
614 if (rc) { 560 if (rc) {
615 printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); 561 err = "Error parsing options";
616 goto out_abort; 562 goto out;
563 }
564
565 s = sget(fs_type, NULL, set_anon_super, NULL);
566 if (IS_ERR(s)) {
567 rc = PTR_ERR(s);
568 goto out;
617 } 569 }
618 rc = ecryptfs_read_super(sb, dev_name); 570
571 s->s_flags = flags;
572 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
619 if (rc) { 573 if (rc) {
620 printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); 574 deactivate_locked_super(s);
621 goto out_abort; 575 goto out;
622 } 576 }
623 goto out; 577
624out_abort: 578 ecryptfs_set_superblock_private(s, sbi);
625 dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ 579 s->s_bdi = &sbi->bdi;
626 deactivate_locked_super(sb); 580
581 /* ->kill_sb() will take care of sbi after that point */
582 sbi = NULL;
583 s->s_op = &ecryptfs_sops;
584
585 rc = -ENOMEM;
586 s->s_root = d_alloc(NULL, &(const struct qstr) {
587 .hash = 0,.name = "/",.len = 1});
588 if (!s->s_root) {
589 deactivate_locked_super(s);
590 goto out;
591 }
592 s->s_root->d_op = &ecryptfs_dops;
593 s->s_root->d_sb = s;
594 s->s_root->d_parent = s->s_root;
595
596 root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
597 if (!root_info) {
598 deactivate_locked_super(s);
599 goto out;
600 }
601 /* ->kill_sb() will take care of root_info */
602 ecryptfs_set_dentry_private(s->s_root, root_info);
603 s->s_flags |= MS_ACTIVE;
604 rc = ecryptfs_read_super(s, dev_name);
605 if (rc) {
606 deactivate_locked_super(s);
607 err = "Reading sb failed";
608 goto out;
609 }
610 simple_set_mnt(mnt, s);
611 return 0;
612
627out: 613out:
614 if (sbi) {
615 ecryptfs_destroy_mount_crypt_stat(&sbi->mount_crypt_stat);
616 kmem_cache_free(ecryptfs_sb_info_cache, sbi);
617 }
618 printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
628 return rc; 619 return rc;
629} 620}
630 621
@@ -633,11 +624,16 @@ out:
633 * @sb: The ecryptfs super block 624 * @sb: The ecryptfs super block
634 * 625 *
635 * Used to bring the superblock down and free the private data. 626 * Used to bring the superblock down and free the private data.
636 * Private data is free'd in ecryptfs_put_super()
637 */ 627 */
638static void ecryptfs_kill_block_super(struct super_block *sb) 628static void ecryptfs_kill_block_super(struct super_block *sb)
639{ 629{
640 generic_shutdown_super(sb); 630 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
631 kill_anon_super(sb);
632 if (!sb_info)
633 return;
634 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
635 bdi_destroy(&sb_info->bdi);
636 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
641} 637}
642 638
643static struct file_system_type ecryptfs_fs_type = { 639static struct file_system_type ecryptfs_fs_type = {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2ee9a3a7b68c..b1d82756544b 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -44,17 +44,9 @@
44 * Returns locked and up-to-date page (if ok), with increased 44 * Returns locked and up-to-date page (if ok), with increased
45 * refcnt. 45 * refcnt.
46 */ 46 */
47struct page *ecryptfs_get_locked_page(struct file *file, loff_t index) 47struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index)
48{ 48{
49 struct dentry *dentry; 49 struct page *page = read_mapping_page(inode->i_mapping, index, NULL);
50 struct inode *inode;
51 struct address_space *mapping;
52 struct page *page;
53
54 dentry = file->f_path.dentry;
55 inode = dentry->d_inode;
56 mapping = inode->i_mapping;
57 page = read_mapping_page(mapping, index, (void *)file);
58 if (!IS_ERR(page)) 50 if (!IS_ERR(page))
59 lock_page(page); 51 lock_page(page);
60 return page; 52 return page;
@@ -198,7 +190,7 @@ out:
198static int ecryptfs_readpage(struct file *file, struct page *page) 190static int ecryptfs_readpage(struct file *file, struct page *page)
199{ 191{
200 struct ecryptfs_crypt_stat *crypt_stat = 192 struct ecryptfs_crypt_stat *crypt_stat =
201 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; 193 &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat;
202 int rc = 0; 194 int rc = 0;
203 195
204 if (!crypt_stat 196 if (!crypt_stat
@@ -300,8 +292,7 @@ static int ecryptfs_write_begin(struct file *file,
300 292
301 if (!PageUptodate(page)) { 293 if (!PageUptodate(page)) {
302 struct ecryptfs_crypt_stat *crypt_stat = 294 struct ecryptfs_crypt_stat *crypt_stat =
303 &ecryptfs_inode_to_private( 295 &ecryptfs_inode_to_private(mapping->host)->crypt_stat;
304 file->f_path.dentry->d_inode)->crypt_stat;
305 296
306 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) 297 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
307 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { 298 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
@@ -487,7 +478,7 @@ static int ecryptfs_write_end(struct file *file,
487 unsigned to = from + copied; 478 unsigned to = from + copied;
488 struct inode *ecryptfs_inode = mapping->host; 479 struct inode *ecryptfs_inode = mapping->host;
489 struct ecryptfs_crypt_stat *crypt_stat = 480 struct ecryptfs_crypt_stat *crypt_stat =
490 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; 481 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
491 int rc; 482 int rc;
492 483
493 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { 484 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0cc4fafd6552..db184ef15d3d 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -93,7 +93,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
93 93
94/** 94/**
95 * ecryptfs_write 95 * ecryptfs_write
96 * @ecryptfs_file: The eCryptfs file into which to write 96 * @ecryptfs_inode: The eCryptfs file into which to write
97 * @data: Virtual address where data to write is located 97 * @data: Virtual address where data to write is located
98 * @offset: Offset in the eCryptfs file at which to begin writing the 98 * @offset: Offset in the eCryptfs file at which to begin writing the
99 * data from @data 99 * data from @data
@@ -109,12 +109,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
109 * 109 *
110 * Returns zero on success; non-zero otherwise 110 * Returns zero on success; non-zero otherwise
111 */ 111 */
112int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, 112int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
113 size_t size) 113 size_t size)
114{ 114{
115 struct page *ecryptfs_page; 115 struct page *ecryptfs_page;
116 struct ecryptfs_crypt_stat *crypt_stat; 116 struct ecryptfs_crypt_stat *crypt_stat;
117 struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
118 char *ecryptfs_page_virt; 117 char *ecryptfs_page_virt;
119 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); 118 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
120 loff_t data_offset = 0; 119 loff_t data_offset = 0;
@@ -145,7 +144,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
145 if (num_bytes > total_remaining_zeros) 144 if (num_bytes > total_remaining_zeros)
146 num_bytes = total_remaining_zeros; 145 num_bytes = total_remaining_zeros;
147 } 146 }
148 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, 147 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode,
149 ecryptfs_page_idx); 148 ecryptfs_page_idx);
150 if (IS_ERR(ecryptfs_page)) { 149 if (IS_ERR(ecryptfs_page)) {
151 rc = PTR_ERR(ecryptfs_page); 150 rc = PTR_ERR(ecryptfs_page);
@@ -302,10 +301,10 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
302int ecryptfs_read(char *data, loff_t offset, size_t size, 301int ecryptfs_read(char *data, loff_t offset, size_t size,
303 struct file *ecryptfs_file) 302 struct file *ecryptfs_file)
304{ 303{
304 struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
305 struct page *ecryptfs_page; 305 struct page *ecryptfs_page;
306 char *ecryptfs_page_virt; 306 char *ecryptfs_page_virt;
307 loff_t ecryptfs_file_size = 307 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
308 i_size_read(ecryptfs_file->f_dentry->d_inode);
309 loff_t data_offset = 0; 308 loff_t data_offset = 0;
310 loff_t pos; 309 loff_t pos;
311 int rc = 0; 310 int rc = 0;
@@ -327,7 +326,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 326
328 if (num_bytes > total_remaining_bytes) 327 if (num_bytes > total_remaining_bytes)
329 num_bytes = total_remaining_bytes; 328 num_bytes = total_remaining_bytes;
330 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, 329 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode,
331 ecryptfs_page_idx); 330 ecryptfs_page_idx);
332 if (IS_ERR(ecryptfs_page)) { 331 if (IS_ERR(ecryptfs_page)) {
333 rc = PTR_ERR(ecryptfs_page); 332 rc = PTR_ERR(ecryptfs_page);
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0c0ae491d231..0435886e4a9f 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -109,27 +109,6 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
109} 109}
110 110
111/** 111/**
112 * ecryptfs_put_super
113 * @sb: Pointer to the ecryptfs super block
114 *
115 * Final actions when unmounting a file system.
116 * This will handle deallocation and release of our private data.
117 */
118static void ecryptfs_put_super(struct super_block *sb)
119{
120 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
121
122 lock_kernel();
123
124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
125 bdi_destroy(&sb_info->bdi);
126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
127 ecryptfs_set_superblock_private(sb, NULL);
128
129 unlock_kernel();
130}
131
132/**
133 * ecryptfs_statfs 112 * ecryptfs_statfs
134 * @sb: The ecryptfs super block 113 * @sb: The ecryptfs super block
135 * @buf: The struct kstatfs to fill in with stats 114 * @buf: The struct kstatfs to fill in with stats
@@ -203,7 +182,6 @@ const struct super_operations ecryptfs_sops = {
203 .alloc_inode = ecryptfs_alloc_inode, 182 .alloc_inode = ecryptfs_alloc_inode,
204 .destroy_inode = ecryptfs_destroy_inode, 183 .destroy_inode = ecryptfs_destroy_inode,
205 .drop_inode = generic_delete_inode, 184 .drop_inode = generic_delete_inode,
206 .put_super = ecryptfs_put_super,
207 .statfs = ecryptfs_statfs, 185 .statfs = ecryptfs_statfs,
208 .remount_fs = NULL, 186 .remount_fs = NULL,
209 .clear_inode = ecryptfs_clear_inode, 187 .clear_inode = ecryptfs_clear_inode,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5b4efc..3817149919cb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
1140 * ep_poll_callback() when events will become available. 1140 * ep_poll_callback() when events will become available.
1141 */ 1141 */
1142 init_waitqueue_entry(&wait, current); 1142 init_waitqueue_entry(&wait, current);
1143 wait.flags |= WQ_FLAG_EXCLUSIVE; 1143 __add_wait_queue_exclusive(&ep->wq, &wait);
1144 __add_wait_queue(&ep->wq, &wait);
1145 1144
1146 for (;;) { 1145 for (;;) {
1147 /* 1146 /*
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 4cfab1cc75c0..d91e9d829bc1 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -608,7 +608,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
608 de->inode_no = cpu_to_le64(parent->i_ino); 608 de->inode_no = cpu_to_le64(parent->i_ino);
609 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); 609 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
610 exofs_set_de_type(de, inode); 610 exofs_set_de_type(de, inode);
611 kunmap_atomic(page, KM_USER0); 611 kunmap_atomic(kaddr, KM_USER0);
612 err = exofs_commit_chunk(page, 0, chunk_size); 612 err = exofs_commit_chunk(page, 0, chunk_size);
613fail: 613fail:
614 page_cache_release(page); 614 page_cache_release(page);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 76d2a79ef93e..4bb6ef822e46 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -755,6 +755,21 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
755 return ret; 755 return ret;
756} 756}
757 757
758static int exofs_releasepage(struct page *page, gfp_t gfp)
759{
760 EXOFS_DBGMSG("page 0x%lx\n", page->index);
761 WARN_ON(1);
762 return try_to_free_buffers(page);
763}
764
765static void exofs_invalidatepage(struct page *page, unsigned long offset)
766{
767 EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page));
768 WARN_ON(1);
769
770 block_invalidatepage(page, offset);
771}
772
758const struct address_space_operations exofs_aops = { 773const struct address_space_operations exofs_aops = {
759 .readpage = exofs_readpage, 774 .readpage = exofs_readpage,
760 .readpages = exofs_readpages, 775 .readpages = exofs_readpages,
@@ -762,6 +777,21 @@ const struct address_space_operations exofs_aops = {
762 .writepages = exofs_writepages, 777 .writepages = exofs_writepages,
763 .write_begin = exofs_write_begin_export, 778 .write_begin = exofs_write_begin_export,
764 .write_end = exofs_write_end, 779 .write_end = exofs_write_end,
780 .releasepage = exofs_releasepage,
781 .set_page_dirty = __set_page_dirty_nobuffers,
782 .invalidatepage = exofs_invalidatepage,
783
784 /* Not implemented Yet */
785 .bmap = NULL, /* TODO: use osd's OSD_ACT_READ_MAP */
786 .direct_IO = NULL, /* TODO: Should be trivial to do */
787
788 /* With these NULL has special meaning or default is not exported */
789 .sync_page = NULL,
790 .get_xip_mem = NULL,
791 .migratepage = NULL,
792 .launder_page = NULL,
793 .is_partially_uptodate = NULL,
794 .error_remove_page = NULL,
765}; 795};
766 796
767/****************************************************************************** 797/******************************************************************************
@@ -1123,16 +1153,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1123 sbi = sb->s_fs_info; 1153 sbi = sb->s_fs_info;
1124 1154
1125 sb->s_dirt = 1; 1155 sb->s_dirt = 1;
1126 inode->i_uid = current->cred->fsuid; 1156 inode_init_owner(inode, dir, mode);
1127 if (dir->i_mode & S_ISGID) {
1128 inode->i_gid = dir->i_gid;
1129 if (S_ISDIR(mode))
1130 mode |= S_ISGID;
1131 } else {
1132 inode->i_gid = current->cred->fsgid;
1133 }
1134 inode->i_mode = mode;
1135
1136 inode->i_ino = sbi->s_nextid++; 1157 inode->i_ino = sbi->s_nextid++;
1137 inode->i_blkbits = EXOFS_BLKSHIFT; 1158 inode->i_blkbits = EXOFS_BLKSHIFT;
1138 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1159 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a99e54318c3d..ca7e2a0ed98a 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -420,7 +420,7 @@ release_and_out:
420 return error; 420 return error;
421} 421}
422 422
423struct xattr_handler ext2_xattr_acl_access_handler = { 423const struct xattr_handler ext2_xattr_acl_access_handler = {
424 .prefix = POSIX_ACL_XATTR_ACCESS, 424 .prefix = POSIX_ACL_XATTR_ACCESS,
425 .flags = ACL_TYPE_ACCESS, 425 .flags = ACL_TYPE_ACCESS,
426 .list = ext2_xattr_list_acl_access, 426 .list = ext2_xattr_list_acl_access,
@@ -428,7 +428,7 @@ struct xattr_handler ext2_xattr_acl_access_handler = {
428 .set = ext2_xattr_set_acl, 428 .set = ext2_xattr_set_acl,
429}; 429};
430 430
431struct xattr_handler ext2_xattr_acl_default_handler = { 431const struct xattr_handler ext2_xattr_acl_default_handler = {
432 .prefix = POSIX_ACL_XATTR_DEFAULT, 432 .prefix = POSIX_ACL_XATTR_DEFAULT,
433 .flags = ACL_TYPE_DEFAULT, 433 .flags = ACL_TYPE_DEFAULT,
434 .list = ext2_xattr_list_acl_default, 434 .list = ext2_xattr_list_acl_default,
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 3cf038c055d7..e8766a396776 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1332,6 +1332,12 @@ retry_alloc:
1332 1332
1333 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1333 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1334 /* 1334 /*
1335 * skip this group (and avoid loading bitmap) if there
1336 * are no free blocks
1337 */
1338 if (!free_blocks)
1339 continue;
1340 /*
1335 * skip this group if the number of 1341 * skip this group if the number of
1336 * free blocks is less than half of the reservation 1342 * free blocks is less than half of the reservation
1337 * window size. 1343 * window size.
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad7d572ee8dc..938dbc739d00 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -106,7 +106,7 @@ void ext2_free_inode (struct inode * inode)
106 struct super_block * sb = inode->i_sb; 106 struct super_block * sb = inode->i_sb;
107 int is_directory; 107 int is_directory;
108 unsigned long ino; 108 unsigned long ino;
109 struct buffer_head *bitmap_bh = NULL; 109 struct buffer_head *bitmap_bh;
110 unsigned long block_group; 110 unsigned long block_group;
111 unsigned long bit; 111 unsigned long bit;
112 struct ext2_super_block * es; 112 struct ext2_super_block * es;
@@ -135,14 +135,13 @@ void ext2_free_inode (struct inode * inode)
135 ino > le32_to_cpu(es->s_inodes_count)) { 135 ino > le32_to_cpu(es->s_inodes_count)) {
136 ext2_error (sb, "ext2_free_inode", 136 ext2_error (sb, "ext2_free_inode",
137 "reserved or nonexistent inode %lu", ino); 137 "reserved or nonexistent inode %lu", ino);
138 goto error_return; 138 return;
139 } 139 }
140 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); 140 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
141 bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); 141 bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
142 brelse(bitmap_bh);
143 bitmap_bh = read_inode_bitmap(sb, block_group); 142 bitmap_bh = read_inode_bitmap(sb, block_group);
144 if (!bitmap_bh) 143 if (!bitmap_bh)
145 goto error_return; 144 return;
146 145
147 /* Ok, now we can actually update the inode bitmaps.. */ 146 /* Ok, now we can actually update the inode bitmaps.. */
148 if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), 147 if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
@@ -154,7 +153,7 @@ void ext2_free_inode (struct inode * inode)
154 mark_buffer_dirty(bitmap_bh); 153 mark_buffer_dirty(bitmap_bh);
155 if (sb->s_flags & MS_SYNCHRONOUS) 154 if (sb->s_flags & MS_SYNCHRONOUS)
156 sync_dirty_buffer(bitmap_bh); 155 sync_dirty_buffer(bitmap_bh);
157error_return: 156
158 brelse(bitmap_bh); 157 brelse(bitmap_bh);
159} 158}
160 159
@@ -550,16 +549,12 @@ got:
550 549
551 sb->s_dirt = 1; 550 sb->s_dirt = 1;
552 mark_buffer_dirty(bh2); 551 mark_buffer_dirty(bh2);
553 inode->i_uid = current_fsuid(); 552 if (test_opt(sb, GRPID)) {
554 if (test_opt (sb, GRPID)) 553 inode->i_mode = mode;
555 inode->i_gid = dir->i_gid; 554 inode->i_uid = current_fsuid();
556 else if (dir->i_mode & S_ISGID) {
557 inode->i_gid = dir->i_gid; 555 inode->i_gid = dir->i_gid;
558 if (S_ISDIR(mode))
559 mode |= S_ISGID;
560 } else 556 } else
561 inode->i_gid = current_fsgid(); 557 inode_init_owner(inode, dir, mode);
562 inode->i_mode = mode;
563 558
564 inode->i_ino = ino; 559 inode->i_ino = ino;
565 inode->i_blocks = 0; 560 inode->i_blocks = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fc13cc119aad..527c46d9bc1f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -22,7 +22,6 @@
22 * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
23 */ 23 */
24 24
25#include <linux/smp_lock.h>
26#include <linux/time.h> 25#include <linux/time.h>
27#include <linux/highuid.h> 26#include <linux/highuid.h>
28#include <linux/pagemap.h> 27#include <linux/pagemap.h>
@@ -1406,11 +1405,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
1406 /* If this is the first large file 1405 /* If this is the first large file
1407 * created, add a flag to the superblock. 1406 * created, add a flag to the superblock.
1408 */ 1407 */
1409 lock_kernel(); 1408 spin_lock(&EXT2_SB(sb)->s_lock);
1410 ext2_update_dynamic_rev(sb); 1409 ext2_update_dynamic_rev(sb);
1411 EXT2_SET_RO_COMPAT_FEATURE(sb, 1410 EXT2_SET_RO_COMPAT_FEATURE(sb,
1412 EXT2_FEATURE_RO_COMPAT_LARGE_FILE); 1411 EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
1413 unlock_kernel(); 1412 spin_unlock(&EXT2_SB(sb)->s_lock);
1414 ext2_write_super(sb); 1413 ext2_write_super(sb);
1415 } 1414 }
1416 } 1415 }
@@ -1467,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1467 if (error) 1466 if (error)
1468 return error; 1467 return error;
1469 1468
1470 if (iattr->ia_valid & ATTR_SIZE) 1469 if (is_quota_modification(inode, iattr))
1471 dquot_initialize(inode); 1470 dquot_initialize(inode);
1472 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 1471 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
1473 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 1472 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 42e4a303b675..71e9eb1fa696 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -26,7 +26,6 @@
26#include <linux/random.h> 26#include <linux/random.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/exportfs.h> 28#include <linux/exportfs.h>
29#include <linux/smp_lock.h>
30#include <linux/vfs.h> 29#include <linux/vfs.h>
31#include <linux/seq_file.h> 30#include <linux/seq_file.h>
32#include <linux/mount.h> 31#include <linux/mount.h>
@@ -39,7 +38,7 @@
39#include "xip.h" 38#include "xip.h"
40 39
41static void ext2_sync_super(struct super_block *sb, 40static void ext2_sync_super(struct super_block *sb,
42 struct ext2_super_block *es); 41 struct ext2_super_block *es, int wait);
43static int ext2_remount (struct super_block * sb, int * flags, char * data); 42static int ext2_remount (struct super_block * sb, int * flags, char * data);
44static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
45static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
@@ -52,9 +51,11 @@ void ext2_error (struct super_block * sb, const char * function,
52 struct ext2_super_block *es = sbi->s_es; 51 struct ext2_super_block *es = sbi->s_es;
53 52
54 if (!(sb->s_flags & MS_RDONLY)) { 53 if (!(sb->s_flags & MS_RDONLY)) {
54 spin_lock(&sbi->s_lock);
55 sbi->s_mount_state |= EXT2_ERROR_FS; 55 sbi->s_mount_state |= EXT2_ERROR_FS;
56 es->s_state |= cpu_to_le16(EXT2_ERROR_FS); 56 es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
57 ext2_sync_super(sb, es); 57 spin_unlock(&sbi->s_lock);
58 ext2_sync_super(sb, es, 1);
58 } 59 }
59 60
60 va_start(args, fmt); 61 va_start(args, fmt);
@@ -84,6 +85,9 @@ void ext2_msg(struct super_block *sb, const char *prefix,
84 va_end(args); 85 va_end(args);
85} 86}
86 87
88/*
89 * This must be called with sbi->s_lock held.
90 */
87void ext2_update_dynamic_rev(struct super_block *sb) 91void ext2_update_dynamic_rev(struct super_block *sb)
88{ 92{
89 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 93 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
@@ -115,8 +119,6 @@ static void ext2_put_super (struct super_block * sb)
115 int i; 119 int i;
116 struct ext2_sb_info *sbi = EXT2_SB(sb); 120 struct ext2_sb_info *sbi = EXT2_SB(sb);
117 121
118 lock_kernel();
119
120 if (sb->s_dirt) 122 if (sb->s_dirt)
121 ext2_write_super(sb); 123 ext2_write_super(sb);
122 124
@@ -124,8 +126,10 @@ static void ext2_put_super (struct super_block * sb)
124 if (!(sb->s_flags & MS_RDONLY)) { 126 if (!(sb->s_flags & MS_RDONLY)) {
125 struct ext2_super_block *es = sbi->s_es; 127 struct ext2_super_block *es = sbi->s_es;
126 128
129 spin_lock(&sbi->s_lock);
127 es->s_state = cpu_to_le16(sbi->s_mount_state); 130 es->s_state = cpu_to_le16(sbi->s_mount_state);
128 ext2_sync_super(sb, es); 131 spin_unlock(&sbi->s_lock);
132 ext2_sync_super(sb, es, 1);
129 } 133 }
130 db_count = sbi->s_gdb_count; 134 db_count = sbi->s_gdb_count;
131 for (i = 0; i < db_count; i++) 135 for (i = 0; i < db_count; i++)
@@ -140,8 +144,6 @@ static void ext2_put_super (struct super_block * sb)
140 sb->s_fs_info = NULL; 144 sb->s_fs_info = NULL;
141 kfree(sbi->s_blockgroup_lock); 145 kfree(sbi->s_blockgroup_lock);
142 kfree(sbi); 146 kfree(sbi);
143
144 unlock_kernel();
145} 147}
146 148
147static struct kmem_cache * ext2_inode_cachep; 149static struct kmem_cache * ext2_inode_cachep;
@@ -209,6 +211,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
209 struct ext2_super_block *es = sbi->s_es; 211 struct ext2_super_block *es = sbi->s_es;
210 unsigned long def_mount_opts; 212 unsigned long def_mount_opts;
211 213
214 spin_lock(&sbi->s_lock);
212 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 215 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
213 216
214 if (sbi->s_sb_block != 1) 217 if (sbi->s_sb_block != 1)
@@ -281,6 +284,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
281 if (!test_opt(sb, RESERVATION)) 284 if (!test_opt(sb, RESERVATION))
282 seq_puts(seq, ",noreservation"); 285 seq_puts(seq, ",noreservation");
283 286
287 spin_unlock(&sbi->s_lock);
284 return 0; 288 return 0;
285} 289}
286 290
@@ -606,7 +610,6 @@ static int ext2_setup_super (struct super_block * sb,
606 if (!le16_to_cpu(es->s_max_mnt_count)) 610 if (!le16_to_cpu(es->s_max_mnt_count))
607 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 611 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
608 le16_add_cpu(&es->s_mnt_count, 1); 612 le16_add_cpu(&es->s_mnt_count, 1);
609 ext2_write_super(sb);
610 if (test_opt (sb, DEBUG)) 613 if (test_opt (sb, DEBUG))
611 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " 614 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
612 "bpg=%lu, ipg=%lu, mo=%04lx]", 615 "bpg=%lu, ipg=%lu, mo=%04lx]",
@@ -767,6 +770,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
767 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
768 sbi->s_sb_block = sb_block; 771 sbi->s_sb_block = sb_block;
769 772
773 spin_lock_init(&sbi->s_lock);
774
770 /* 775 /*
771 * See what the current blocksize for the device is, and 776 * See what the current blocksize for the device is, and
772 * use that as the blocksize. Otherwise (or if the blocksize 777 * use that as the blocksize. Otherwise (or if the blocksize
@@ -1079,7 +1084,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1079 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1084 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1080 ext2_msg(sb, KERN_WARNING, 1085 ext2_msg(sb, KERN_WARNING,
1081 "warning: mounting ext3 filesystem as ext2"); 1086 "warning: mounting ext3 filesystem as ext2");
1082 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1087 if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
1088 sb->s_flags |= MS_RDONLY;
1089 ext2_write_super(sb);
1083 return 0; 1090 return 0;
1084 1091
1085cantfind_ext2: 1092cantfind_ext2:
@@ -1120,30 +1127,26 @@ static void ext2_clear_super_error(struct super_block *sb)
1120 * be remapped. Nothing we can do but to retry the 1127 * be remapped. Nothing we can do but to retry the
1121 * write and hope for the best. 1128 * write and hope for the best.
1122 */ 1129 */
1123 printk(KERN_ERR "EXT2-fs: %s previous I/O error to " 1130 ext2_msg(sb, KERN_ERR,
1124 "superblock detected", sb->s_id); 1131 "previous I/O error to superblock detected\n");
1125 clear_buffer_write_io_error(sbh); 1132 clear_buffer_write_io_error(sbh);
1126 set_buffer_uptodate(sbh); 1133 set_buffer_uptodate(sbh);
1127 } 1134 }
1128} 1135}
1129 1136
1130static void ext2_commit_super (struct super_block * sb, 1137static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
1131 struct ext2_super_block * es) 1138 int wait)
1132{
1133 ext2_clear_super_error(sb);
1134 es->s_wtime = cpu_to_le32(get_seconds());
1135 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1136 sb->s_dirt = 0;
1137}
1138
1139static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1140{ 1139{
1141 ext2_clear_super_error(sb); 1140 ext2_clear_super_error(sb);
1141 spin_lock(&EXT2_SB(sb)->s_lock);
1142 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1142 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1143 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1143 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1144 es->s_wtime = cpu_to_le32(get_seconds()); 1144 es->s_wtime = cpu_to_le32(get_seconds());
1145 /* unlock before we do IO */
1146 spin_unlock(&EXT2_SB(sb)->s_lock);
1145 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1147 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1146 sync_dirty_buffer(EXT2_SB(sb)->s_sbh); 1148 if (wait)
1149 sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
1147 sb->s_dirt = 0; 1150 sb->s_dirt = 0;
1148} 1151}
1149 1152
@@ -1157,43 +1160,18 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1157 * may have been checked while mounted and e2fsck may have 1160 * may have been checked while mounted and e2fsck may have
1158 * set s_state to EXT2_VALID_FS after some corrections. 1161 * set s_state to EXT2_VALID_FS after some corrections.
1159 */ 1162 */
1160
1161static int ext2_sync_fs(struct super_block *sb, int wait) 1163static int ext2_sync_fs(struct super_block *sb, int wait)
1162{ 1164{
1165 struct ext2_sb_info *sbi = EXT2_SB(sb);
1163 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 1166 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1164 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1165
1166 lock_kernel();
1167 if (buffer_write_io_error(sbh)) {
1168 /*
1169 * Oh, dear. A previous attempt to write the
1170 * superblock failed. This could happen because the
1171 * USB device was yanked out. Or it could happen to
1172 * be a transient write error and maybe the block will
1173 * be remapped. Nothing we can do but to retry the
1174 * write and hope for the best.
1175 */
1176 ext2_msg(sb, KERN_ERR,
1177 "previous I/O error to superblock detected\n");
1178 clear_buffer_write_io_error(sbh);
1179 set_buffer_uptodate(sbh);
1180 }
1181 1167
1168 spin_lock(&sbi->s_lock);
1182 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { 1169 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1183 ext2_debug("setting valid to 0\n"); 1170 ext2_debug("setting valid to 0\n");
1184 es->s_state &= cpu_to_le16(~EXT2_VALID_FS); 1171 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
1185 es->s_free_blocks_count =
1186 cpu_to_le32(ext2_count_free_blocks(sb));
1187 es->s_free_inodes_count =
1188 cpu_to_le32(ext2_count_free_inodes(sb));
1189 es->s_mtime = cpu_to_le32(get_seconds());
1190 ext2_sync_super(sb, es);
1191 } else {
1192 ext2_commit_super(sb, es);
1193 } 1172 }
1194 sb->s_dirt = 0; 1173 spin_unlock(&sbi->s_lock);
1195 unlock_kernel(); 1174 ext2_sync_super(sb, es, wait);
1196
1197 return 0; 1175 return 0;
1198} 1176}
1199 1177
@@ -1215,7 +1193,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1215 unsigned long old_sb_flags; 1193 unsigned long old_sb_flags;
1216 int err; 1194 int err;
1217 1195
1218 lock_kernel(); 1196 spin_lock(&sbi->s_lock);
1219 1197
1220 /* Store the old options */ 1198 /* Store the old options */
1221 old_sb_flags = sb->s_flags; 1199 old_sb_flags = sb->s_flags;
@@ -1254,13 +1232,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1254 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; 1232 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1255 } 1233 }
1256 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 1234 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
1257 unlock_kernel(); 1235 spin_unlock(&sbi->s_lock);
1258 return 0; 1236 return 0;
1259 } 1237 }
1260 if (*flags & MS_RDONLY) { 1238 if (*flags & MS_RDONLY) {
1261 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || 1239 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
1262 !(sbi->s_mount_state & EXT2_VALID_FS)) { 1240 !(sbi->s_mount_state & EXT2_VALID_FS)) {
1263 unlock_kernel(); 1241 spin_unlock(&sbi->s_lock);
1264 return 0; 1242 return 0;
1265 } 1243 }
1266 /* 1244 /*
@@ -1269,6 +1247,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1269 */ 1247 */
1270 es->s_state = cpu_to_le16(sbi->s_mount_state); 1248 es->s_state = cpu_to_le16(sbi->s_mount_state);
1271 es->s_mtime = cpu_to_le32(get_seconds()); 1249 es->s_mtime = cpu_to_le32(get_seconds());
1250 spin_unlock(&sbi->s_lock);
1251 ext2_sync_super(sb, es, 1);
1272 } else { 1252 } else {
1273 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, 1253 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
1274 ~EXT2_FEATURE_RO_COMPAT_SUPP); 1254 ~EXT2_FEATURE_RO_COMPAT_SUPP);
@@ -1288,16 +1268,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1288 sbi->s_mount_state = le16_to_cpu(es->s_state); 1268 sbi->s_mount_state = le16_to_cpu(es->s_state);
1289 if (!ext2_setup_super (sb, es, 0)) 1269 if (!ext2_setup_super (sb, es, 0))
1290 sb->s_flags &= ~MS_RDONLY; 1270 sb->s_flags &= ~MS_RDONLY;
1271 spin_unlock(&sbi->s_lock);
1272 ext2_write_super(sb);
1291 } 1273 }
1292 ext2_sync_super(sb, es);
1293 unlock_kernel();
1294 return 0; 1274 return 0;
1295restore_opts: 1275restore_opts:
1296 sbi->s_mount_opt = old_opts.s_mount_opt; 1276 sbi->s_mount_opt = old_opts.s_mount_opt;
1297 sbi->s_resuid = old_opts.s_resuid; 1277 sbi->s_resuid = old_opts.s_resuid;
1298 sbi->s_resgid = old_opts.s_resgid; 1278 sbi->s_resgid = old_opts.s_resgid;
1299 sb->s_flags = old_sb_flags; 1279 sb->s_flags = old_sb_flags;
1300 unlock_kernel(); 1280 spin_unlock(&sbi->s_lock);
1301 return err; 1281 return err;
1302} 1282}
1303 1283
@@ -1308,6 +1288,8 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1308 struct ext2_super_block *es = sbi->s_es; 1288 struct ext2_super_block *es = sbi->s_es;
1309 u64 fsid; 1289 u64 fsid;
1310 1290
1291 spin_lock(&sbi->s_lock);
1292
1311 if (test_opt (sb, MINIX_DF)) 1293 if (test_opt (sb, MINIX_DF))
1312 sbi->s_overhead_last = 0; 1294 sbi->s_overhead_last = 0;
1313 else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { 1295 else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
@@ -1362,6 +1344,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1362 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 1344 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
1363 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 1345 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1364 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 1346 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1347 spin_unlock(&sbi->s_lock);
1365 return 0; 1348 return 0;
1366} 1349}
1367 1350
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index e44dc92609be..7c3915780b19 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -101,7 +101,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *,
101 101
102static struct mb_cache *ext2_xattr_cache; 102static struct mb_cache *ext2_xattr_cache;
103 103
104static struct xattr_handler *ext2_xattr_handler_map[] = { 104static const struct xattr_handler *ext2_xattr_handler_map[] = {
105 [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, 105 [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
106#ifdef CONFIG_EXT2_FS_POSIX_ACL 106#ifdef CONFIG_EXT2_FS_POSIX_ACL
107 [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, 107 [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler,
@@ -113,7 +113,7 @@ static struct xattr_handler *ext2_xattr_handler_map[] = {
113#endif 113#endif
114}; 114};
115 115
116struct xattr_handler *ext2_xattr_handlers[] = { 116const struct xattr_handler *ext2_xattr_handlers[] = {
117 &ext2_xattr_user_handler, 117 &ext2_xattr_user_handler,
118 &ext2_xattr_trusted_handler, 118 &ext2_xattr_trusted_handler,
119#ifdef CONFIG_EXT2_FS_POSIX_ACL 119#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -126,10 +126,10 @@ struct xattr_handler *ext2_xattr_handlers[] = {
126 NULL 126 NULL
127}; 127};
128 128
129static inline struct xattr_handler * 129static inline const struct xattr_handler *
130ext2_xattr_handler(int name_index) 130ext2_xattr_handler(int name_index)
131{ 131{
132 struct xattr_handler *handler = NULL; 132 const struct xattr_handler *handler = NULL;
133 133
134 if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) 134 if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
135 handler = ext2_xattr_handler_map[name_index]; 135 handler = ext2_xattr_handler_map[name_index];
@@ -298,7 +298,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
298 /* list the attribute names */ 298 /* list the attribute names */
299 for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); 299 for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
300 entry = EXT2_XATTR_NEXT(entry)) { 300 entry = EXT2_XATTR_NEXT(entry)) {
301 struct xattr_handler *handler = 301 const struct xattr_handler *handler =
302 ext2_xattr_handler(entry->e_name_index); 302 ext2_xattr_handler(entry->e_name_index);
303 303
304 if (handler) { 304 if (handler) {
@@ -345,7 +345,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
345 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) 345 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
346 return; 346 return;
347 347
348 spin_lock(&EXT2_SB(sb)->s_lock);
348 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); 349 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
350 spin_unlock(&EXT2_SB(sb)->s_lock);
349 sb->s_dirt = 1; 351 sb->s_dirt = 1;
350 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 352 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
351} 353}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index bf8175b2ced9..a1a1c2184616 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -55,11 +55,11 @@ struct ext2_xattr_entry {
55 55
56# ifdef CONFIG_EXT2_FS_XATTR 56# ifdef CONFIG_EXT2_FS_XATTR
57 57
58extern struct xattr_handler ext2_xattr_user_handler; 58extern const struct xattr_handler ext2_xattr_user_handler;
59extern struct xattr_handler ext2_xattr_trusted_handler; 59extern const struct xattr_handler ext2_xattr_trusted_handler;
60extern struct xattr_handler ext2_xattr_acl_access_handler; 60extern const struct xattr_handler ext2_xattr_acl_access_handler;
61extern struct xattr_handler ext2_xattr_acl_default_handler; 61extern const struct xattr_handler ext2_xattr_acl_default_handler;
62extern struct xattr_handler ext2_xattr_security_handler; 62extern const struct xattr_handler ext2_xattr_security_handler;
63 63
64extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); 64extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
65 65
@@ -72,7 +72,7 @@ extern void ext2_xattr_put_super(struct super_block *);
72extern int init_ext2_xattr(void); 72extern int init_ext2_xattr(void);
73extern void exit_ext2_xattr(void); 73extern void exit_ext2_xattr(void);
74 74
75extern struct xattr_handler *ext2_xattr_handlers[]; 75extern const struct xattr_handler *ext2_xattr_handlers[];
76 76
77# else /* CONFIG_EXT2_FS_XATTR */ 77# else /* CONFIG_EXT2_FS_XATTR */
78 78
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index b118c6383c6d..3004e15d5da5 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -67,7 +67,7 @@ ext2_init_security(struct inode *inode, struct inode *dir)
67 return err; 67 return err;
68} 68}
69 69
70struct xattr_handler ext2_xattr_security_handler = { 70const struct xattr_handler ext2_xattr_security_handler = {
71 .prefix = XATTR_SECURITY_PREFIX, 71 .prefix = XATTR_SECURITY_PREFIX,
72 .list = ext2_xattr_security_list, 72 .list = ext2_xattr_security_list,
73 .get = ext2_xattr_security_get, 73 .get = ext2_xattr_security_get,
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 2a26d71f4771..667e46a8d62d 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -50,7 +50,7 @@ ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
50 value, size, flags); 50 value, size, flags);
51} 51}
52 52
53struct xattr_handler ext2_xattr_trusted_handler = { 53const struct xattr_handler ext2_xattr_trusted_handler = {
54 .prefix = XATTR_TRUSTED_PREFIX, 54 .prefix = XATTR_TRUSTED_PREFIX,
55 .list = ext2_xattr_trusted_list, 55 .list = ext2_xattr_trusted_list,
56 .get = ext2_xattr_trusted_get, 56 .get = ext2_xattr_trusted_get,
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 3f6caf3684b4..099d20f47163 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -54,7 +54,7 @@ ext2_xattr_user_set(struct dentry *dentry, const char *name,
54 name, value, size, flags); 54 name, value, size, flags);
55} 55}
56 56
57struct xattr_handler ext2_xattr_user_handler = { 57const struct xattr_handler ext2_xattr_user_handler = {
58 .prefix = XATTR_USER_PREFIX, 58 .prefix = XATTR_USER_PREFIX,
59 .list = ext2_xattr_user_list, 59 .list = ext2_xattr_user_list,
60 .get = ext2_xattr_user_get, 60 .get = ext2_xattr_user_get,
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 82ba34158661..01552abbca3c 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -456,7 +456,7 @@ release_and_out:
456 return error; 456 return error;
457} 457}
458 458
459struct xattr_handler ext3_xattr_acl_access_handler = { 459const struct xattr_handler ext3_xattr_acl_access_handler = {
460 .prefix = POSIX_ACL_XATTR_ACCESS, 460 .prefix = POSIX_ACL_XATTR_ACCESS,
461 .flags = ACL_TYPE_ACCESS, 461 .flags = ACL_TYPE_ACCESS,
462 .list = ext3_xattr_list_acl_access, 462 .list = ext3_xattr_list_acl_access,
@@ -464,7 +464,7 @@ struct xattr_handler ext3_xattr_acl_access_handler = {
464 .set = ext3_xattr_set_acl, 464 .set = ext3_xattr_set_acl,
465}; 465};
466 466
467struct xattr_handler ext3_xattr_acl_default_handler = { 467const struct xattr_handler ext3_xattr_acl_default_handler = {
468 .prefix = POSIX_ACL_XATTR_DEFAULT, 468 .prefix = POSIX_ACL_XATTR_DEFAULT,
469 .flags = ACL_TYPE_DEFAULT, 469 .flags = ACL_TYPE_DEFAULT,
470 .list = ext3_xattr_list_acl_default, 470 .list = ext3_xattr_list_acl_default,
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a177122a1b25..4a32511f4ded 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1584,6 +1584,12 @@ retry_alloc:
1584 goto io_error; 1584 goto io_error;
1585 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1585 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1586 /* 1586 /*
1587 * skip this group (and avoid loading bitmap) if there
1588 * are no free blocks
1589 */
1590 if (!free_blocks)
1591 continue;
1592 /*
1587 * skip this group if the number of 1593 * skip this group if the number of
1588 * free blocks is less than half of the reservation 1594 * free blocks is less than half of the reservation
1589 * window size. 1595 * window size.
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 8209f266e9ad..fcf7487734b6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -48,7 +48,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
48 struct inode *inode = dentry->d_inode; 48 struct inode *inode = dentry->d_inode;
49 struct ext3_inode_info *ei = EXT3_I(inode); 49 struct ext3_inode_info *ei = EXT3_I(inode);
50 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; 50 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
51 int ret = 0; 51 int ret, needs_barrier = 0;
52 tid_t commit_tid; 52 tid_t commit_tid;
53 53
54 if (inode->i_sb->s_flags & MS_RDONLY) 54 if (inode->i_sb->s_flags & MS_RDONLY)
@@ -70,28 +70,27 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
70 * (they were dirtied by commit). But that's OK - the blocks are 70 * (they were dirtied by commit). But that's OK - the blocks are
71 * safe in-journal, which is all fsync() needs to ensure. 71 * safe in-journal, which is all fsync() needs to ensure.
72 */ 72 */
73 if (ext3_should_journal_data(inode)) { 73 if (ext3_should_journal_data(inode))
74 ret = ext3_force_commit(inode->i_sb); 74 return ext3_force_commit(inode->i_sb);
75 goto out;
76 }
77 75
78 if (datasync) 76 if (datasync)
79 commit_tid = atomic_read(&ei->i_datasync_tid); 77 commit_tid = atomic_read(&ei->i_datasync_tid);
80 else 78 else
81 commit_tid = atomic_read(&ei->i_sync_tid); 79 commit_tid = atomic_read(&ei->i_sync_tid);
82 80
83 if (log_start_commit(journal, commit_tid)) { 81 if (test_opt(inode->i_sb, BARRIER) &&
84 log_wait_commit(journal, commit_tid); 82 !journal_trans_will_send_data_barrier(journal, commit_tid))
85 goto out; 83 needs_barrier = 1;
86 } 84 log_start_commit(journal, commit_tid);
85 ret = log_wait_commit(journal, commit_tid);
87 86
88 /* 87 /*
89 * In case we didn't commit a transaction, we have to flush 88 * In case we didn't commit a transaction, we have to flush
90 * disk caches manually so that data really is on persistent 89 * disk caches manually so that data really is on persistent
91 * storage 90 * storage
92 */ 91 */
93 if (test_opt(inode->i_sb, BARRIER)) 92 if (needs_barrier)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 93 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
95out: 94 BLKDEV_IFL_WAIT);
96 return ret; 95 return ret;
97} 96}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 0d0e97ed3ff6..498021eb88fb 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -538,16 +538,13 @@ got:
538 if (S_ISDIR(mode)) 538 if (S_ISDIR(mode))
539 percpu_counter_inc(&sbi->s_dirs_counter); 539 percpu_counter_inc(&sbi->s_dirs_counter);
540 540
541 inode->i_uid = current_fsuid(); 541
542 if (test_opt (sb, GRPID)) 542 if (test_opt(sb, GRPID)) {
543 inode->i_gid = dir->i_gid; 543 inode->i_mode = mode;
544 else if (dir->i_mode & S_ISGID) { 544 inode->i_uid = current_fsuid();
545 inode->i_gid = dir->i_gid; 545 inode->i_gid = dir->i_gid;
546 if (S_ISDIR(mode))
547 mode |= S_ISGID;
548 } else 546 } else
549 inode->i_gid = current_fsgid(); 547 inode_init_owner(inode, dir, mode);
550 inode->i_mode = mode;
551 548
552 inode->i_ino = ino; 549 inode->i_ino = ino;
553 /* This is the optimal IO size (for stat), not the fs block size */ 550 /* This is the optimal IO size (for stat), not the fs block size */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ea33bdf0a300..735f0190ec2a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3151 if (error) 3151 if (error)
3152 return error; 3152 return error;
3153 3153
3154 if (ia_valid & ATTR_SIZE) 3154 if (is_quota_modification(inode, attr))
3155 dquot_initialize(inode); 3155 dquot_initialize(inode);
3156 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3156 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3157 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3157 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 1bee604cc6cd..0fc1293d0e96 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -653,8 +653,12 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
653 seq_printf(seq, ",commit=%u", 653 seq_printf(seq, ",commit=%u",
654 (unsigned) (sbi->s_commit_interval / HZ)); 654 (unsigned) (sbi->s_commit_interval / HZ));
655 } 655 }
656 if (test_opt(sb, BARRIER)) 656
657 seq_puts(seq, ",barrier=1"); 657 /*
658 * Always display barrier state so it's clear what the status is.
659 */
660 seq_puts(seq, ",barrier=");
661 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
658 if (test_opt(sb, NOBH)) 662 if (test_opt(sb, NOBH))
659 seq_puts(seq, ",nobh"); 663 seq_puts(seq, ",nobh");
660 664
@@ -810,8 +814,8 @@ enum {
810 Opt_data_err_abort, Opt_data_err_ignore, 814 Opt_data_err_abort, Opt_data_err_ignore,
811 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 815 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
812 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 816 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
813 Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 817 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
814 Opt_usrquota, Opt_grpquota 818 Opt_resize, Opt_usrquota, Opt_grpquota
815}; 819};
816 820
817static const match_table_t tokens = { 821static const match_table_t tokens = {
@@ -865,6 +869,8 @@ static const match_table_t tokens = {
865 {Opt_quota, "quota"}, 869 {Opt_quota, "quota"},
866 {Opt_usrquota, "usrquota"}, 870 {Opt_usrquota, "usrquota"},
867 {Opt_barrier, "barrier=%u"}, 871 {Opt_barrier, "barrier=%u"},
872 {Opt_barrier, "barrier"},
873 {Opt_nobarrier, "nobarrier"},
868 {Opt_resize, "resize"}, 874 {Opt_resize, "resize"},
869 {Opt_err, NULL}, 875 {Opt_err, NULL},
870}; 876};
@@ -967,7 +973,11 @@ static int parse_options (char *options, struct super_block *sb,
967 int token; 973 int token;
968 if (!*p) 974 if (!*p)
969 continue; 975 continue;
970 976 /*
977 * Initialize args struct so we know whether arg was
978 * found; some options take optional arguments.
979 */
980 args[0].to = args[0].from = 0;
971 token = match_token(p, tokens, args); 981 token = match_token(p, tokens, args);
972 switch (token) { 982 switch (token) {
973 case Opt_bsd_df: 983 case Opt_bsd_df:
@@ -1215,9 +1225,15 @@ set_qf_format:
1215 case Opt_abort: 1225 case Opt_abort:
1216 set_opt(sbi->s_mount_opt, ABORT); 1226 set_opt(sbi->s_mount_opt, ABORT);
1217 break; 1227 break;
1228 case Opt_nobarrier:
1229 clear_opt(sbi->s_mount_opt, BARRIER);
1230 break;
1218 case Opt_barrier: 1231 case Opt_barrier:
1219 if (match_int(&args[0], &option)) 1232 if (args[0].from) {
1220 return 0; 1233 if (match_int(&args[0], &option))
1234 return 0;
1235 } else
1236 option = 1; /* No argument, default to 1 */
1221 if (option) 1237 if (option)
1222 set_opt(sbi->s_mount_opt, BARRIER); 1238 set_opt(sbi->s_mount_opt, BARRIER);
1223 else 1239 else
@@ -1890,21 +1906,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1890 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1906 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1891 spin_lock_init(&sbi->s_next_gen_lock); 1907 spin_lock_init(&sbi->s_next_gen_lock);
1892 1908
1893 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1894 ext3_count_free_blocks(sb));
1895 if (!err) {
1896 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1897 ext3_count_free_inodes(sb));
1898 }
1899 if (!err) {
1900 err = percpu_counter_init(&sbi->s_dirs_counter,
1901 ext3_count_dirs(sb));
1902 }
1903 if (err) {
1904 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1905 goto failed_mount3;
1906 }
1907
1908 /* per fileystem reservation list head & lock */ 1909 /* per fileystem reservation list head & lock */
1909 spin_lock_init(&sbi->s_rsv_window_lock); 1910 spin_lock_init(&sbi->s_rsv_window_lock);
1910 sbi->s_rsv_window_root = RB_ROOT; 1911 sbi->s_rsv_window_root = RB_ROOT;
@@ -1945,15 +1946,29 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1945 if (!test_opt(sb, NOLOAD) && 1946 if (!test_opt(sb, NOLOAD) &&
1946 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1947 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1947 if (ext3_load_journal(sb, es, journal_devnum)) 1948 if (ext3_load_journal(sb, es, journal_devnum))
1948 goto failed_mount3; 1949 goto failed_mount2;
1949 } else if (journal_inum) { 1950 } else if (journal_inum) {
1950 if (ext3_create_journal(sb, es, journal_inum)) 1951 if (ext3_create_journal(sb, es, journal_inum))
1951 goto failed_mount3; 1952 goto failed_mount2;
1952 } else { 1953 } else {
1953 if (!silent) 1954 if (!silent)
1954 ext3_msg(sb, KERN_ERR, 1955 ext3_msg(sb, KERN_ERR,
1955 "error: no journal found. " 1956 "error: no journal found. "
1956 "mounting ext3 over ext2?"); 1957 "mounting ext3 over ext2?");
1958 goto failed_mount2;
1959 }
1960 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1961 ext3_count_free_blocks(sb));
1962 if (!err) {
1963 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1964 ext3_count_free_inodes(sb));
1965 }
1966 if (!err) {
1967 err = percpu_counter_init(&sbi->s_dirs_counter,
1968 ext3_count_dirs(sb));
1969 }
1970 if (err) {
1971 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1957 goto failed_mount3; 1972 goto failed_mount3;
1958 } 1973 }
1959 1974
@@ -1978,7 +1993,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1978 ext3_msg(sb, KERN_ERR, 1993 ext3_msg(sb, KERN_ERR,
1979 "error: journal does not support " 1994 "error: journal does not support "
1980 "requested data journaling mode"); 1995 "requested data journaling mode");
1981 goto failed_mount4; 1996 goto failed_mount3;
1982 } 1997 }
1983 default: 1998 default:
1984 break; 1999 break;
@@ -2001,19 +2016,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2001 if (IS_ERR(root)) { 2016 if (IS_ERR(root)) {
2002 ext3_msg(sb, KERN_ERR, "error: get root inode failed"); 2017 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2003 ret = PTR_ERR(root); 2018 ret = PTR_ERR(root);
2004 goto failed_mount4; 2019 goto failed_mount3;
2005 } 2020 }
2006 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2021 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2007 iput(root); 2022 iput(root);
2008 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2023 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2009 goto failed_mount4; 2024 goto failed_mount3;
2010 } 2025 }
2011 sb->s_root = d_alloc_root(root); 2026 sb->s_root = d_alloc_root(root);
2012 if (!sb->s_root) { 2027 if (!sb->s_root) {
2013 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2028 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2014 iput(root); 2029 iput(root);
2015 ret = -ENOMEM; 2030 ret = -ENOMEM;
2016 goto failed_mount4; 2031 goto failed_mount3;
2017 } 2032 }
2018 2033
2019 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2034 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -2039,12 +2054,11 @@ cantfind_ext3:
2039 sb->s_id); 2054 sb->s_id);
2040 goto failed_mount; 2055 goto failed_mount;
2041 2056
2042failed_mount4:
2043 journal_destroy(sbi->s_journal);
2044failed_mount3: 2057failed_mount3:
2045 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2058 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2046 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2059 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2047 percpu_counter_destroy(&sbi->s_dirs_counter); 2060 percpu_counter_destroy(&sbi->s_dirs_counter);
2061 journal_destroy(sbi->s_journal);
2048failed_mount2: 2062failed_mount2:
2049 for (i = 0; i < db_count; i++) 2063 for (i = 0; i < db_count; i++)
2050 brelse(sbi->s_group_desc[i]); 2064 brelse(sbi->s_group_desc[i]);
@@ -2278,6 +2292,9 @@ static int ext3_load_journal(struct super_block *sb,
2278 return -EINVAL; 2292 return -EINVAL;
2279 } 2293 }
2280 2294
2295 if (!(journal->j_flags & JFS_BARRIER))
2296 printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
2297
2281 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2298 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2282 err = journal_update_format(journal); 2299 err = journal_update_format(journal);
2283 if (err) { 2300 if (err) {
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 534a94c3a933..71fb8d65e54c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -104,7 +104,7 @@ static int ext3_xattr_list(struct dentry *dentry, char *buffer,
104 104
105static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
106 106
107static struct xattr_handler *ext3_xattr_handler_map[] = { 107static const struct xattr_handler *ext3_xattr_handler_map[] = {
108 [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, 108 [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler,
109#ifdef CONFIG_EXT3_FS_POSIX_ACL 109#ifdef CONFIG_EXT3_FS_POSIX_ACL
110 [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, 110 [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler,
@@ -116,7 +116,7 @@ static struct xattr_handler *ext3_xattr_handler_map[] = {
116#endif 116#endif
117}; 117};
118 118
119struct xattr_handler *ext3_xattr_handlers[] = { 119const struct xattr_handler *ext3_xattr_handlers[] = {
120 &ext3_xattr_user_handler, 120 &ext3_xattr_user_handler,
121 &ext3_xattr_trusted_handler, 121 &ext3_xattr_trusted_handler,
122#ifdef CONFIG_EXT3_FS_POSIX_ACL 122#ifdef CONFIG_EXT3_FS_POSIX_ACL
@@ -129,10 +129,10 @@ struct xattr_handler *ext3_xattr_handlers[] = {
129 NULL 129 NULL
130}; 130};
131 131
132static inline struct xattr_handler * 132static inline const struct xattr_handler *
133ext3_xattr_handler(int name_index) 133ext3_xattr_handler(int name_index)
134{ 134{
135 struct xattr_handler *handler = NULL; 135 const struct xattr_handler *handler = NULL;
136 136
137 if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) 137 if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
138 handler = ext3_xattr_handler_map[name_index]; 138 handler = ext3_xattr_handler_map[name_index];
@@ -338,7 +338,7 @@ ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
338 size_t rest = buffer_size; 338 size_t rest = buffer_size;
339 339
340 for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { 340 for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
341 struct xattr_handler *handler = 341 const struct xattr_handler *handler =
342 ext3_xattr_handler(entry->e_name_index); 342 ext3_xattr_handler(entry->e_name_index);
343 343
344 if (handler) { 344 if (handler) {
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 148a4dfc82ab..377fe7201169 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -58,11 +58,11 @@ struct ext3_xattr_entry {
58 58
59# ifdef CONFIG_EXT3_FS_XATTR 59# ifdef CONFIG_EXT3_FS_XATTR
60 60
61extern struct xattr_handler ext3_xattr_user_handler; 61extern const struct xattr_handler ext3_xattr_user_handler;
62extern struct xattr_handler ext3_xattr_trusted_handler; 62extern const struct xattr_handler ext3_xattr_trusted_handler;
63extern struct xattr_handler ext3_xattr_acl_access_handler; 63extern const struct xattr_handler ext3_xattr_acl_access_handler;
64extern struct xattr_handler ext3_xattr_acl_default_handler; 64extern const struct xattr_handler ext3_xattr_acl_default_handler;
65extern struct xattr_handler ext3_xattr_security_handler; 65extern const struct xattr_handler ext3_xattr_security_handler;
66 66
67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); 67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
68 68
@@ -76,7 +76,7 @@ extern void ext3_xattr_put_super(struct super_block *);
76extern int init_ext3_xattr(void); 76extern int init_ext3_xattr(void);
77extern void exit_ext3_xattr(void); 77extern void exit_ext3_xattr(void);
78 78
79extern struct xattr_handler *ext3_xattr_handlers[]; 79extern const struct xattr_handler *ext3_xattr_handlers[];
80 80
81# else /* CONFIG_EXT3_FS_XATTR */ 81# else /* CONFIG_EXT3_FS_XATTR */
82 82
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 3af91f476dff..03a99bfc59f9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -69,7 +69,7 @@ ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
69 return err; 69 return err;
70} 70}
71 71
72struct xattr_handler ext3_xattr_security_handler = { 72const struct xattr_handler ext3_xattr_security_handler = {
73 .prefix = XATTR_SECURITY_PREFIX, 73 .prefix = XATTR_SECURITY_PREFIX,
74 .list = ext3_xattr_security_list, 74 .list = ext3_xattr_security_list,
75 .get = ext3_xattr_security_get, 75 .get = ext3_xattr_security_get,
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index e5562845ed96..dc8edda9ffe0 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -51,7 +51,7 @@ ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
51 value, size, flags); 51 value, size, flags);
52} 52}
53 53
54struct xattr_handler ext3_xattr_trusted_handler = { 54const struct xattr_handler ext3_xattr_trusted_handler = {
55 .prefix = XATTR_TRUSTED_PREFIX, 55 .prefix = XATTR_TRUSTED_PREFIX,
56 .list = ext3_xattr_trusted_list, 56 .list = ext3_xattr_trusted_list,
57 .get = ext3_xattr_trusted_get, 57 .get = ext3_xattr_trusted_get,
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 3bcfe9ee0a68..7a321974d584 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -54,7 +54,7 @@ ext3_xattr_user_set(struct dentry *dentry, const char *name,
54 name, value, size, flags); 54 name, value, size, flags);
55} 55}
56 56
57struct xattr_handler ext3_xattr_user_handler = { 57const struct xattr_handler ext3_xattr_user_handler = {
58 .prefix = XATTR_USER_PREFIX, 58 .prefix = XATTR_USER_PREFIX,
59 .list = ext3_xattr_user_list, 59 .list = ext3_xattr_user_list,
60 .get = ext3_xattr_user_get, 60 .get = ext3_xattr_user_get,
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 8a2a29d35a6f..feaf498feaa6 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -454,7 +454,7 @@ release_and_out:
454 return error; 454 return error;
455} 455}
456 456
457struct xattr_handler ext4_xattr_acl_access_handler = { 457const struct xattr_handler ext4_xattr_acl_access_handler = {
458 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS, 459 .flags = ACL_TYPE_ACCESS,
460 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
@@ -462,7 +462,7 @@ struct xattr_handler ext4_xattr_acl_access_handler = {
462 .set = ext4_xattr_set_acl, 462 .set = ext4_xattr_set_acl,
463}; 463};
464 464
465struct xattr_handler ext4_xattr_acl_default_handler = { 465const struct xattr_handler ext4_xattr_acl_default_handler = {
466 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT, 467 .flags = ACL_TYPE_DEFAULT,
468 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0d0c3239c1cd..ef3d980e67cb 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
100 if (ext4_should_writeback_data(inode) && 100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) && 101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER)) 102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 103 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
104 NULL, BLKDEV_IFL_WAIT);
104 jbd2_log_wait_commit(journal, commit_tid); 105 jbd2_log_wait_commit(journal, commit_tid);
105 } else if (journal->j_flags & JBD2_BARRIER) 106 } else if (journal->j_flags & JBD2_BARRIER)
106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 107 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
108 BLKDEV_IFL_WAIT);
107 return ret; 109 return ret;
108} 110}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 57f6eef6ccd6..1a0e183a2f04 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -979,16 +979,12 @@ got:
979 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); 979 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
980 } 980 }
981 981
982 inode->i_uid = current_fsuid(); 982 if (test_opt(sb, GRPID)) {
983 if (test_opt(sb, GRPID)) 983 inode->i_mode = mode;
984 inode->i_uid = current_fsuid();
984 inode->i_gid = dir->i_gid; 985 inode->i_gid = dir->i_gid;
985 else if (dir->i_mode & S_ISGID) {
986 inode->i_gid = dir->i_gid;
987 if (S_ISDIR(mode))
988 mode |= S_ISGID;
989 } else 986 } else
990 inode->i_gid = current_fsgid(); 987 inode_init_owner(inode, dir, mode);
991 inode->i_mode = mode;
992 988
993 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); 989 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
994 /* This is the optimal IO size (for stat), not the fs block size */ 990 /* This is the optimal IO size (for stat), not the fs block size */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 81d605412844..3e0f6af9d08d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5425 if (error) 5425 if (error)
5426 return error; 5426 return error;
5427 5427
5428 if (ia_valid & ATTR_SIZE) 5428 if (is_quota_modification(inode, attr))
5429 dquot_initialize(inode); 5429 dquot_initialize(inode);
5430 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 5430 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5431 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 5431 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b4c5aa8489d8..2de0e9515089 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -97,7 +97,7 @@ static int ext4_xattr_list(struct dentry *dentry, char *buffer,
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
99 99
100static struct xattr_handler *ext4_xattr_handler_map[] = { 100static const struct xattr_handler *ext4_xattr_handler_map[] = {
101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
102#ifdef CONFIG_EXT4_FS_POSIX_ACL 102#ifdef CONFIG_EXT4_FS_POSIX_ACL
103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, 103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
@@ -109,7 +109,7 @@ static struct xattr_handler *ext4_xattr_handler_map[] = {
109#endif 109#endif
110}; 110};
111 111
112struct xattr_handler *ext4_xattr_handlers[] = { 112const struct xattr_handler *ext4_xattr_handlers[] = {
113 &ext4_xattr_user_handler, 113 &ext4_xattr_user_handler,
114 &ext4_xattr_trusted_handler, 114 &ext4_xattr_trusted_handler,
115#ifdef CONFIG_EXT4_FS_POSIX_ACL 115#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -122,10 +122,10 @@ struct xattr_handler *ext4_xattr_handlers[] = {
122 NULL 122 NULL
123}; 123};
124 124
125static inline struct xattr_handler * 125static inline const struct xattr_handler *
126ext4_xattr_handler(int name_index) 126ext4_xattr_handler(int name_index)
127{ 127{
128 struct xattr_handler *handler = NULL; 128 const struct xattr_handler *handler = NULL;
129 129
130 if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) 130 if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
131 handler = ext4_xattr_handler_map[name_index]; 131 handler = ext4_xattr_handler_map[name_index];
@@ -332,7 +332,7 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
332 size_t rest = buffer_size; 332 size_t rest = buffer_size;
333 333
334 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { 334 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
335 struct xattr_handler *handler = 335 const struct xattr_handler *handler =
336 ext4_xattr_handler(entry->e_name_index); 336 ext4_xattr_handler(entry->e_name_index);
337 337
338 if (handler) { 338 if (handler) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 8ede88b18c29..518e96e43905 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -65,11 +65,11 @@ struct ext4_xattr_entry {
65 65
66# ifdef CONFIG_EXT4_FS_XATTR 66# ifdef CONFIG_EXT4_FS_XATTR
67 67
68extern struct xattr_handler ext4_xattr_user_handler; 68extern const struct xattr_handler ext4_xattr_user_handler;
69extern struct xattr_handler ext4_xattr_trusted_handler; 69extern const struct xattr_handler ext4_xattr_trusted_handler;
70extern struct xattr_handler ext4_xattr_acl_access_handler; 70extern const struct xattr_handler ext4_xattr_acl_access_handler;
71extern struct xattr_handler ext4_xattr_acl_default_handler; 71extern const struct xattr_handler ext4_xattr_acl_default_handler;
72extern struct xattr_handler ext4_xattr_security_handler; 72extern const struct xattr_handler ext4_xattr_security_handler;
73 73
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
@@ -86,7 +86,7 @@ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
86extern int init_ext4_xattr(void); 86extern int init_ext4_xattr(void);
87extern void exit_ext4_xattr(void); 87extern void exit_ext4_xattr(void);
88 88
89extern struct xattr_handler *ext4_xattr_handlers[]; 89extern const struct xattr_handler *ext4_xattr_handlers[];
90 90
91# else /* CONFIG_EXT4_FS_XATTR */ 91# else /* CONFIG_EXT4_FS_XATTR */
92 92
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 8b145e98df07..9b21268e121c 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -69,7 +69,7 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
69 return err; 69 return err;
70} 70}
71 71
72struct xattr_handler ext4_xattr_security_handler = { 72const struct xattr_handler ext4_xattr_security_handler = {
73 .prefix = XATTR_SECURITY_PREFIX, 73 .prefix = XATTR_SECURITY_PREFIX,
74 .list = ext4_xattr_security_list, 74 .list = ext4_xattr_security_list,
75 .get = ext4_xattr_security_get, 75 .get = ext4_xattr_security_get,
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 15b50edc6587..37e6ebca2cc3 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -51,7 +51,7 @@ ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
51 name, value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54const struct xattr_handler ext4_xattr_trusted_handler = {
55 .prefix = XATTR_TRUSTED_PREFIX, 55 .prefix = XATTR_TRUSTED_PREFIX,
56 .list = ext4_xattr_trusted_list, 56 .list = ext4_xattr_trusted_list,
57 .get = ext4_xattr_trusted_get, 57 .get = ext4_xattr_trusted_get,
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index c4ce05746ce1..98c375352d0e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -54,7 +54,7 @@ ext4_xattr_user_set(struct dentry *dentry, const char *name,
54 name, value, size, flags); 54 name, value, size, flags);
55} 55}
56 56
57struct xattr_handler ext4_xattr_user_handler = { 57const struct xattr_handler ext4_xattr_user_handler = {
58 .prefix = XATTR_USER_PREFIX, 58 .prefix = XATTR_USER_PREFIX,
59 .list = ext4_xattr_user_list, 59 .list = ext4_xattr_user_list,
60 .get = ext4_xattr_user_get, 60 .get = ext4_xattr_user_get,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 530b4ca01510..ee42b9e0b16a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -19,6 +19,7 @@
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/compat.h> 20#include <linux/compat.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <linux/kernel.h>
22#include "fat.h" 23#include "fat.h"
23 24
24/* 25/*
@@ -140,28 +141,22 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
140{ 141{
141 const wchar_t *ip; 142 const wchar_t *ip;
142 wchar_t ec; 143 wchar_t ec;
143 unsigned char *op, nc; 144 unsigned char *op;
144 int charlen; 145 int charlen;
145 int k;
146 146
147 ip = uni; 147 ip = uni;
148 op = ascii; 148 op = ascii;
149 149
150 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { 150 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) {
151 ec = *ip++; 151 ec = *ip++;
152 if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { 152 if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) {
153 op += charlen; 153 op += charlen;
154 len -= charlen; 154 len -= charlen;
155 } else { 155 } else {
156 if (uni_xlate == 1) { 156 if (uni_xlate == 1) {
157 *op = ':'; 157 *op++ = ':';
158 for (k = 4; k > 0; k--) { 158 op = pack_hex_byte(op, ec >> 8);
159 nc = ec & 0xF; 159 op = pack_hex_byte(op, ec);
160 op[k] = nc > 9 ? nc + ('a' - 10)
161 : nc + '0';
162 ec >>= 4;
163 }
164 op += 5;
165 len -= 5; 160 len -= 5;
166 } else { 161 } else {
167 *op++ = '?'; 162 *op++ = '?';
@@ -758,9 +753,10 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
758 return ret; 753 return ret;
759} 754}
760 755
761static int fat_dir_ioctl(struct inode *inode, struct file *filp, 756static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
762 unsigned int cmd, unsigned long arg) 757 unsigned long arg)
763{ 758{
759 struct inode *inode = filp->f_path.dentry->d_inode;
764 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; 760 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
765 int short_only, both; 761 int short_only, both;
766 762
@@ -774,7 +770,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
774 both = 1; 770 both = 1;
775 break; 771 break;
776 default: 772 default:
777 return fat_generic_ioctl(inode, filp, cmd, arg); 773 return fat_generic_ioctl(filp, cmd, arg);
778 } 774 }
779 775
780 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) 776 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
@@ -814,7 +810,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
814 both = 1; 810 both = 1;
815 break; 811 break;
816 default: 812 default:
817 return -ENOIOCTLCMD; 813 return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
818 } 814 }
819 815
820 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) 816 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
@@ -836,7 +832,7 @@ const struct file_operations fat_dir_operations = {
836 .llseek = generic_file_llseek, 832 .llseek = generic_file_llseek,
837 .read = generic_read_dir, 833 .read = generic_read_dir,
838 .readdir = fat_readdir, 834 .readdir = fat_readdir,
839 .ioctl = fat_dir_ioctl, 835 .unlocked_ioctl = fat_dir_ioctl,
840#ifdef CONFIG_COMPAT 836#ifdef CONFIG_COMPAT
841 .compat_ioctl = fat_compat_dir_ioctl, 837 .compat_ioctl = fat_compat_dir_ioctl,
842#endif 838#endif
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e6efdfa0f6db..eb821ee1a333 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -298,8 +298,8 @@ extern int fat_free_clusters(struct inode *inode, int cluster);
298extern int fat_count_free_clusters(struct super_block *sb); 298extern int fat_count_free_clusters(struct super_block *sb);
299 299
300/* fat/file.c */ 300/* fat/file.c */
301extern int fat_generic_ioctl(struct inode *inode, struct file *filp, 301extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
302 unsigned int cmd, unsigned long arg); 302 unsigned long arg);
303extern const struct file_operations fat_file_operations; 303extern const struct file_operations fat_file_operations;
304extern const struct inode_operations fat_file_inode_operations; 304extern const struct inode_operations fat_file_inode_operations;
305extern int fat_setattr(struct dentry * dentry, struct iattr * attr); 305extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index e8c159de236b..a14c2f6a489e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/capability.h> 9#include <linux/capability.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/compat.h>
11#include <linux/mount.h> 12#include <linux/mount.h>
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
@@ -114,9 +115,9 @@ out:
114 return err; 115 return err;
115} 116}
116 117
117int fat_generic_ioctl(struct inode *inode, struct file *filp, 118long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
118 unsigned int cmd, unsigned long arg)
119{ 119{
120 struct inode *inode = filp->f_path.dentry->d_inode;
120 u32 __user *user_attr = (u32 __user *)arg; 121 u32 __user *user_attr = (u32 __user *)arg;
121 122
122 switch (cmd) { 123 switch (cmd) {
@@ -129,6 +130,15 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
129 } 130 }
130} 131}
131 132
133#ifdef CONFIG_COMPAT
134static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd,
135 unsigned long arg)
136
137{
138 return fat_generic_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
139}
140#endif
141
132static int fat_file_release(struct inode *inode, struct file *filp) 142static int fat_file_release(struct inode *inode, struct file *filp)
133{ 143{
134 if ((filp->f_mode & FMODE_WRITE) && 144 if ((filp->f_mode & FMODE_WRITE) &&
@@ -159,7 +169,10 @@ const struct file_operations fat_file_operations = {
159 .aio_write = generic_file_aio_write, 169 .aio_write = generic_file_aio_write,
160 .mmap = generic_file_mmap, 170 .mmap = generic_file_mmap,
161 .release = fat_file_release, 171 .release = fat_file_release,
162 .ioctl = fat_generic_ioctl, 172 .unlocked_ioctl = fat_generic_ioctl,
173#ifdef CONFIG_COMPAT
174 .compat_ioctl = fat_generic_compat_ioctl,
175#endif
163 .fsync = fat_file_fsync, 176 .fsync = fat_file_fsync,
164 .splice_read = generic_file_splice_read, 177 .splice_read = generic_file_splice_read,
165}; 178};
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0ce143bd7d56..c611818893b2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1497,10 +1497,8 @@ out_fail:
1497 iput(fat_inode); 1497 iput(fat_inode);
1498 if (root_inode) 1498 if (root_inode)
1499 iput(root_inode); 1499 iput(root_inode);
1500 if (sbi->nls_io) 1500 unload_nls(sbi->nls_io);
1501 unload_nls(sbi->nls_io); 1501 unload_nls(sbi->nls_disk);
1502 if (sbi->nls_disk)
1503 unload_nls(sbi->nls_disk);
1504 if (sbi->options.iocharset != fat_default_iocharset) 1502 if (sbi->options.iocharset != fat_default_iocharset)
1505 kfree(sbi->options.iocharset); 1503 kfree(sbi->options.iocharset);
1506 sb->s_fs_info = NULL; 1504 sb->s_fs_info = NULL;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 452d02f9075e..f74d270ba155 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -14,6 +14,7 @@
14#include <linux/dnotify.h> 14#include <linux/dnotify.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/pipe_fs_i.h>
17#include <linux/security.h> 18#include <linux/security.h>
18#include <linux/ptrace.h> 19#include <linux/ptrace.h>
19#include <linux/signal.h> 20#include <linux/signal.h>
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
412 case F_NOTIFY: 413 case F_NOTIFY:
413 err = fcntl_dirnotify(fd, filp, arg); 414 err = fcntl_dirnotify(fd, filp, arg);
414 break; 415 break;
416 case F_SETPIPE_SZ:
417 case F_GETPIPE_SZ:
418 err = pipe_fcntl(filp, cmd, arg);
419 break;
415 default: 420 default:
416 break; 421 break;
417 } 422 }
@@ -614,9 +619,15 @@ int send_sigurg(struct fown_struct *fown)
614 return ret; 619 return ret;
615} 620}
616 621
617static DEFINE_RWLOCK(fasync_lock); 622static DEFINE_SPINLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 623static struct kmem_cache *fasync_cache __read_mostly;
619 624
625static void fasync_free_rcu(struct rcu_head *head)
626{
627 kmem_cache_free(fasync_cache,
628 container_of(head, struct fasync_struct, fa_rcu));
629}
630
620/* 631/*
621 * Remove a fasync entry. If successfully removed, return 632 * Remove a fasync entry. If successfully removed, return
622 * positive and clear the FASYNC flag. If no entry exists, 633 * positive and clear the FASYNC flag. If no entry exists,
@@ -625,8 +636,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
625 * NOTE! It is very important that the FASYNC flag always 636 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list". 637 * match the state "is the filp on a fasync list".
627 * 638 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
630 */ 639 */
631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 640static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
632{ 641{
@@ -634,17 +643,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
634 int result = 0; 643 int result = 0;
635 644
636 spin_lock(&filp->f_lock); 645 spin_lock(&filp->f_lock);
637 write_lock_irq(&fasync_lock); 646 spin_lock(&fasync_lock);
638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 647 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
639 if (fa->fa_file != filp) 648 if (fa->fa_file != filp)
640 continue; 649 continue;
650
651 spin_lock_irq(&fa->fa_lock);
652 fa->fa_file = NULL;
653 spin_unlock_irq(&fa->fa_lock);
654
641 *fp = fa->fa_next; 655 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa); 656 call_rcu(&fa->fa_rcu, fasync_free_rcu);
643 filp->f_flags &= ~FASYNC; 657 filp->f_flags &= ~FASYNC;
644 result = 1; 658 result = 1;
645 break; 659 break;
646 } 660 }
647 write_unlock_irq(&fasync_lock); 661 spin_unlock(&fasync_lock);
648 spin_unlock(&filp->f_lock); 662 spin_unlock(&filp->f_lock);
649 return result; 663 return result;
650} 664}
@@ -666,25 +680,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
666 return -ENOMEM; 680 return -ENOMEM;
667 681
668 spin_lock(&filp->f_lock); 682 spin_lock(&filp->f_lock);
669 write_lock_irq(&fasync_lock); 683 spin_lock(&fasync_lock);
670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 684 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
671 if (fa->fa_file != filp) 685 if (fa->fa_file != filp)
672 continue; 686 continue;
687
688 spin_lock_irq(&fa->fa_lock);
673 fa->fa_fd = fd; 689 fa->fa_fd = fd;
690 spin_unlock_irq(&fa->fa_lock);
691
674 kmem_cache_free(fasync_cache, new); 692 kmem_cache_free(fasync_cache, new);
675 goto out; 693 goto out;
676 } 694 }
677 695
696 spin_lock_init(&new->fa_lock);
678 new->magic = FASYNC_MAGIC; 697 new->magic = FASYNC_MAGIC;
679 new->fa_file = filp; 698 new->fa_file = filp;
680 new->fa_fd = fd; 699 new->fa_fd = fd;
681 new->fa_next = *fapp; 700 new->fa_next = *fapp;
682 *fapp = new; 701 rcu_assign_pointer(*fapp, new);
683 result = 1; 702 result = 1;
684 filp->f_flags |= FASYNC; 703 filp->f_flags |= FASYNC;
685 704
686out: 705out:
687 write_unlock_irq(&fasync_lock); 706 spin_unlock(&fasync_lock);
688 spin_unlock(&filp->f_lock); 707 spin_unlock(&filp->f_lock);
689 return result; 708 return result;
690} 709}
@@ -704,37 +723,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
704 723
705EXPORT_SYMBOL(fasync_helper); 724EXPORT_SYMBOL(fasync_helper);
706 725
707void __kill_fasync(struct fasync_struct *fa, int sig, int band) 726/*
727 * rcu_read_lock() is held
728 */
729static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
708{ 730{
709 while (fa) { 731 while (fa) {
710 struct fown_struct * fown; 732 struct fown_struct *fown;
711 if (fa->magic != FASYNC_MAGIC) { 733 if (fa->magic != FASYNC_MAGIC) {
712 printk(KERN_ERR "kill_fasync: bad magic number in " 734 printk(KERN_ERR "kill_fasync: bad magic number in "
713 "fasync_struct!\n"); 735 "fasync_struct!\n");
714 return; 736 return;
715 } 737 }
716 fown = &fa->fa_file->f_owner; 738 spin_lock(&fa->fa_lock);
717 /* Don't send SIGURG to processes which have not set a 739 if (fa->fa_file) {
718 queued signum: SIGURG has its own default signalling 740 fown = &fa->fa_file->f_owner;
719 mechanism. */ 741 /* Don't send SIGURG to processes which have not set a
720 if (!(sig == SIGURG && fown->signum == 0)) 742 queued signum: SIGURG has its own default signalling
721 send_sigio(fown, fa->fa_fd, band); 743 mechanism. */
722 fa = fa->fa_next; 744 if (!(sig == SIGURG && fown->signum == 0))
745 send_sigio(fown, fa->fa_fd, band);
746 }
747 spin_unlock(&fa->fa_lock);
748 fa = rcu_dereference(fa->fa_next);
723 } 749 }
724} 750}
725 751
726EXPORT_SYMBOL(__kill_fasync);
727
728void kill_fasync(struct fasync_struct **fp, int sig, int band) 752void kill_fasync(struct fasync_struct **fp, int sig, int band)
729{ 753{
730 /* First a quick test without locking: usually 754 /* First a quick test without locking: usually
731 * the list is empty. 755 * the list is empty.
732 */ 756 */
733 if (*fp) { 757 if (*fp) {
734 read_lock(&fasync_lock); 758 rcu_read_lock();
735 /* reread *fp after obtaining the lock */ 759 kill_fasync_rcu(rcu_dereference(*fp), sig, band);
736 __kill_fasync(*fp, sig, band); 760 rcu_read_unlock();
737 read_unlock(&fasync_lock);
738 } 761 }
739} 762}
740EXPORT_SYMBOL(kill_fasync); 763EXPORT_SYMBOL(kill_fasync);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4b37f7cea4dd..5c4161f1fd9a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,9 +42,10 @@ struct wb_writeback_args {
42 long nr_pages; 42 long nr_pages;
43 struct super_block *sb; 43 struct super_block *sb;
44 enum writeback_sync_modes sync_mode; 44 enum writeback_sync_modes sync_mode;
45 int for_kupdate:1; 45 unsigned int for_kupdate:1;
46 int range_cyclic:1; 46 unsigned int range_cyclic:1;
47 int for_background:1; 47 unsigned int for_background:1;
48 unsigned int sb_pinned:1;
48}; 49};
49 50
50/* 51/*
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
192} 193}
193 194
194static void bdi_alloc_queue_work(struct backing_dev_info *bdi, 195static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
195 struct wb_writeback_args *args) 196 struct wb_writeback_args *args,
197 int wait)
196{ 198{
197 struct bdi_work *work; 199 struct bdi_work *work;
198 200
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
204 if (work) { 206 if (work) {
205 bdi_work_init(work, args); 207 bdi_work_init(work, args);
206 bdi_queue_work(bdi, work); 208 bdi_queue_work(bdi, work);
209 if (wait)
210 bdi_wait_on_work_clear(work);
207 } else { 211 } else {
208 struct bdi_writeback *wb = &bdi->wb; 212 struct bdi_writeback *wb = &bdi->wb;
209 213
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
230 .sync_mode = WB_SYNC_ALL, 234 .sync_mode = WB_SYNC_ALL,
231 .nr_pages = LONG_MAX, 235 .nr_pages = LONG_MAX,
232 .range_cyclic = 0, 236 .range_cyclic = 0,
237 /*
238 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
239 * let's make it explicitly clear.
240 */
241 .sb_pinned = 1,
233 }; 242 };
234 struct bdi_work work; 243 struct bdi_work work;
235 244
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
245 * @bdi: the backing device to write from 254 * @bdi: the backing device to write from
246 * @sb: write inodes from this super_block 255 * @sb: write inodes from this super_block
247 * @nr_pages: the number of pages to write 256 * @nr_pages: the number of pages to write
257 * @sb_locked: caller already holds sb umount sem.
248 * 258 *
249 * Description: 259 * Description:
250 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 260 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
251 * started when this function returns, we make no guarantees on 261 * started when this function returns, we make no guarantees on
252 * completion. Caller need not hold sb s_umount semaphore. 262 * completion. Caller specifies whether sb umount sem is held already or not.
253 * 263 *
254 */ 264 */
255void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 265void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
256 long nr_pages) 266 long nr_pages, int sb_locked)
257{ 267{
258 struct wb_writeback_args args = { 268 struct wb_writeback_args args = {
259 .sb = sb, 269 .sb = sb,
260 .sync_mode = WB_SYNC_NONE, 270 .sync_mode = WB_SYNC_NONE,
261 .nr_pages = nr_pages, 271 .nr_pages = nr_pages,
262 .range_cyclic = 1, 272 .range_cyclic = 1,
273 .sb_pinned = sb_locked,
263 }; 274 };
264 275
265 /* 276 /*
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
271 args.for_background = 1; 282 args.for_background = 1;
272 } 283 }
273 284
274 bdi_alloc_queue_work(bdi, &args); 285 bdi_alloc_queue_work(bdi, &args, sb_locked);
275} 286}
276 287
277/* 288/*
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
452 463
453 BUG_ON(inode->i_state & I_SYNC); 464 BUG_ON(inode->i_state & I_SYNC);
454 465
455 /* Set I_SYNC, reset I_DIRTY */ 466 /* Set I_SYNC, reset I_DIRTY_PAGES */
456 dirty = inode->i_state & I_DIRTY;
457 inode->i_state |= I_SYNC; 467 inode->i_state |= I_SYNC;
458 inode->i_state &= ~I_DIRTY; 468 inode->i_state &= ~I_DIRTY_PAGES;
459
460 spin_unlock(&inode_lock); 469 spin_unlock(&inode_lock);
461 470
462 ret = do_writepages(mapping, wbc); 471 ret = do_writepages(mapping, wbc);
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
472 ret = err; 481 ret = err;
473 } 482 }
474 483
484 /*
485 * Some filesystems may redirty the inode during the writeback
486 * due to delalloc, clear dirty metadata flags right before
487 * write_inode()
488 */
489 spin_lock(&inode_lock);
490 dirty = inode->i_state & I_DIRTY;
491 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
492 spin_unlock(&inode_lock);
475 /* Don't write the inode if only I_DIRTY_PAGES was set */ 493 /* Don't write the inode if only I_DIRTY_PAGES was set */
476 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 494 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
477 int err = write_inode(inode, wbc); 495 int err = write_inode(inode, wbc);
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
577 /* 595 /*
578 * Caller must already hold the ref for this 596 * Caller must already hold the ref for this
579 */ 597 */
580 if (wbc->sync_mode == WB_SYNC_ALL) { 598 if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
581 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 599 WARN_ON(!rwsem_is_locked(&sb->s_umount));
582 return SB_NOT_PINNED; 600 return SB_NOT_PINNED;
583 } 601 }
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb,
751 .for_kupdate = args->for_kupdate, 769 .for_kupdate = args->for_kupdate,
752 .for_background = args->for_background, 770 .for_background = args->for_background,
753 .range_cyclic = args->range_cyclic, 771 .range_cyclic = args->range_cyclic,
772 .sb_pinned = args->sb_pinned,
754 }; 773 };
755 unsigned long oldest_jif; 774 unsigned long oldest_jif;
756 long wrote = 0; 775 long wrote = 0;
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
852 unsigned long expired; 871 unsigned long expired;
853 long nr_pages; 872 long nr_pages;
854 873
874 /*
875 * When set to zero, disable periodic writeback
876 */
877 if (!dirty_writeback_interval)
878 return 0;
879
855 expired = wb->last_old_flush + 880 expired = wb->last_old_flush +
856 msecs_to_jiffies(dirty_writeback_interval * 10); 881 msecs_to_jiffies(dirty_writeback_interval * 10);
857 if (time_before(jiffies, expired)) 882 if (time_before(jiffies, expired))
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
887 912
888 while ((work = get_next_work_item(bdi, wb)) != NULL) { 913 while ((work = get_next_work_item(bdi, wb)) != NULL) {
889 struct wb_writeback_args args = work->args; 914 struct wb_writeback_args args = work->args;
915 int post_clear;
890 916
891 /* 917 /*
892 * Override sync mode, in case we must wait for completion 918 * Override sync mode, in case we must wait for completion
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
894 if (force_wait) 920 if (force_wait)
895 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 921 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
896 922
923 post_clear = WB_SYNC_ALL || args.sb_pinned;
924
897 /* 925 /*
898 * If this isn't a data integrity operation, just notify 926 * If this isn't a data integrity operation, just notify
899 * that we have seen this work and we are now starting it. 927 * that we have seen this work and we are now starting it.
900 */ 928 */
901 if (args.sync_mode == WB_SYNC_NONE) 929 if (!post_clear)
902 wb_clear_pending(wb, work); 930 wb_clear_pending(wb, work);
903 931
904 wrote += wb_writeback(wb, &args); 932 wrote += wb_writeback(wb, &args);
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
907 * This is a data integrity writeback, so only do the 935 * This is a data integrity writeback, so only do the
908 * notification when we have completed the work. 936 * notification when we have completed the work.
909 */ 937 */
910 if (args.sync_mode == WB_SYNC_ALL) 938 if (post_clear)
911 wb_clear_pending(wb, work); 939 wb_clear_pending(wb, work);
912 } 940 }
913 941
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb)
947 break; 975 break;
948 } 976 }
949 977
950 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); 978 if (dirty_writeback_interval) {
951 schedule_timeout_interruptible(wait_jiffies); 979 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
980 schedule_timeout_interruptible(wait_jiffies);
981 } else {
982 set_current_state(TASK_INTERRUPTIBLE);
983 if (list_empty_careful(&wb->bdi->work_list) &&
984 !kthread_should_stop())
985 schedule();
986 __set_current_state(TASK_RUNNING);
987 }
988
952 try_to_freeze(); 989 try_to_freeze();
953 } 990 }
954 991
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
974 if (!bdi_has_dirty_io(bdi)) 1011 if (!bdi_has_dirty_io(bdi))
975 continue; 1012 continue;
976 1013
977 bdi_alloc_queue_work(bdi, &args); 1014 bdi_alloc_queue_work(bdi, &args, 0);
978 } 1015 }
979 1016
980 rcu_read_unlock(); 1017 rcu_read_unlock();
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb)
1183 iput(old_inode); 1220 iput(old_inode);
1184} 1221}
1185 1222
1223static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1224{
1225 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1226 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1227 long nr_to_write;
1228
1229 nr_to_write = nr_dirty + nr_unstable +
1230 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1231
1232 bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
1233}
1234
1186/** 1235/**
1187 * writeback_inodes_sb - writeback dirty inodes from given super_block 1236 * writeback_inodes_sb - writeback dirty inodes from given super_block
1188 * @sb: the superblock 1237 * @sb: the superblock
@@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb)
1194 */ 1243 */
1195void writeback_inodes_sb(struct super_block *sb) 1244void writeback_inodes_sb(struct super_block *sb)
1196{ 1245{
1197 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1246 __writeback_inodes_sb(sb, 0);
1198 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1199 long nr_to_write;
1200
1201 nr_to_write = nr_dirty + nr_unstable +
1202 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1203
1204 bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
1205} 1247}
1206EXPORT_SYMBOL(writeback_inodes_sb); 1248EXPORT_SYMBOL(writeback_inodes_sb);
1207 1249
1208/** 1250/**
1251 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
1252 * @sb: the superblock
1253 *
1254 * Like writeback_inodes_sb(), except the caller already holds the
1255 * sb umount sem.
1256 */
1257void writeback_inodes_sb_locked(struct super_block *sb)
1258{
1259 __writeback_inodes_sb(sb, 1);
1260}
1261
1262/**
1209 * writeback_inodes_sb_if_idle - start writeback if none underway 1263 * writeback_inodes_sb_if_idle - start writeback if none underway
1210 * @sb: the superblock 1264 * @sb: the superblock
1211 * 1265 *
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index fe5df5457656..99800e564157 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -201,7 +201,7 @@ generic_check_acl(struct inode *inode, int mask)
201 return -EAGAIN; 201 return -EAGAIN;
202} 202}
203 203
204struct xattr_handler generic_acl_access_handler = { 204const struct xattr_handler generic_acl_access_handler = {
205 .prefix = POSIX_ACL_XATTR_ACCESS, 205 .prefix = POSIX_ACL_XATTR_ACCESS,
206 .flags = ACL_TYPE_ACCESS, 206 .flags = ACL_TYPE_ACCESS,
207 .list = generic_acl_list, 207 .list = generic_acl_list,
@@ -209,7 +209,7 @@ struct xattr_handler generic_acl_access_handler = {
209 .set = generic_acl_set, 209 .set = generic_acl_set,
210}; 210};
211 211
212struct xattr_handler generic_acl_default_handler = { 212const struct xattr_handler generic_acl_default_handler = {
213 .prefix = POSIX_ACL_XATTR_DEFAULT, 213 .prefix = POSIX_ACL_XATTR_DEFAULT,
214 .flags = ACL_TYPE_DEFAULT, 214 .flags = ACL_TYPE_DEFAULT,
215 .list = generic_acl_list, 215 .list = generic_acl_list,
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 87ee309d4c24..9fb76b0a0485 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -335,7 +335,7 @@ out:
335 return error; 335 return error;
336} 336}
337 337
338struct xattr_handler gfs2_xattr_system_handler = { 338const struct xattr_handler gfs2_xattr_system_handler = {
339 .prefix = XATTR_SYSTEM_PREFIX, 339 .prefix = XATTR_SYSTEM_PREFIX,
340 .flags = GFS2_EATYPE_SYS, 340 .flags = GFS2_EATYPE_SYS,
341 .get = gfs2_xattr_system_get, 341 .get = gfs2_xattr_system_get,
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 9306a2e6620c..b522b0cb39ea 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -19,6 +19,6 @@
19extern int gfs2_check_acl(struct inode *inode, int mask); 19extern int gfs2_check_acl(struct inode *inode, int mask);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
23 23
24#endif /* __ACL_DOT_H__ */ 24#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0c1d0b82dcf1..a739a0a48067 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -418,6 +418,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
418static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) 418static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
419{ 419{
420 struct buffer_head *dibh; 420 struct buffer_head *dibh;
421 u64 dsize = i_size_read(&ip->i_inode);
421 void *kaddr; 422 void *kaddr;
422 int error; 423 int error;
423 424
@@ -437,9 +438,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
437 return error; 438 return error;
438 439
439 kaddr = kmap_atomic(page, KM_USER0); 440 kaddr = kmap_atomic(page, KM_USER0);
440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), 441 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
441 ip->i_disksize); 442 dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
442 memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize); 443 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
444 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
443 kunmap_atomic(kaddr, KM_USER0); 445 kunmap_atomic(kaddr, KM_USER0);
444 flush_dcache_page(page); 446 flush_dcache_page(page);
445 brelse(dibh); 447 brelse(dibh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5e411d5f4697..4a48c0f4b402 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -71,11 +71,13 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
71 71
72 if (!PageUptodate(page)) { 72 if (!PageUptodate(page)) {
73 void *kaddr = kmap(page); 73 void *kaddr = kmap(page);
74 u64 dsize = i_size_read(inode);
75
76 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
77 dsize = dibh->b_size - sizeof(struct gfs2_dinode);
74 78
75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), 79 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
76 ip->i_disksize); 80 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
77 memset(kaddr + ip->i_disksize, 0,
78 PAGE_CACHE_SIZE - ip->i_disksize);
79 kunmap(page); 81 kunmap(page);
80 82
81 SetPageUptodate(page); 83 SetPageUptodate(page);
@@ -1038,13 +1040,14 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
1038 goto out; 1040 goto out;
1039 1041
1040 if (gfs2_is_stuffed(ip)) { 1042 if (gfs2_is_stuffed(ip)) {
1041 ip->i_disksize = size; 1043 u64 dsize = size + sizeof(struct gfs2_inode);
1042 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1043 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1045 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1044 gfs2_dinode_out(ip, dibh->b_data); 1046 gfs2_dinode_out(ip, dibh->b_data);
1045 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 1047 if (dsize > dibh->b_size)
1048 dsize = dibh->b_size;
1049 gfs2_buffer_clear_tail(dibh, dsize);
1046 error = 1; 1050 error = 1;
1047
1048 } else { 1051 } else {
1049 if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) 1052 if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
1050 error = gfs2_block_truncate_page(ip->i_inode.i_mapping); 1053 error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 25fddc100f18..8295c5b5d4a9 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1475,7 +1475,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1475 inode = gfs2_inode_lookup(dir->i_sb, 1475 inode = gfs2_inode_lookup(dir->i_sb,
1476 be16_to_cpu(dent->de_type), 1476 be16_to_cpu(dent->de_type),
1477 be64_to_cpu(dent->de_inum.no_addr), 1477 be64_to_cpu(dent->de_inum.no_addr),
1478 be64_to_cpu(dent->de_inum.no_formal_ino), 0); 1478 be64_to_cpu(dent->de_inum.no_formal_ino));
1479 brelse(bh); 1479 brelse(bh);
1480 return inode; 1480 return inode;
1481 } 1481 }
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index c22c21174833..dfe237a3f8ad 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -168,7 +168,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
168 if (error) 168 if (error)
169 goto fail; 169 goto fail;
170 170
171 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0); 171 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
172 if (IS_ERR(inode)) { 172 if (IS_ERR(inode)) {
173 error = PTR_ERR(inode); 173 error = PTR_ERR(inode);
174 goto fail; 174 goto fail;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 454d4b4eb36b..ddcdbf493536 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -855,6 +855,9 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
855 gh->gh_flags = flags; 855 gh->gh_flags = flags;
856 gh->gh_iflags = 0; 856 gh->gh_iflags = 0;
857 gh->gh_ip = (unsigned long)__builtin_return_address(0); 857 gh->gh_ip = (unsigned long)__builtin_return_address(0);
858 if (gh->gh_owner_pid)
859 put_pid(gh->gh_owner_pid);
860 gh->gh_owner_pid = get_pid(task_pid(current));
858} 861}
859 862
860/** 863/**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3aac46f6853e..b5d7363b22da 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -439,9 +439,6 @@ struct gfs2_args {
439struct gfs2_tune { 439struct gfs2_tune {
440 spinlock_t gt_spin; 440 spinlock_t gt_spin;
441 441
442 unsigned int gt_incore_log_blocks;
443 unsigned int gt_log_flush_secs;
444
445 unsigned int gt_logd_secs; 442 unsigned int gt_logd_secs;
446 443
447 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ 444 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
@@ -462,6 +459,7 @@ enum {
462 SDF_SHUTDOWN = 2, 459 SDF_SHUTDOWN = 2,
463 SDF_NOBARRIERS = 3, 460 SDF_NOBARRIERS = 3,
464 SDF_NORECOVERY = 4, 461 SDF_NORECOVERY = 4,
462 SDF_DEMOTE = 5,
465}; 463};
466 464
467#define GFS2_FSNAME_LEN 256 465#define GFS2_FSNAME_LEN 256
@@ -618,6 +616,7 @@ struct gfs2_sbd {
618 unsigned int sd_log_commited_databuf; 616 unsigned int sd_log_commited_databuf;
619 int sd_log_commited_revoke; 617 int sd_log_commited_revoke;
620 618
619 atomic_t sd_log_pinned;
621 unsigned int sd_log_num_buf; 620 unsigned int sd_log_num_buf;
622 unsigned int sd_log_num_revoke; 621 unsigned int sd_log_num_revoke;
623 unsigned int sd_log_num_rg; 622 unsigned int sd_log_num_rg;
@@ -629,15 +628,17 @@ struct gfs2_sbd {
629 struct list_head sd_log_le_databuf; 628 struct list_head sd_log_le_databuf;
630 struct list_head sd_log_le_ordered; 629 struct list_head sd_log_le_ordered;
631 630
631 atomic_t sd_log_thresh1;
632 atomic_t sd_log_thresh2;
632 atomic_t sd_log_blks_free; 633 atomic_t sd_log_blks_free;
633 struct mutex sd_log_reserve_mutex; 634 wait_queue_head_t sd_log_waitq;
635 wait_queue_head_t sd_logd_waitq;
634 636
635 u64 sd_log_sequence; 637 u64 sd_log_sequence;
636 unsigned int sd_log_head; 638 unsigned int sd_log_head;
637 unsigned int sd_log_tail; 639 unsigned int sd_log_tail;
638 int sd_log_idle; 640 int sd_log_idle;
639 641
640 unsigned long sd_log_flush_time;
641 struct rw_semaphore sd_log_flush_lock; 642 struct rw_semaphore sd_log_flush_lock;
642 atomic_t sd_log_in_flight; 643 atomic_t sd_log_in_flight;
643 wait_queue_head_t sd_log_flush_wait; 644 wait_queue_head_t sd_log_flush_wait;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b1bf2694fb2b..51d8061fa07a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -158,7 +158,6 @@ void gfs2_set_iop(struct inode *inode)
158 * @sb: The super block 158 * @sb: The super block
159 * @no_addr: The inode number 159 * @no_addr: The inode number
160 * @type: The type of the inode 160 * @type: The type of the inode
161 * @skip_freeing: set this not return an inode if it is currently being freed.
162 * 161 *
163 * Returns: A VFS inode, or an error 162 * Returns: A VFS inode, or an error
164 */ 163 */
@@ -166,17 +165,14 @@ void gfs2_set_iop(struct inode *inode)
166struct inode *gfs2_inode_lookup(struct super_block *sb, 165struct inode *gfs2_inode_lookup(struct super_block *sb,
167 unsigned int type, 166 unsigned int type,
168 u64 no_addr, 167 u64 no_addr,
169 u64 no_formal_ino, int skip_freeing) 168 u64 no_formal_ino)
170{ 169{
171 struct inode *inode; 170 struct inode *inode;
172 struct gfs2_inode *ip; 171 struct gfs2_inode *ip;
173 struct gfs2_glock *io_gl; 172 struct gfs2_glock *io_gl;
174 int error; 173 int error;
175 174
176 if (skip_freeing) 175 inode = gfs2_iget(sb, no_addr);
177 inode = gfs2_iget_skip(sb, no_addr);
178 else
179 inode = gfs2_iget(sb, no_addr);
180 ip = GFS2_I(inode); 176 ip = GFS2_I(inode);
181 177
182 if (!inode) 178 if (!inode)
@@ -234,13 +230,100 @@ fail_glock:
234fail_iopen: 230fail_iopen:
235 gfs2_glock_put(io_gl); 231 gfs2_glock_put(io_gl);
236fail_put: 232fail_put:
237 ip->i_gl->gl_object = NULL; 233 if (inode->i_state & I_NEW)
234 ip->i_gl->gl_object = NULL;
238 gfs2_glock_put(ip->i_gl); 235 gfs2_glock_put(ip->i_gl);
239fail: 236fail:
240 iget_failed(inode); 237 if (inode->i_state & I_NEW)
238 iget_failed(inode);
239 else
240 iput(inode);
241 return ERR_PTR(error); 241 return ERR_PTR(error);
242} 242}
243 243
244/**
245 * gfs2_unlinked_inode_lookup - Lookup an unlinked inode for reclamation
246 * @sb: The super block
247 * @no_addr: The inode number
248 * @inode: A pointer to the inode found, if any
249 *
250 * Returns: 0 and *inode if no errors occurred. If an error occurs,
251 * the resulting *inode may or may not be NULL.
252 */
253
254int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr,
255 struct inode **inode)
256{
257 struct gfs2_sbd *sdp;
258 struct gfs2_inode *ip;
259 struct gfs2_glock *io_gl;
260 int error;
261 struct gfs2_holder gh;
262
263 *inode = gfs2_iget_skip(sb, no_addr);
264
265 if (!(*inode))
266 return -ENOBUFS;
267
268 if (!((*inode)->i_state & I_NEW))
269 return -ENOBUFS;
270
271 ip = GFS2_I(*inode);
272 sdp = GFS2_SB(*inode);
273 ip->i_no_formal_ino = -1;
274
275 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
276 if (unlikely(error))
277 goto fail;
278 ip->i_gl->gl_object = ip;
279
280 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
281 if (unlikely(error))
282 goto fail_put;
283
284 set_bit(GIF_INVALID, &ip->i_flags);
285 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
286 &ip->i_iopen_gh);
287 if (unlikely(error)) {
288 if (error == GLR_TRYFAILED)
289 error = 0;
290 goto fail_iopen;
291 }
292 ip->i_iopen_gh.gh_gl->gl_object = ip;
293 gfs2_glock_put(io_gl);
294
295 (*inode)->i_mode = DT2IF(DT_UNKNOWN);
296
297 /*
298 * We must read the inode in order to work out its type in
299 * this case. Note that this doesn't happen often as we normally
300 * know the type beforehand. This code path only occurs during
301 * unlinked inode recovery (where it is safe to do this glock,
302 * which is not true in the general case).
303 */
304 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
305 &gh);
306 if (unlikely(error)) {
307 if (error == GLR_TRYFAILED)
308 error = 0;
309 goto fail_glock;
310 }
311 /* Inode is now uptodate */
312 gfs2_glock_dq_uninit(&gh);
313 gfs2_set_iop(*inode);
314
315 return 0;
316fail_glock:
317 gfs2_glock_dq(&ip->i_iopen_gh);
318fail_iopen:
319 gfs2_glock_put(io_gl);
320fail_put:
321 ip->i_gl->gl_object = NULL;
322 gfs2_glock_put(ip->i_gl);
323fail:
324 return error;
325}
326
244static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 327static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
245{ 328{
246 const struct gfs2_dinode *str = buf; 329 const struct gfs2_dinode *str = buf;
@@ -862,7 +945,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
862 goto fail_gunlock2; 945 goto fail_gunlock2;
863 946
864 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 947 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
865 inum.no_formal_ino, 0); 948 inum.no_formal_ino);
866 if (IS_ERR(inode)) 949 if (IS_ERR(inode))
867 goto fail_gunlock2; 950 goto fail_gunlock2;
868 951
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index c341aaf67adb..e161461d4c57 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,8 +83,9 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
83 83
84extern void gfs2_set_iop(struct inode *inode); 84extern void gfs2_set_iop(struct inode *inode);
85extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 85extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
86 u64 no_addr, u64 no_formal_ino, 86 u64 no_addr, u64 no_formal_ino);
87 int skip_freeing); 87extern int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr,
88 struct inode **inode);
88extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 89extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
89 90
90extern int gfs2_inode_refresh(struct gfs2_inode *ip); 91extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e5bf4b59d46e..b593f0e28f25 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -168,12 +168,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
168 return list_empty(&ai->ai_ail1_list); 168 return list_empty(&ai->ai_ail1_list);
169} 169}
170 170
171static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) 171static void gfs2_ail1_start(struct gfs2_sbd *sdp)
172{ 172{
173 struct list_head *head; 173 struct list_head *head;
174 u64 sync_gen; 174 u64 sync_gen;
175 struct list_head *first; 175 struct gfs2_ail *ai;
176 struct gfs2_ail *first_ai, *ai, *tmp;
177 int done = 0; 176 int done = 0;
178 177
179 gfs2_log_lock(sdp); 178 gfs2_log_lock(sdp);
@@ -184,21 +183,9 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
184 } 183 }
185 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
186 185
187 first = head->prev;
188 first_ai = list_entry(first, struct gfs2_ail, ai_list);
189 first_ai->ai_sync_gen = sync_gen;
190 gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
191
192 if (flags & DIO_ALL)
193 first = NULL;
194
195 while(!done) { 186 while(!done) {
196 if (first && (head->prev != first ||
197 gfs2_ail1_empty_one(sdp, first_ai, 0)))
198 break;
199
200 done = 1; 187 done = 1;
201 list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) { 188 list_for_each_entry_reverse(ai, head, ai_list) {
202 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
203 continue; 190 continue;
204 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
@@ -290,58 +277,57 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
290 * flush time, so we ensure that we have just enough free blocks at all 277 * flush time, so we ensure that we have just enough free blocks at all
291 * times to avoid running out during a log flush. 278 * times to avoid running out during a log flush.
292 * 279 *
280 * We no longer flush the log here, instead we wake up logd to do that
281 * for us. To avoid the thundering herd and to ensure that we deal fairly
282 * with queued waiters, we use an exclusive wait. This means that when we
283 * get woken with enough journal space to get our reservation, we need to
284 * wake the next waiter on the list.
285 *
293 * Returns: errno 286 * Returns: errno
294 */ 287 */
295 288
296int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) 289int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
297{ 290{
298 unsigned int try = 0;
299 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); 291 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
292 unsigned wanted = blks + reserved_blks;
293 DEFINE_WAIT(wait);
294 int did_wait = 0;
295 unsigned int free_blocks;
300 296
301 if (gfs2_assert_warn(sdp, blks) || 297 if (gfs2_assert_warn(sdp, blks) ||
302 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) 298 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
303 return -EINVAL; 299 return -EINVAL;
304 300retry:
305 mutex_lock(&sdp->sd_log_reserve_mutex); 301 free_blocks = atomic_read(&sdp->sd_log_blks_free);
306 gfs2_log_lock(sdp); 302 if (unlikely(free_blocks <= wanted)) {
307 while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) { 303 do {
308 gfs2_log_unlock(sdp); 304 prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
309 gfs2_ail1_empty(sdp, 0); 305 TASK_UNINTERRUPTIBLE);
310 gfs2_log_flush(sdp, NULL); 306 wake_up(&sdp->sd_logd_waitq);
311 307 did_wait = 1;
312 if (try++) 308 if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
313 gfs2_ail1_start(sdp, 0); 309 io_schedule();
314 gfs2_log_lock(sdp); 310 free_blocks = atomic_read(&sdp->sd_log_blks_free);
311 } while(free_blocks <= wanted);
312 finish_wait(&sdp->sd_log_waitq, &wait);
315 } 313 }
316 atomic_sub(blks, &sdp->sd_log_blks_free); 314 if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
315 free_blocks - blks) != free_blocks)
316 goto retry;
317 trace_gfs2_log_blocks(sdp, -blks); 317 trace_gfs2_log_blocks(sdp, -blks);
318 gfs2_log_unlock(sdp); 318
319 mutex_unlock(&sdp->sd_log_reserve_mutex); 319 /*
320 * If we waited, then so might others, wake them up _after_ we get
321 * our share of the log.
322 */
323 if (unlikely(did_wait))
324 wake_up(&sdp->sd_log_waitq);
320 325
321 down_read(&sdp->sd_log_flush_lock); 326 down_read(&sdp->sd_log_flush_lock);
322 327
323 return 0; 328 return 0;
324} 329}
325 330
326/**
327 * gfs2_log_release - Release a given number of log blocks
328 * @sdp: The GFS2 superblock
329 * @blks: The number of blocks
330 *
331 */
332
333void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
334{
335
336 gfs2_log_lock(sdp);
337 atomic_add(blks, &sdp->sd_log_blks_free);
338 trace_gfs2_log_blocks(sdp, blks);
339 gfs2_assert_withdraw(sdp,
340 atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
341 gfs2_log_unlock(sdp);
342 up_read(&sdp->sd_log_flush_lock);
343}
344
345static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 331static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
346{ 332{
347 struct gfs2_journal_extent *je; 333 struct gfs2_journal_extent *je;
@@ -559,11 +545,10 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
559 545
560 ail2_empty(sdp, new_tail); 546 ail2_empty(sdp, new_tail);
561 547
562 gfs2_log_lock(sdp);
563 atomic_add(dist, &sdp->sd_log_blks_free); 548 atomic_add(dist, &sdp->sd_log_blks_free);
564 trace_gfs2_log_blocks(sdp, dist); 549 trace_gfs2_log_blocks(sdp, dist);
565 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); 550 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
566 gfs2_log_unlock(sdp); 551 sdp->sd_jdesc->jd_blocks);
567 552
568 sdp->sd_log_tail = new_tail; 553 sdp->sd_log_tail = new_tail;
569} 554}
@@ -615,6 +600,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
615 if (buffer_eopnotsupp(bh)) { 600 if (buffer_eopnotsupp(bh)) {
616 clear_buffer_eopnotsupp(bh); 601 clear_buffer_eopnotsupp(bh);
617 set_buffer_uptodate(bh); 602 set_buffer_uptodate(bh);
603 fs_info(sdp, "barrier sync failed - disabling barriers\n");
618 set_bit(SDF_NOBARRIERS, &sdp->sd_flags); 604 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
619 lock_buffer(bh); 605 lock_buffer(bh);
620skip_barrier: 606skip_barrier:
@@ -822,6 +808,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
822 * @sdp: the filesystem 808 * @sdp: the filesystem
823 * @tr: the transaction 809 * @tr: the transaction
824 * 810 *
811 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
812 * or the total number of used blocks (pinned blocks plus AIL blocks)
813 * is greater than thresh2.
814 *
815 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
816 * journal size.
817 *
825 * Returns: errno 818 * Returns: errno
826 */ 819 */
827 820
@@ -832,10 +825,10 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
832 825
833 up_read(&sdp->sd_log_flush_lock); 826 up_read(&sdp->sd_log_flush_lock);
834 827
835 gfs2_log_lock(sdp); 828 if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
836 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) 829 ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
837 wake_up_process(sdp->sd_logd_process); 830 atomic_read(&sdp->sd_log_thresh2)))
838 gfs2_log_unlock(sdp); 831 wake_up(&sdp->sd_logd_waitq);
839} 832}
840 833
841/** 834/**
@@ -882,13 +875,23 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
882{ 875{
883 gfs2_log_flush(sdp, NULL); 876 gfs2_log_flush(sdp, NULL);
884 for (;;) { 877 for (;;) {
885 gfs2_ail1_start(sdp, DIO_ALL); 878 gfs2_ail1_start(sdp);
886 if (gfs2_ail1_empty(sdp, DIO_ALL)) 879 if (gfs2_ail1_empty(sdp, DIO_ALL))
887 break; 880 break;
888 msleep(10); 881 msleep(10);
889 } 882 }
890} 883}
891 884
885static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
886{
887 return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
888}
889
890static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
891{
892 unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
893 return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
894}
892 895
893/** 896/**
894 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks 897 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
@@ -901,28 +904,43 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
901int gfs2_logd(void *data) 904int gfs2_logd(void *data)
902{ 905{
903 struct gfs2_sbd *sdp = data; 906 struct gfs2_sbd *sdp = data;
904 unsigned long t; 907 unsigned long t = 1;
905 int need_flush; 908 DEFINE_WAIT(wait);
909 unsigned preflush;
906 910
907 while (!kthread_should_stop()) { 911 while (!kthread_should_stop()) {
908 /* Advance the log tail */
909 912
910 t = sdp->sd_log_flush_time + 913 preflush = atomic_read(&sdp->sd_log_pinned);
911 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; 914 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
915 gfs2_ail1_empty(sdp, DIO_ALL);
916 gfs2_log_flush(sdp, NULL);
917 gfs2_ail1_empty(sdp, DIO_ALL);
918 }
912 919
913 gfs2_ail1_empty(sdp, DIO_ALL); 920 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_log_lock(sdp); 921 gfs2_ail1_start(sdp);
915 need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); 922 io_schedule();
916 gfs2_log_unlock(sdp); 923 gfs2_ail1_empty(sdp, 0);
917 if (need_flush || time_after_eq(jiffies, t)) {
918 gfs2_log_flush(sdp, NULL); 924 gfs2_log_flush(sdp, NULL);
919 sdp->sd_log_flush_time = jiffies; 925 gfs2_ail1_empty(sdp, DIO_ALL);
920 } 926 }
921 927
928 wake_up(&sdp->sd_log_waitq);
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 929 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 930 if (freezing(current))
924 refrigerator(); 931 refrigerator();
925 schedule_timeout_interruptible(t); 932
933 do {
934 prepare_to_wait(&sdp->sd_logd_waitq, &wait,
935 TASK_UNINTERRUPTIBLE);
936 if (!gfs2_ail_flush_reqd(sdp) &&
937 !gfs2_jrnl_flush_reqd(sdp) &&
938 !kthread_should_stop())
939 t = schedule_timeout(t);
940 } while(t && !gfs2_ail_flush_reqd(sdp) &&
941 !gfs2_jrnl_flush_reqd(sdp) &&
942 !kthread_should_stop());
943 finish_wait(&sdp->sd_logd_waitq, &wait);
926 } 944 }
927 945
928 return 0; 946 return 0;
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 7c64510ccfd2..eb570b4ad443 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -51,7 +51,6 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
51 unsigned int ssize); 51 unsigned int ssize);
52 52
53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_incr_head(struct gfs2_sbd *sdp); 54void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56 55
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 56struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index adc260fbea90..bf33f822058d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -54,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
54 if (bd->bd_ail) 54 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
56 get_bh(bh); 56 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned);
57 trace_gfs2_pin(bd, 1); 58 trace_gfs2_pin(bd, 1);
58} 59}
59 60
@@ -94,6 +95,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
94 trace_gfs2_pin(bd, 0); 95 trace_gfs2_pin(bd, 0);
95 gfs2_log_unlock(sdp); 96 gfs2_log_unlock(sdp);
96 unlock_buffer(bh); 97 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned);
97} 99}
98 100
99 101
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a88fadc704bb..fb2a5f93b7c3 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -94,7 +94,7 @@ static int __init init_gfs2_fs(void)
94 if (!gfs2_glock_cachep) 94 if (!gfs2_glock_cachep)
95 goto fail; 95 goto fail;
96 96
97 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock (aspace)", 97 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)",
98 sizeof(struct gfs2_glock) + 98 sizeof(struct gfs2_glock) +
99 sizeof(struct address_space), 99 sizeof(struct address_space),
100 0, 0, gfs2_init_gl_aspace_once); 100 0, 0, gfs2_init_gl_aspace_once);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0bb12c80937a..18176d0b75d7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -34,7 +34,6 @@
34 34
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 36{
37 int err;
38 struct buffer_head *bh, *head; 37 struct buffer_head *bh, *head;
39 int nr_underway = 0; 38 int nr_underway = 0;
40 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? 39 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
@@ -86,11 +85,10 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
86 } while (bh != head); 85 } while (bh != head);
87 unlock_page(page); 86 unlock_page(page);
88 87
89 err = 0;
90 if (nr_underway == 0) 88 if (nr_underway == 0)
91 end_page_writeback(page); 89 end_page_writeback(page);
92 90
93 return err; 91 return 0;
94} 92}
95 93
96const struct address_space_operations gfs2_meta_aops = { 94const struct address_space_operations gfs2_meta_aops = {
@@ -313,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
313 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
314 312
315 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 atomic_dec(&sdp->sd_log_pinned);
316 list_del_init(&bd->bd_le.le_list); 315 list_del_init(&bd->bd_le.le_list);
317 if (meta) { 316 if (meta) {
318 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 317 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c1309ed1c496..3593b3a7290e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -57,8 +57,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
57{ 57{
58 spin_lock_init(&gt->gt_spin); 58 spin_lock_init(&gt->gt_spin);
59 59
60 gt->gt_incore_log_blocks = 1024;
61 gt->gt_logd_secs = 1;
62 gt->gt_quota_simul_sync = 64; 60 gt->gt_quota_simul_sync = 64;
63 gt->gt_quota_warn_period = 10; 61 gt->gt_quota_warn_period = 10;
64 gt->gt_quota_scale_num = 1; 62 gt->gt_quota_scale_num = 1;
@@ -101,14 +99,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
101 spin_lock_init(&sdp->sd_trunc_lock); 99 spin_lock_init(&sdp->sd_trunc_lock);
102 100
103 spin_lock_init(&sdp->sd_log_lock); 101 spin_lock_init(&sdp->sd_log_lock);
104 102 atomic_set(&sdp->sd_log_pinned, 0);
105 INIT_LIST_HEAD(&sdp->sd_log_le_buf); 103 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
106 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 104 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
107 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 105 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
108 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 106 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
109 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 107 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
110 108
111 mutex_init(&sdp->sd_log_reserve_mutex); 109 init_waitqueue_head(&sdp->sd_log_waitq);
110 init_waitqueue_head(&sdp->sd_logd_waitq);
112 INIT_LIST_HEAD(&sdp->sd_ail1_list); 111 INIT_LIST_HEAD(&sdp->sd_ail1_list);
113 INIT_LIST_HEAD(&sdp->sd_ail2_list); 112 INIT_LIST_HEAD(&sdp->sd_ail2_list);
114 113
@@ -487,7 +486,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
487 struct dentry *dentry; 486 struct dentry *dentry;
488 struct inode *inode; 487 struct inode *inode;
489 488
490 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); 489 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
491 if (IS_ERR(inode)) { 490 if (IS_ERR(inode)) {
492 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 491 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
493 return PTR_ERR(inode); 492 return PTR_ERR(inode);
@@ -733,6 +732,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
733 if (sdp->sd_args.ar_spectator) { 732 if (sdp->sd_args.ar_spectator) {
734 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); 733 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
735 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); 734 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
735 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
736 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
736 } else { 737 } else {
737 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { 738 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
738 fs_err(sdp, "can't mount journal #%u\n", 739 fs_err(sdp, "can't mount journal #%u\n",
@@ -770,6 +771,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
770 goto fail_jinode_gh; 771 goto fail_jinode_gh;
771 } 772 }
772 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); 773 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
774 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
775 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
773 776
774 /* Map the extents for this journal's blocks */ 777 /* Map the extents for this journal's blocks */
775 map_journal_extents(sdp); 778 map_journal_extents(sdp);
@@ -951,8 +954,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
951 if (undo) 954 if (undo)
952 goto fail_quotad; 955 goto fail_quotad;
953 956
954 sdp->sd_log_flush_time = jiffies;
955
956 p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); 957 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
957 error = IS_ERR(p); 958 error = IS_ERR(p);
958 if (error) { 959 if (error) {
@@ -1160,7 +1161,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1160 GFS2_BASIC_BLOCK_SHIFT; 1161 GFS2_BASIC_BLOCK_SHIFT;
1161 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 1162 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
1162 1163
1163 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; 1164 sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
1164 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum; 1165 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
1165 if (sdp->sd_args.ar_statfs_quantum) { 1166 if (sdp->sd_args.ar_statfs_quantum) {
1166 sdp->sd_tune.gt_statfs_slow = 0; 1167 sdp->sd_tune.gt_statfs_slow = 0;
@@ -1323,7 +1324,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1323 memset(&args, 0, sizeof(args)); 1324 memset(&args, 0, sizeof(args));
1324 args.ar_quota = GFS2_QUOTA_DEFAULT; 1325 args.ar_quota = GFS2_QUOTA_DEFAULT;
1325 args.ar_data = GFS2_DATA_DEFAULT; 1326 args.ar_data = GFS2_DATA_DEFAULT;
1326 args.ar_commit = 60; 1327 args.ar_commit = 30;
1327 args.ar_statfs_quantum = 30; 1328 args.ar_statfs_quantum = 30;
1328 args.ar_quota_quantum = 60; 1329 args.ar_quota_quantum = 60;
1329 args.ar_errors = GFS2_ERRORS_DEFAULT; 1330 args.ar_errors = GFS2_ERRORS_DEFAULT;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6dbcbad6ab17..49667d68769e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -637,15 +637,40 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
637 unsigned blocksize, iblock, pos; 637 unsigned blocksize, iblock, pos;
638 struct buffer_head *bh, *dibh; 638 struct buffer_head *bh, *dibh;
639 struct page *page; 639 struct page *page;
640 void *kaddr; 640 void *kaddr, *ptr;
641 struct gfs2_quota *qp; 641 struct gfs2_quota q, *qp;
642 s64 value; 642 int err, nbytes;
643 int err = -EIO;
644 u64 size; 643 u64 size;
645 644
646 if (gfs2_is_stuffed(ip)) 645 if (gfs2_is_stuffed(ip))
647 gfs2_unstuff_dinode(ip, NULL); 646 gfs2_unstuff_dinode(ip, NULL);
648 647
648 memset(&q, 0, sizeof(struct gfs2_quota));
649 err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
650 if (err < 0)
651 return err;
652
653 err = -EIO;
654 qp = &q;
655 qp->qu_value = be64_to_cpu(qp->qu_value);
656 qp->qu_value += change;
657 qp->qu_value = cpu_to_be64(qp->qu_value);
658 qd->qd_qb.qb_value = qp->qu_value;
659 if (fdq) {
660 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
661 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
662 qd->qd_qb.qb_warn = qp->qu_warn;
663 }
664 if (fdq->d_fieldmask & FS_DQ_BHARD) {
665 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
666 qd->qd_qb.qb_limit = qp->qu_limit;
667 }
668 }
669
670 /* Write the quota into the quota file on disk */
671 ptr = qp;
672 nbytes = sizeof(struct gfs2_quota);
673get_a_page:
649 page = grab_cache_page(mapping, index); 674 page = grab_cache_page(mapping, index);
650 if (!page) 675 if (!page)
651 return -ENOMEM; 676 return -ENOMEM;
@@ -667,7 +692,12 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
667 if (!buffer_mapped(bh)) { 692 if (!buffer_mapped(bh)) {
668 gfs2_block_map(inode, iblock, bh, 1); 693 gfs2_block_map(inode, iblock, bh, 1);
669 if (!buffer_mapped(bh)) 694 if (!buffer_mapped(bh))
670 goto unlock; 695 goto unlock_out;
696 /* If it's a newly allocated disk block for quota, zero it */
697 if (buffer_new(bh)) {
698 memset(bh->b_data, 0, bh->b_size);
699 set_buffer_uptodate(bh);
700 }
671 } 701 }
672 702
673 if (PageUptodate(page)) 703 if (PageUptodate(page))
@@ -677,32 +707,34 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
677 ll_rw_block(READ_META, 1, &bh); 707 ll_rw_block(READ_META, 1, &bh);
678 wait_on_buffer(bh); 708 wait_on_buffer(bh);
679 if (!buffer_uptodate(bh)) 709 if (!buffer_uptodate(bh))
680 goto unlock; 710 goto unlock_out;
681 } 711 }
682 712
683 gfs2_trans_add_bh(ip->i_gl, bh, 0); 713 gfs2_trans_add_bh(ip->i_gl, bh, 0);
684 714
685 kaddr = kmap_atomic(page, KM_USER0); 715 kaddr = kmap_atomic(page, KM_USER0);
686 qp = kaddr + offset; 716 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
687 value = (s64)be64_to_cpu(qp->qu_value) + change; 717 nbytes = PAGE_CACHE_SIZE - offset;
688 qp->qu_value = cpu_to_be64(value); 718 memcpy(kaddr + offset, ptr, nbytes);
689 qd->qd_qb.qb_value = qp->qu_value;
690 if (fdq) {
691 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
692 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
693 qd->qd_qb.qb_warn = qp->qu_warn;
694 }
695 if (fdq->d_fieldmask & FS_DQ_BHARD) {
696 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
697 qd->qd_qb.qb_limit = qp->qu_limit;
698 }
699 }
700 flush_dcache_page(page); 719 flush_dcache_page(page);
701 kunmap_atomic(kaddr, KM_USER0); 720 kunmap_atomic(kaddr, KM_USER0);
721 unlock_page(page);
722 page_cache_release(page);
702 723
724 /* If quota straddles page boundary, we need to update the rest of the
725 * quota at the beginning of the next page */
726 if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
727 ptr = ptr + nbytes;
728 nbytes = sizeof(struct gfs2_quota) - nbytes;
729 offset = 0;
730 index++;
731 goto get_a_page;
732 }
733
734 /* Update the disk inode timestamp and size (if extended) */
703 err = gfs2_meta_inode_buffer(ip, &dibh); 735 err = gfs2_meta_inode_buffer(ip, &dibh);
704 if (err) 736 if (err)
705 goto unlock; 737 goto out;
706 738
707 size = loc + sizeof(struct gfs2_quota); 739 size = loc + sizeof(struct gfs2_quota);
708 if (size > inode->i_size) { 740 if (size > inode->i_size) {
@@ -715,7 +747,9 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
715 brelse(dibh); 747 brelse(dibh);
716 mark_inode_dirty(inode); 748 mark_inode_dirty(inode);
717 749
718unlock: 750out:
751 return err;
752unlock_out:
719 unlock_page(page); 753 unlock_page(page);
720 page_cache_release(page); 754 page_cache_release(page);
721 return err; 755 return err;
@@ -779,8 +813,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
779 * rgrp since it won't be allocated during the transaction 813 * rgrp since it won't be allocated during the transaction
780 */ 814 */
781 al->al_requested = 1; 815 al->al_requested = 1;
782 /* +1 in the end for block requested above for unstuffing */ 816 /* +3 in the end for unstuffing block, inode size update block
783 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 1; 817 * and another block in case quota straddles page boundary and
818 * two blocks need to be updated instead of 1 */
819 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
784 820
785 if (nalloc) 821 if (nalloc)
786 al->al_requested += nalloc * (data_blocks + ind_blocks); 822 al->al_requested += nalloc * (data_blocks + ind_blocks);
@@ -1418,10 +1454,18 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1418 1454
1419 memset(fqs, 0, sizeof(struct fs_quota_stat)); 1455 memset(fqs, 0, sizeof(struct fs_quota_stat));
1420 fqs->qs_version = FS_QSTAT_VERSION; 1456 fqs->qs_version = FS_QSTAT_VERSION;
1421 if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON) 1457
1422 fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); 1458 switch (sdp->sd_args.ar_quota) {
1423 else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) 1459 case GFS2_QUOTA_ON:
1424 fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); 1460 fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
1461 /*FALLTHRU*/
1462 case GFS2_QUOTA_ACCOUNT:
1463 fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
1464 break;
1465 case GFS2_QUOTA_OFF:
1466 break;
1467 }
1468
1425 if (sdp->sd_quota_inode) { 1469 if (sdp->sd_quota_inode) {
1426 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; 1470 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
1427 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; 1471 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
@@ -1432,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1432 return 0; 1476 return 0;
1433} 1477}
1434 1478
1435static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, 1479static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1436 struct fs_disk_quota *fdq) 1480 struct fs_disk_quota *fdq)
1437{ 1481{
1438 struct gfs2_sbd *sdp = sb->s_fs_info; 1482 struct gfs2_sbd *sdp = sb->s_fs_info;
1439 struct gfs2_quota_lvb *qlvb; 1483 struct gfs2_quota_lvb *qlvb;
@@ -1477,8 +1521,8 @@ out:
1477/* GFS2 only supports a subset of the XFS fields */ 1521/* GFS2 only supports a subset of the XFS fields */
1478#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) 1522#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
1479 1523
1480static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, 1524static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1481 struct fs_disk_quota *fdq) 1525 struct fs_disk_quota *fdq)
1482{ 1526{
1483 struct gfs2_sbd *sdp = sb->s_fs_info; 1527 struct gfs2_sbd *sdp = sb->s_fs_info;
1484 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); 1528 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
@@ -1585,7 +1629,7 @@ out_put:
1585const struct quotactl_ops gfs2_quotactl_ops = { 1629const struct quotactl_ops gfs2_quotactl_ops = {
1586 .quota_sync = gfs2_quota_sync, 1630 .quota_sync = gfs2_quota_sync,
1587 .get_xstate = gfs2_quota_get_xstate, 1631 .get_xstate = gfs2_quota_get_xstate,
1588 .get_xquota = gfs2_xquota_get, 1632 .get_dqblk = gfs2_get_dqblk,
1589 .set_xquota = gfs2_xquota_set, 1633 .set_dqblk = gfs2_set_dqblk,
1590}; 1634};
1591 1635
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 503b842f3ba2..117fa4171f62 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
854 if ((start + nr_sects) != blk) { 854 if ((start + nr_sects) != blk) {
855 rv = blkdev_issue_discard(bdev, start, 855 rv = blkdev_issue_discard(bdev, start,
856 nr_sects, GFP_NOFS, 856 nr_sects, GFP_NOFS,
857 DISCARD_FL_BARRIER); 857 BLKDEV_IFL_WAIT |
858 BLKDEV_IFL_BARRIER);
858 if (rv) 859 if (rv)
859 goto fail; 860 goto fail;
860 nr_sects = 0; 861 nr_sects = 0;
@@ -869,7 +870,7 @@ start_new_extent:
869 } 870 }
870 if (nr_sects) { 871 if (nr_sects) {
871 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 872 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
872 DISCARD_FL_BARRIER); 873 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
873 if (rv) 874 if (rv)
874 goto fail; 875 goto fail;
875 } 876 }
@@ -948,13 +949,13 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
948 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 949 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
949 * @rgd: The rgrp 950 * @rgd: The rgrp
950 * 951 *
951 * Returns: The inode, if one has been found 952 * Returns: 0 if no error
953 * The inode, if one has been found, in inode.
952 */ 954 */
953 955
954static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 956static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
955 u64 skip) 957 u64 skip)
956{ 958{
957 struct inode *inode;
958 u32 goal = 0, block; 959 u32 goal = 0, block;
959 u64 no_addr; 960 u64 no_addr;
960 struct gfs2_sbd *sdp = rgd->rd_sbd; 961 struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -979,14 +980,11 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
979 if (no_addr == skip) 980 if (no_addr == skip)
980 continue; 981 continue;
981 *last_unlinked = no_addr; 982 *last_unlinked = no_addr;
982 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 983 return no_addr;
983 no_addr, -1, 1);
984 if (!IS_ERR(inode))
985 return inode;
986 } 984 }
987 985
988 rgd->rd_flags &= ~GFS2_RDF_CHECK; 986 rgd->rd_flags &= ~GFS2_RDF_CHECK;
989 return NULL; 987 return 0;
990} 988}
991 989
992/** 990/**
@@ -1067,11 +1065,12 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1067 * Try to acquire rgrp in way which avoids contending with others. 1065 * Try to acquire rgrp in way which avoids contending with others.
1068 * 1066 *
1069 * Returns: errno 1067 * Returns: errno
1068 * unlinked: the block address of an unlinked block to be reclaimed
1070 */ 1069 */
1071 1070
1072static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) 1071static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1072 u64 *last_unlinked)
1073{ 1073{
1074 struct inode *inode = NULL;
1075 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1074 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1076 struct gfs2_rgrpd *rgd, *begin = NULL; 1075 struct gfs2_rgrpd *rgd, *begin = NULL;
1077 struct gfs2_alloc *al = ip->i_alloc; 1076 struct gfs2_alloc *al = ip->i_alloc;
@@ -1080,6 +1079,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1080 int loops = 0; 1079 int loops = 0;
1081 int error, rg_locked; 1080 int error, rg_locked;
1082 1081
1082 *unlinked = 0;
1083 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); 1083 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1084 1084
1085 while (rgd) { 1085 while (rgd) {
@@ -1096,19 +1096,24 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1096 case 0: 1096 case 0:
1097 if (try_rgrp_fit(rgd, al)) 1097 if (try_rgrp_fit(rgd, al))
1098 goto out; 1098 goto out;
1099 if (rgd->rd_flags & GFS2_RDF_CHECK) 1099 /* If the rg came in already locked, there's no
1100 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1100 way we can recover from a failed try_rgrp_unlink
1101 because that would require an iput which can only
1102 happen after the rgrp is unlocked. */
1103 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1104 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1105 ip->i_no_addr);
1101 if (!rg_locked) 1106 if (!rg_locked)
1102 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1107 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1103 if (inode) 1108 if (*unlinked)
1104 return inode; 1109 return -EAGAIN;
1105 /* fall through */ 1110 /* fall through */
1106 case GLR_TRYFAILED: 1111 case GLR_TRYFAILED:
1107 rgd = recent_rgrp_next(rgd); 1112 rgd = recent_rgrp_next(rgd);
1108 break; 1113 break;
1109 1114
1110 default: 1115 default:
1111 return ERR_PTR(error); 1116 return error;
1112 } 1117 }
1113 } 1118 }
1114 1119
@@ -1130,12 +1135,13 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1130 case 0: 1135 case 0:
1131 if (try_rgrp_fit(rgd, al)) 1136 if (try_rgrp_fit(rgd, al))
1132 goto out; 1137 goto out;
1133 if (rgd->rd_flags & GFS2_RDF_CHECK) 1138 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1134 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1139 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1140 ip->i_no_addr);
1135 if (!rg_locked) 1141 if (!rg_locked)
1136 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1142 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1137 if (inode) 1143 if (*unlinked)
1138 return inode; 1144 return -EAGAIN;
1139 break; 1145 break;
1140 1146
1141 case GLR_TRYFAILED: 1147 case GLR_TRYFAILED:
@@ -1143,7 +1149,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1143 break; 1149 break;
1144 1150
1145 default: 1151 default:
1146 return ERR_PTR(error); 1152 return error;
1147 } 1153 }
1148 1154
1149 rgd = gfs2_rgrpd_get_next(rgd); 1155 rgd = gfs2_rgrpd_get_next(rgd);
@@ -1152,7 +1158,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1152 1158
1153 if (rgd == begin) { 1159 if (rgd == begin) {
1154 if (++loops >= 3) 1160 if (++loops >= 3)
1155 return ERR_PTR(-ENOSPC); 1161 return -ENOSPC;
1156 if (!skipped) 1162 if (!skipped)
1157 loops++; 1163 loops++;
1158 flags = 0; 1164 flags = 0;
@@ -1172,7 +1178,7 @@ out:
1172 forward_rgrp_set(sdp, rgd); 1178 forward_rgrp_set(sdp, rgd);
1173 } 1179 }
1174 1180
1175 return NULL; 1181 return 0;
1176} 1182}
1177 1183
1178/** 1184/**
@@ -1188,7 +1194,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1188 struct gfs2_alloc *al = ip->i_alloc; 1194 struct gfs2_alloc *al = ip->i_alloc;
1189 struct inode *inode; 1195 struct inode *inode;
1190 int error = 0; 1196 int error = 0;
1191 u64 last_unlinked = NO_BLOCK; 1197 u64 last_unlinked = NO_BLOCK, unlinked;
1192 1198
1193 if (gfs2_assert_warn(sdp, al->al_requested)) 1199 if (gfs2_assert_warn(sdp, al->al_requested))
1194 return -EINVAL; 1200 return -EINVAL;
@@ -1204,14 +1210,19 @@ try_again:
1204 if (error) 1210 if (error)
1205 return error; 1211 return error;
1206 1212
1207 inode = get_local_rgrp(ip, &last_unlinked); 1213 error = get_local_rgrp(ip, &unlinked, &last_unlinked);
1208 if (inode) { 1214 if (error) {
1209 if (ip != GFS2_I(sdp->sd_rindex)) 1215 if (ip != GFS2_I(sdp->sd_rindex))
1210 gfs2_glock_dq_uninit(&al->al_ri_gh); 1216 gfs2_glock_dq_uninit(&al->al_ri_gh);
1211 if (IS_ERR(inode)) 1217 if (error != -EAGAIN)
1212 return PTR_ERR(inode); 1218 return error;
1213 iput(inode); 1219 error = gfs2_unlinked_inode_lookup(ip->i_inode.i_sb,
1220 unlinked, &inode);
1221 if (inode)
1222 iput(inode);
1214 gfs2_log_flush(sdp, NULL); 1223 gfs2_log_flush(sdp, NULL);
1224 if (error == GLR_TRYFAILED)
1225 error = 0;
1215 goto try_again; 1226 goto try_again;
1216 } 1227 }
1217 1228
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 50aac606b990..4d1aad38f1b1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1113,7 +1113,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1113 int error; 1113 int error;
1114 1114
1115 spin_lock(&gt->gt_spin); 1115 spin_lock(&gt->gt_spin);
1116 args.ar_commit = gt->gt_log_flush_secs; 1116 args.ar_commit = gt->gt_logd_secs;
1117 args.ar_quota_quantum = gt->gt_quota_quantum; 1117 args.ar_quota_quantum = gt->gt_quota_quantum;
1118 if (gt->gt_statfs_slow) 1118 if (gt->gt_statfs_slow)
1119 args.ar_statfs_quantum = 0; 1119 args.ar_statfs_quantum = 0;
@@ -1160,7 +1160,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1160 else 1160 else
1161 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags); 1161 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1162 spin_lock(&gt->gt_spin); 1162 spin_lock(&gt->gt_spin);
1163 gt->gt_log_flush_secs = args.ar_commit; 1163 gt->gt_logd_secs = args.ar_commit;
1164 gt->gt_quota_quantum = args.ar_quota_quantum; 1164 gt->gt_quota_quantum = args.ar_quota_quantum;
1165 if (args.ar_statfs_quantum) { 1165 if (args.ar_statfs_quantum) {
1166 gt->gt_statfs_slow = 0; 1166 gt->gt_statfs_slow = 0;
@@ -1305,8 +1305,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1305 } 1305 }
1306 if (args->ar_discard) 1306 if (args->ar_discard)
1307 seq_printf(s, ",discard"); 1307 seq_printf(s, ",discard");
1308 val = sdp->sd_tune.gt_log_flush_secs; 1308 val = sdp->sd_tune.gt_logd_secs;
1309 if (val != 60) 1309 if (val != 30)
1310 seq_printf(s, ",commit=%d", val); 1310 seq_printf(s, ",commit=%d", val);
1311 val = sdp->sd_tune.gt_statfs_quantum; 1311 val = sdp->sd_tune.gt_statfs_quantum;
1312 if (val != 30) 1312 if (val != 30)
@@ -1334,7 +1334,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1334 } 1334 }
1335 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 1335 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1336 seq_printf(s, ",nobarrier"); 1336 seq_printf(s, ",nobarrier");
1337 1337 if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1338 seq_printf(s, ",demote_interface_used");
1338 return 0; 1339 return 0;
1339} 1340}
1340 1341
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 3df60f2d84e3..a0464680af0b 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -54,7 +54,7 @@ extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 54extern const struct export_operations gfs2_export_ops;
55extern const struct super_operations gfs2_super_ops; 55extern const struct super_operations gfs2_super_ops;
56extern const struct dentry_operations gfs2_dops; 56extern const struct dentry_operations gfs2_dops;
57extern struct xattr_handler *gfs2_xattr_handlers[]; 57extern const struct xattr_handler *gfs2_xattr_handlers[];
58 58
59#endif /* __SUPER_DOT_H__ */ 59#endif /* __SUPER_DOT_H__ */
60 60
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 54fd98425991..37f5393e68e6 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -232,6 +232,8 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
232 glops = gfs2_glops_list[gltype]; 232 glops = gfs2_glops_list[gltype];
233 if (glops == NULL) 233 if (glops == NULL)
234 return -EINVAL; 234 return -EINVAL;
235 if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
236 fs_info(sdp, "demote interface used\n");
235 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl); 237 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
236 if (rv) 238 if (rv)
237 return rv; 239 return rv;
@@ -468,8 +470,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
468} \ 470} \
469TUNE_ATTR_2(name, name##_store) 471TUNE_ATTR_2(name, name##_store)
470 472
471TUNE_ATTR(incore_log_blocks, 0);
472TUNE_ATTR(log_flush_secs, 0);
473TUNE_ATTR(quota_warn_period, 0); 473TUNE_ATTR(quota_warn_period, 0);
474TUNE_ATTR(quota_quantum, 0); 474TUNE_ATTR(quota_quantum, 0);
475TUNE_ATTR(max_readahead, 0); 475TUNE_ATTR(max_readahead, 0);
@@ -481,8 +481,6 @@ TUNE_ATTR(statfs_quantum, 1);
481TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); 481TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
482 482
483static struct attribute *tune_attrs[] = { 483static struct attribute *tune_attrs[] = {
484 &tune_attr_incore_log_blocks.attr,
485 &tune_attr_log_flush_secs.attr,
486 &tune_attr_quota_warn_period.attr, 484 &tune_attr_quota_warn_period.attr,
487 &tune_attr_quota_quantum.attr, 485 &tune_attr_quota_quantum.attr,
488 &tune_attr_max_readahead.attr, 486 &tune_attr_max_readahead.attr,
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 4ef0e9fa3549..9ec73a854111 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -23,6 +23,7 @@
23#include "meta_io.h" 23#include "meta_io.h"
24#include "trans.h" 24#include "trans.h"
25#include "util.h" 25#include "util.h"
26#include "trace_gfs2.h"
26 27
27int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, 28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
28 unsigned int revokes) 29 unsigned int revokes)
@@ -75,6 +76,23 @@ fail_holder_uninit:
75 return error; 76 return error;
76} 77}
77 78
79/**
80 * gfs2_log_release - Release a given number of log blocks
81 * @sdp: The GFS2 superblock
82 * @blks: The number of blocks
83 *
84 */
85
86static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
87{
88
89 atomic_add(blks, &sdp->sd_log_blks_free);
90 trace_gfs2_log_blocks(sdp, blks);
91 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
92 sdp->sd_jdesc->jd_blocks);
93 up_read(&sdp->sd_log_flush_lock);
94}
95
78void gfs2_trans_end(struct gfs2_sbd *sdp) 96void gfs2_trans_end(struct gfs2_sbd *sdp)
79{ 97{
80 struct gfs2_trans *tr = current->journal_info; 98 struct gfs2_trans *tr = current->journal_info;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index c2ebdf2c01d4..82f93da00d1b 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1535,21 +1535,21 @@ out_alloc:
1535 return error; 1535 return error;
1536} 1536}
1537 1537
1538static struct xattr_handler gfs2_xattr_user_handler = { 1538static const struct xattr_handler gfs2_xattr_user_handler = {
1539 .prefix = XATTR_USER_PREFIX, 1539 .prefix = XATTR_USER_PREFIX,
1540 .flags = GFS2_EATYPE_USR, 1540 .flags = GFS2_EATYPE_USR,
1541 .get = gfs2_xattr_get, 1541 .get = gfs2_xattr_get,
1542 .set = gfs2_xattr_set, 1542 .set = gfs2_xattr_set,
1543}; 1543};
1544 1544
1545static struct xattr_handler gfs2_xattr_security_handler = { 1545static const struct xattr_handler gfs2_xattr_security_handler = {
1546 .prefix = XATTR_SECURITY_PREFIX, 1546 .prefix = XATTR_SECURITY_PREFIX,
1547 .flags = GFS2_EATYPE_SECURITY, 1547 .flags = GFS2_EATYPE_SECURITY,
1548 .get = gfs2_xattr_get, 1548 .get = gfs2_xattr_get,
1549 .set = gfs2_xattr_set, 1549 .set = gfs2_xattr_set,
1550}; 1550};
1551 1551
1552struct xattr_handler *gfs2_xattr_handlers[] = { 1552const struct xattr_handler *gfs2_xattr_handlers[] = {
1553 &gfs2_xattr_user_handler, 1553 &gfs2_xattr_user_handler,
1554 &gfs2_xattr_security_handler, 1554 &gfs2_xattr_security_handler,
1555 &gfs2_xattr_system_handler, 1555 &gfs2_xattr_system_handler,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 5f4023678251..764fd1bdca88 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -494,7 +494,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
494const struct file_operations hfsplus_dir_operations = { 494const struct file_operations hfsplus_dir_operations = {
495 .read = generic_read_dir, 495 .read = generic_read_dir,
496 .readdir = hfsplus_readdir, 496 .readdir = hfsplus_readdir,
497 .ioctl = hfsplus_ioctl, 497 .unlocked_ioctl = hfsplus_ioctl,
498 .llseek = generic_file_llseek, 498 .llseek = generic_file_llseek,
499 .release = hfsplus_dir_release, 499 .release = hfsplus_dir_release,
500}; 500};
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 5c10d803d9df..6505c30ad965 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -337,8 +337,7 @@ struct inode *hfsplus_new_inode(struct super_block *, int);
337void hfsplus_delete_inode(struct inode *); 337void hfsplus_delete_inode(struct inode *);
338 338
339/* ioctl.c */ 339/* ioctl.c */
340int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 340long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
341 unsigned long arg);
342int hfsplus_setxattr(struct dentry *dentry, const char *name, 341int hfsplus_setxattr(struct dentry *dentry, const char *name,
343 const void *value, size_t size, int flags); 342 const void *value, size_t size, int flags);
344ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, 343ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 1bcf597c0562..9bbb82924a22 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -285,7 +285,7 @@ static const struct file_operations hfsplus_file_operations = {
285 .fsync = file_fsync, 285 .fsync = file_fsync,
286 .open = hfsplus_file_open, 286 .open = hfsplus_file_open,
287 .release = hfsplus_file_release, 287 .release = hfsplus_file_release,
288 .ioctl = hfsplus_ioctl, 288 .unlocked_ioctl = hfsplus_ioctl,
289}; 289};
290 290
291struct inode *hfsplus_new_inode(struct super_block *sb, int mode) 291struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index f457d2ca51ab..ac405f099026 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -17,14 +17,16 @@
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/xattr.h> 19#include <linux/xattr.h>
20#include <linux/smp_lock.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 22#include "hfsplus_fs.h"
22 23
23int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 24long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24 unsigned long arg)
25{ 25{
26 struct inode *inode = filp->f_path.dentry->d_inode;
26 unsigned int flags; 27 unsigned int flags;
27 28
29 lock_kernel();
28 switch (cmd) { 30 switch (cmd) {
29 case HFSPLUS_IOC_EXT2_GETFLAGS: 31 case HFSPLUS_IOC_EXT2_GETFLAGS:
30 flags = 0; 32 flags = 0;
@@ -38,8 +40,10 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
38 case HFSPLUS_IOC_EXT2_SETFLAGS: { 40 case HFSPLUS_IOC_EXT2_SETFLAGS: {
39 int err = 0; 41 int err = 0;
40 err = mnt_want_write(filp->f_path.mnt); 42 err = mnt_want_write(filp->f_path.mnt);
41 if (err) 43 if (err) {
44 unlock_kernel();
42 return err; 45 return err;
46 }
43 47
44 if (!is_owner_or_cap(inode)) { 48 if (!is_owner_or_cap(inode)) {
45 err = -EACCES; 49 err = -EACCES;
@@ -85,9 +89,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
85 mark_inode_dirty(inode); 89 mark_inode_dirty(inode);
86setflags_out: 90setflags_out:
87 mnt_drop_write(filp->f_path.mnt); 91 mnt_drop_write(filp->f_path.mnt);
92 unlock_kernel();
88 return err; 93 return err;
89 } 94 }
90 default: 95 default:
96 unlock_kernel();
91 return -ENOTTY; 97 return -ENOTTY;
92 } 98 }
93} 99}
diff --git a/fs/inode.c b/fs/inode.c
index 407bf392e20a..2bee20ae3d65 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -286,11 +286,9 @@ static void init_once(void *foo)
286 */ 286 */
287void __iget(struct inode *inode) 287void __iget(struct inode *inode)
288{ 288{
289 if (atomic_read(&inode->i_count)) { 289 if (atomic_inc_return(&inode->i_count) != 1)
290 atomic_inc(&inode->i_count);
291 return; 290 return;
292 } 291
293 atomic_inc(&inode->i_count);
294 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 292 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
295 list_move(&inode->i_list, &inode_in_use); 293 list_move(&inode->i_list, &inode_in_use);
296 inodes_stat.nr_unused--; 294 inodes_stat.nr_unused--;
@@ -1205,8 +1203,6 @@ void generic_delete_inode(struct inode *inode)
1205 inodes_stat.nr_inodes--; 1203 inodes_stat.nr_inodes--;
1206 spin_unlock(&inode_lock); 1204 spin_unlock(&inode_lock);
1207 1205
1208 security_inode_delete(inode);
1209
1210 if (op->delete_inode) { 1206 if (op->delete_inode) {
1211 void (*delete)(struct inode *) = op->delete_inode; 1207 void (*delete)(struct inode *) = op->delete_inode;
1212 /* Filesystems implementing their own 1208 /* Filesystems implementing their own
@@ -1610,3 +1606,23 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1610 inode->i_ino); 1606 inode->i_ino);
1611} 1607}
1612EXPORT_SYMBOL(init_special_inode); 1608EXPORT_SYMBOL(init_special_inode);
1609
1610/**
1611 * Init uid,gid,mode for new inode according to posix standards
1612 * @inode: New inode
1613 * @dir: Directory inode
1614 * @mode: mode of the new inode
1615 */
1616void inode_init_owner(struct inode *inode, const struct inode *dir,
1617 mode_t mode)
1618{
1619 inode->i_uid = current_fsuid();
1620 if (dir && dir->i_mode & S_ISGID) {
1621 inode->i_gid = dir->i_gid;
1622 if (S_ISDIR(mode))
1623 mode |= S_ISGID;
1624 } else
1625 inode->i_gid = current_fsgid();
1626 inode->i_mode = mode;
1627}
1628EXPORT_SYMBOL(inode_init_owner);
diff --git a/fs/internal.h b/fs/internal.h
index 8a03a5447bdf..6b706bc60a66 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -87,6 +87,8 @@ extern struct file *get_empty_filp(void);
87 * super.c 87 * super.c
88 */ 88 */
89extern int do_remount_sb(struct super_block *, int, void *, int); 89extern int do_remount_sb(struct super_block *, int, void *, int);
90extern void __put_super(struct super_block *sb);
91extern void put_super(struct super_block *sb);
90 92
91/* 93/*
92 * open.c 94 * open.c
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7faefb4da939..2d140a713861 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -525,15 +525,8 @@ static int ioctl_fsfreeze(struct file *filp)
525 if (sb->s_op->freeze_fs == NULL) 525 if (sb->s_op->freeze_fs == NULL)
526 return -EOPNOTSUPP; 526 return -EOPNOTSUPP;
527 527
528 /* If a blockdevice-backed filesystem isn't specified, return. */
529 if (sb->s_bdev == NULL)
530 return -EINVAL;
531
532 /* Freeze */ 528 /* Freeze */
533 sb = freeze_bdev(sb->s_bdev); 529 return freeze_super(sb);
534 if (IS_ERR(sb))
535 return PTR_ERR(sb);
536 return 0;
537} 530}
538 531
539static int ioctl_fsthaw(struct file *filp) 532static int ioctl_fsthaw(struct file *filp)
@@ -543,12 +536,8 @@ static int ioctl_fsthaw(struct file *filp)
543 if (!capable(CAP_SYS_ADMIN)) 536 if (!capable(CAP_SYS_ADMIN))
544 return -EPERM; 537 return -EPERM;
545 538
546 /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
547 if (sb->s_bdev == NULL)
548 return -EINVAL;
549
550 /* Thaw */ 539 /* Thaw */
551 return thaw_bdev(sb->s_bdev, sb); 540 return thaw_super(sb);
552} 541}
553 542
554/* 543/*
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ecb44c94ba8d..28a9ddaa0c49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -786,6 +786,12 @@ wait_for_iobuf:
786 786
787 jbd_debug(3, "JBD: commit phase 6\n"); 787 jbd_debug(3, "JBD: commit phase 6\n");
788 788
789 /* All metadata is written, now write commit record and do cleanup */
790 spin_lock(&journal->j_state_lock);
791 J_ASSERT(commit_transaction->t_state == T_COMMIT);
792 commit_transaction->t_state = T_COMMIT_RECORD;
793 spin_unlock(&journal->j_state_lock);
794
789 if (journal_write_commit_record(journal, commit_transaction)) 795 if (journal_write_commit_record(journal, commit_transaction))
790 err = -EIO; 796 err = -EIO;
791 797
@@ -923,7 +929,7 @@ restart_loop:
923 929
924 jbd_debug(3, "JBD: commit phase 8\n"); 930 jbd_debug(3, "JBD: commit phase 8\n");
925 931
926 J_ASSERT(commit_transaction->t_state == T_COMMIT); 932 J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
927 933
928 commit_transaction->t_state = T_FINISHED; 934 commit_transaction->t_state = T_FINISHED;
929 J_ASSERT(commit_transaction == journal->j_committing_transaction); 935 J_ASSERT(commit_transaction == journal->j_committing_transaction);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index bd224eec9b07..93d1e47647bd 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -565,6 +565,38 @@ int log_wait_commit(journal_t *journal, tid_t tid)
565} 565}
566 566
567/* 567/*
568 * Return 1 if a given transaction has not yet sent barrier request
569 * connected with a transaction commit. If 0 is returned, transaction
570 * may or may not have sent the barrier. Used to avoid sending barrier
571 * twice in common cases.
572 */
573int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
574{
575 int ret = 0;
576 transaction_t *commit_trans;
577
578 if (!(journal->j_flags & JFS_BARRIER))
579 return 0;
580 spin_lock(&journal->j_state_lock);
581 /* Transaction already committed? */
582 if (tid_geq(journal->j_commit_sequence, tid))
583 goto out;
584 /*
585 * Transaction is being committed and we already proceeded to
586 * writing commit record?
587 */
588 commit_trans = journal->j_committing_transaction;
589 if (commit_trans && commit_trans->t_tid == tid &&
590 commit_trans->t_state >= T_COMMIT_RECORD)
591 goto out;
592 ret = 1;
593out:
594 spin_unlock(&journal->j_state_lock);
595 return ret;
596}
597EXPORT_SYMBOL(journal_trans_will_send_data_barrier);
598
599/*
568 * Log buffer allocation routines: 600 * Log buffer allocation routines:
569 */ 601 */
570 602
@@ -1157,6 +1189,7 @@ int journal_destroy(journal_t *journal)
1157{ 1189{
1158 int err = 0; 1190 int err = 0;
1159 1191
1192
1160 /* Wait for the commit thread to wake up and die. */ 1193 /* Wait for the commit thread to wake up and die. */
1161 journal_kill_thread(journal); 1194 journal_kill_thread(journal);
1162 1195
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 30beb11ef928..076d1cc44f95 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
530 */ 530 */
531 if ((journal->j_fs_dev != journal->j_dev) && 531 if ((journal->j_fs_dev != journal->j_dev) &&
532 (journal->j_flags & JBD2_BARRIER)) 532 (journal->j_flags & JBD2_BARRIER))
533 blkdev_issue_flush(journal->j_fs_dev, NULL); 533 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
534 BLKDEV_IFL_WAIT);
534 if (!(journal->j_flags & JBD2_ABORT)) 535 if (!(journal->j_flags & JBD2_ABORT))
535 jbd2_journal_update_superblock(journal, 1); 536 jbd2_journal_update_superblock(journal, 1);
536 return 0; 537 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 671da7fb7ffd..75716d3d2be0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -717,7 +717,8 @@ start_journal_io:
717 if (commit_transaction->t_flushed_data_blocks && 717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) && 718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER)) 719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL); 720 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
721 BLKDEV_IFL_WAIT);
721 722
722 /* Done it all: now write the commit record asynchronously. */ 723 /* Done it all: now write the commit record asynchronously. */
723 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 724 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -727,7 +728,8 @@ start_journal_io:
727 if (err) 728 if (err)
728 __jbd2_journal_abort_hard(journal); 729 __jbd2_journal_abort_hard(journal);
729 if (journal->j_flags & JBD2_BARRIER) 730 if (journal->j_flags & JBD2_BARRIER)
730 blkdev_issue_flush(journal->j_dev, NULL); 731 blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
732 BLKDEV_IFL_WAIT);
731 } 733 }
732 734
733 err = journal_finish_inode_data_buffers(journal, commit_transaction); 735 err = journal_finish_inode_data_buffers(journal, commit_transaction);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c03d4dce4d76..bc2ff5932769 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1889,7 +1889,7 @@ static struct kmem_cache *get_slab(size_t size)
1889 BUG_ON(i >= JBD2_MAX_SLABS); 1889 BUG_ON(i >= JBD2_MAX_SLABS);
1890 if (unlikely(i < 0)) 1890 if (unlikely(i < 0))
1891 i = 0; 1891 i = 0;
1892 BUG_ON(jbd2_slab[i] == 0); 1892 BUG_ON(jbd2_slab[i] == NULL);
1893 return jbd2_slab[i]; 1893 return jbd2_slab[i];
1894} 1894}
1895 1895
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7cdc3196476a..a33aab6b5e68 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -419,7 +419,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
419 return rc; 419 return rc;
420} 420}
421 421
422struct xattr_handler jffs2_acl_access_xattr_handler = { 422const struct xattr_handler jffs2_acl_access_xattr_handler = {
423 .prefix = POSIX_ACL_XATTR_ACCESS, 423 .prefix = POSIX_ACL_XATTR_ACCESS,
424 .flags = ACL_TYPE_DEFAULT, 424 .flags = ACL_TYPE_DEFAULT,
425 .list = jffs2_acl_access_listxattr, 425 .list = jffs2_acl_access_listxattr,
@@ -427,7 +427,7 @@ struct xattr_handler jffs2_acl_access_xattr_handler = {
427 .set = jffs2_acl_setxattr, 427 .set = jffs2_acl_setxattr,
428}; 428};
429 429
430struct xattr_handler jffs2_acl_default_xattr_handler = { 430const struct xattr_handler jffs2_acl_default_xattr_handler = {
431 .prefix = POSIX_ACL_XATTR_DEFAULT, 431 .prefix = POSIX_ACL_XATTR_DEFAULT,
432 .flags = ACL_TYPE_DEFAULT, 432 .flags = ACL_TYPE_DEFAULT,
433 .list = jffs2_acl_default_listxattr, 433 .list = jffs2_acl_default_listxattr,
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index f0ba63e3c36b..5e42de8d9541 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -31,8 +31,8 @@ extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
33 33
34extern struct xattr_handler jffs2_acl_access_xattr_handler; 34extern const struct xattr_handler jffs2_acl_access_xattr_handler;
35extern struct xattr_handler jffs2_acl_default_xattr_handler; 35extern const struct xattr_handler jffs2_acl_default_xattr_handler;
36 36
37#else 37#else
38 38
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 3ff50da94789..55f1dde2fa8b 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -23,10 +23,9 @@ static int jffs2_garbage_collect_thread(void *);
23 23
24void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c) 24void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c)
25{ 25{
26 spin_lock(&c->erase_completion_lock); 26 assert_spin_locked(&c->erase_completion_lock);
27 if (c->gc_task && jffs2_thread_should_wake(c)) 27 if (c->gc_task && jffs2_thread_should_wake(c))
28 send_sig(SIGHUP, c->gc_task, 1); 28 send_sig(SIGHUP, c->gc_task, 1);
29 spin_unlock(&c->erase_completion_lock);
30} 29}
31 30
32/* This must only ever be called when no GC thread is currently running */ 31/* This must only ever be called when no GC thread is currently running */
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index b47679be118a..6286ad9b00f7 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -103,9 +103,10 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
103 jffs2_erase_failed(c, jeb, bad_offset); 103 jffs2_erase_failed(c, jeb, bad_offset);
104} 104}
105 105
106void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count) 106int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
107{ 107{
108 struct jffs2_eraseblock *jeb; 108 struct jffs2_eraseblock *jeb;
109 int work_done = 0;
109 110
110 mutex_lock(&c->erase_free_sem); 111 mutex_lock(&c->erase_free_sem);
111 112
@@ -121,6 +122,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
121 mutex_unlock(&c->erase_free_sem); 122 mutex_unlock(&c->erase_free_sem);
122 jffs2_mark_erased_block(c, jeb); 123 jffs2_mark_erased_block(c, jeb);
123 124
125 work_done++;
124 if (!--count) { 126 if (!--count) {
125 D1(printk(KERN_DEBUG "Count reached. jffs2_erase_pending_blocks leaving\n")); 127 D1(printk(KERN_DEBUG "Count reached. jffs2_erase_pending_blocks leaving\n"));
126 goto done; 128 goto done;
@@ -157,6 +159,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
157 mutex_unlock(&c->erase_free_sem); 159 mutex_unlock(&c->erase_free_sem);
158 done: 160 done:
159 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n")); 161 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n"));
162 return work_done;
160} 163}
161 164
162static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 165static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
@@ -165,10 +168,11 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
165 mutex_lock(&c->erase_free_sem); 168 mutex_lock(&c->erase_free_sem);
166 spin_lock(&c->erase_completion_lock); 169 spin_lock(&c->erase_completion_lock);
167 list_move_tail(&jeb->list, &c->erase_complete_list); 170 list_move_tail(&jeb->list, &c->erase_complete_list);
171 /* Wake the GC thread to mark them clean */
172 jffs2_garbage_collect_trigger(c);
168 spin_unlock(&c->erase_completion_lock); 173 spin_unlock(&c->erase_completion_lock);
169 mutex_unlock(&c->erase_free_sem); 174 mutex_unlock(&c->erase_free_sem);
170 /* Ensure that kupdated calls us again to mark them clean */ 175 wake_up(&c->erase_wait);
171 jffs2_erase_pending_trigger(c);
172} 176}
173 177
174static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset) 178static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset)
@@ -487,9 +491,9 @@ filebad:
487 491
488refile: 492refile:
489 /* Stick it back on the list from whence it came and come back later */ 493 /* Stick it back on the list from whence it came and come back later */
490 jffs2_erase_pending_trigger(c);
491 mutex_lock(&c->erase_free_sem); 494 mutex_lock(&c->erase_free_sem);
492 spin_lock(&c->erase_completion_lock); 495 spin_lock(&c->erase_completion_lock);
496 jffs2_garbage_collect_trigger(c);
493 list_move(&jeb->list, &c->erase_complete_list); 497 list_move(&jeb->list, &c->erase_complete_list);
494 spin_unlock(&c->erase_completion_lock); 498 spin_unlock(&c->erase_completion_lock);
495 mutex_unlock(&c->erase_free_sem); 499 mutex_unlock(&c->erase_free_sem);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3451a81b2142..86e0821fc989 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -313,8 +313,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
313 case S_IFBLK: 313 case S_IFBLK:
314 case S_IFCHR: 314 case S_IFCHR:
315 /* Read the device numbers from the media */ 315 /* Read the device numbers from the media */
316 if (f->metadata->size != sizeof(jdev.old) && 316 if (f->metadata->size != sizeof(jdev.old_id) &&
317 f->metadata->size != sizeof(jdev.new)) { 317 f->metadata->size != sizeof(jdev.new_id)) {
318 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); 318 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size);
319 goto error_io; 319 goto error_io;
320 } 320 }
@@ -325,10 +325,10 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
325 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); 325 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino);
326 goto error; 326 goto error;
327 } 327 }
328 if (f->metadata->size == sizeof(jdev.old)) 328 if (f->metadata->size == sizeof(jdev.old_id))
329 rdev = old_decode_dev(je16_to_cpu(jdev.old)); 329 rdev = old_decode_dev(je16_to_cpu(jdev.old_id));
330 else 330 else
331 rdev = new_decode_dev(je32_to_cpu(jdev.new)); 331 rdev = new_decode_dev(je32_to_cpu(jdev.new_id));
332 332
333 case S_IFSOCK: 333 case S_IFSOCK:
334 case S_IFIFO: 334 case S_IFIFO:
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 3b6f2fa12cff..f5e96bd656e8 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -214,6 +214,19 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
214 return ret; 214 return ret;
215 } 215 }
216 216
217 /* If there are any blocks which need erasing, erase them now */
218 if (!list_empty(&c->erase_complete_list) ||
219 !list_empty(&c->erase_pending_list)) {
220 spin_unlock(&c->erase_completion_lock);
221 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
222 if (jffs2_erase_pending_blocks(c, 1)) {
223 mutex_unlock(&c->alloc_sem);
224 return 0;
225 }
226 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
227 spin_lock(&c->erase_completion_lock);
228 }
229
217 /* First, work out which block we're garbage-collecting */ 230 /* First, work out which block we're garbage-collecting */
218 jeb = c->gcblock; 231 jeb = c->gcblock;
219 232
@@ -222,7 +235,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
222 235
223 if (!jeb) { 236 if (!jeb) {
224 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */ 237 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
225 if (!list_empty(&c->erase_pending_list)) { 238 if (c->nr_erasing_blocks) {
226 spin_unlock(&c->erase_completion_lock); 239 spin_unlock(&c->erase_completion_lock);
227 mutex_unlock(&c->alloc_sem); 240 mutex_unlock(&c->alloc_sem);
228 return -EAGAIN; 241 return -EAGAIN;
@@ -435,7 +448,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
435 list_add_tail(&c->gcblock->list, &c->erase_pending_list); 448 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
436 c->gcblock = NULL; 449 c->gcblock = NULL;
437 c->nr_erasing_blocks++; 450 c->nr_erasing_blocks++;
438 jffs2_erase_pending_trigger(c); 451 jffs2_garbage_collect_trigger(c);
439 } 452 }
440 spin_unlock(&c->erase_completion_lock); 453 spin_unlock(&c->erase_completion_lock);
441 454
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 507ed6ec1847..a881a42f19e3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -312,11 +312,11 @@ static inline int jffs2_blocks_use_vmalloc(struct jffs2_sb_info *c)
312static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev) 312static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev)
313{ 313{
314 if (old_valid_dev(rdev)) { 314 if (old_valid_dev(rdev)) {
315 jdev->old = cpu_to_je16(old_encode_dev(rdev)); 315 jdev->old_id = cpu_to_je16(old_encode_dev(rdev));
316 return sizeof(jdev->old); 316 return sizeof(jdev->old_id);
317 } else { 317 } else {
318 jdev->new = cpu_to_je32(new_encode_dev(rdev)); 318 jdev->new_id = cpu_to_je32(new_encode_dev(rdev));
319 return sizeof(jdev->new); 319 return sizeof(jdev->new_id);
320 } 320 }
321} 321}
322 322
@@ -464,7 +464,7 @@ int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb
464int jffs2_do_mount_fs(struct jffs2_sb_info *c); 464int jffs2_do_mount_fs(struct jffs2_sb_info *c);
465 465
466/* erase.c */ 466/* erase.c */
467void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); 467int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count);
468void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); 468void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
469 469
470#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 470#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 191359dde4e1..694aa5b03505 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -116,9 +116,21 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
116 116
117 ret = jffs2_garbage_collect_pass(c); 117 ret = jffs2_garbage_collect_pass(c);
118 118
119 if (ret == -EAGAIN) 119 if (ret == -EAGAIN) {
120 jffs2_erase_pending_blocks(c, 1); 120 spin_lock(&c->erase_completion_lock);
121 else if (ret) 121 if (c->nr_erasing_blocks &&
122 list_empty(&c->erase_pending_list) &&
123 list_empty(&c->erase_complete_list)) {
124 DECLARE_WAITQUEUE(wait, current);
125 set_current_state(TASK_UNINTERRUPTIBLE);
126 add_wait_queue(&c->erase_wait, &wait);
127 D1(printk(KERN_DEBUG "%s waiting for erase to complete\n", __func__));
128 spin_unlock(&c->erase_completion_lock);
129
130 schedule();
131 } else
132 spin_unlock(&c->erase_completion_lock);
133 } else if (ret)
122 return ret; 134 return ret;
123 135
124 cond_resched(); 136 cond_resched();
@@ -217,7 +229,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
217 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 229 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
218 list_move_tail(&ejeb->list, &c->erase_pending_list); 230 list_move_tail(&ejeb->list, &c->erase_pending_list);
219 c->nr_erasing_blocks++; 231 c->nr_erasing_blocks++;
220 jffs2_erase_pending_trigger(c); 232 jffs2_garbage_collect_trigger(c);
221 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 233 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
222 ejeb->offset)); 234 ejeb->offset));
223 } 235 }
@@ -469,7 +481,9 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
469void jffs2_complete_reservation(struct jffs2_sb_info *c) 481void jffs2_complete_reservation(struct jffs2_sb_info *c)
470{ 482{
471 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n")); 483 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n"));
484 spin_lock(&c->erase_completion_lock);
472 jffs2_garbage_collect_trigger(c); 485 jffs2_garbage_collect_trigger(c);
486 spin_unlock(&c->erase_completion_lock);
473 mutex_unlock(&c->alloc_sem); 487 mutex_unlock(&c->alloc_sem);
474} 488}
475 489
@@ -611,7 +625,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
611 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 625 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
612 list_add_tail(&jeb->list, &c->erase_pending_list); 626 list_add_tail(&jeb->list, &c->erase_pending_list);
613 c->nr_erasing_blocks++; 627 c->nr_erasing_blocks++;
614 jffs2_erase_pending_trigger(c); 628 jffs2_garbage_collect_trigger(c);
615 } else { 629 } else {
616 /* Sometimes, however, we leave it elsewhere so it doesn't get 630 /* Sometimes, however, we leave it elsewhere so it doesn't get
617 immediately reused, and we spread the load a bit. */ 631 immediately reused, and we spread the load a bit. */
@@ -732,6 +746,10 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
732 int nr_very_dirty = 0; 746 int nr_very_dirty = 0;
733 struct jffs2_eraseblock *jeb; 747 struct jffs2_eraseblock *jeb;
734 748
749 if (!list_empty(&c->erase_complete_list) ||
750 !list_empty(&c->erase_pending_list))
751 return 1;
752
735 if (c->unchecked_size) { 753 if (c->unchecked_size) {
736 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n", 754 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
737 c->unchecked_size, c->checked_ino)); 755 c->unchecked_size, c->checked_ino));
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index a7f03b7ebcb3..035a767f958b 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -140,8 +140,7 @@ void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c);
140 140
141#endif /* WRITEBUFFER */ 141#endif /* WRITEBUFFER */
142 142
143/* erase.c */ 143static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c)
144static inline void jffs2_erase_pending_trigger(struct jffs2_sb_info *c)
145{ 144{
146 OFNI_BS_2SFFJ(c)->s_dirt = 1; 145 OFNI_BS_2SFFJ(c)->s_dirt = 1;
147} 146}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 696686cc206e..46f870d1cc36 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -260,7 +260,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
260 ret = -EIO; 260 ret = -EIO;
261 goto out; 261 goto out;
262 } 262 }
263 jffs2_erase_pending_trigger(c); 263 spin_lock(&c->erase_completion_lock);
264 jffs2_garbage_collect_trigger(c);
265 spin_unlock(&c->erase_completion_lock);
264 } 266 }
265 ret = 0; 267 ret = 0;
266 out: 268 out:
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index eaccee058583..239f51216a68 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -77,7 +77,7 @@ static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
77 return retlen; 77 return retlen;
78} 78}
79 79
80struct xattr_handler jffs2_security_xattr_handler = { 80const struct xattr_handler jffs2_security_xattr_handler = {
81 .prefix = XATTR_SECURITY_PREFIX, 81 .prefix = XATTR_SECURITY_PREFIX,
82 .list = jffs2_security_listxattr, 82 .list = jffs2_security_listxattr,
83 .set = jffs2_security_setxattr, 83 .set = jffs2_security_setxattr,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 9a80e8e595d0..511e2d609d12 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -63,8 +63,6 @@ static void jffs2_write_super(struct super_block *sb)
63 63
64 if (!(sb->s_flags & MS_RDONLY)) { 64 if (!(sb->s_flags & MS_RDONLY)) {
65 D1(printk(KERN_DEBUG "jffs2_write_super()\n")); 65 D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
66 jffs2_garbage_collect_trigger(c);
67 jffs2_erase_pending_blocks(c, 0);
68 jffs2_flush_wbuf_gc(c, 0); 66 jffs2_flush_wbuf_gc(c, 0);
69 } 67 }
70 68
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 5ef7bac265e5..07ee1546b2fa 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -84,7 +84,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
84 struct jffs2_inodirty *new; 84 struct jffs2_inodirty *new;
85 85
86 /* Mark the superblock dirty so that kupdated will flush... */ 86 /* Mark the superblock dirty so that kupdated will flush... */
87 jffs2_erase_pending_trigger(c); 87 jffs2_dirty_trigger(c);
88 88
89 if (jffs2_wbuf_pending_for_ino(c, ino)) 89 if (jffs2_wbuf_pending_for_ino(c, ino))
90 return; 90 return;
@@ -121,7 +121,7 @@ static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
122 list_add_tail(&jeb->list, &c->erase_pending_list); 122 list_add_tail(&jeb->list, &c->erase_pending_list);
123 c->nr_erasing_blocks++; 123 c->nr_erasing_blocks++;
124 jffs2_erase_pending_trigger(c); 124 jffs2_garbage_collect_trigger(c);
125 } else { 125 } else {
126 /* Sometimes, however, we leave it elsewhere so it doesn't get 126 /* Sometimes, however, we leave it elsewhere so it doesn't get
127 immediately reused, and we spread the load a bit. */ 127 immediately reused, and we spread the load a bit. */
@@ -152,7 +152,7 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock
152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset)); 152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset));
153 list_add(&jeb->list, &c->erase_pending_list); 153 list_add(&jeb->list, &c->erase_pending_list);
154 c->nr_erasing_blocks++; 154 c->nr_erasing_blocks++;
155 jffs2_erase_pending_trigger(c); 155 jffs2_garbage_collect_trigger(c);
156 } 156 }
157 157
158 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) { 158 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) {
@@ -543,7 +543,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
543 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 543 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
544 list_move(&jeb->list, &c->erase_pending_list); 544 list_move(&jeb->list, &c->erase_pending_list);
545 c->nr_erasing_blocks++; 545 c->nr_erasing_blocks++;
546 jffs2_erase_pending_trigger(c); 546 jffs2_garbage_collect_trigger(c);
547 } 547 }
548 548
549 jffs2_dbg_acct_sanity_check_nolock(c, jeb); 549 jffs2_dbg_acct_sanity_check_nolock(c, jeb);
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 9e75c62c85d6..a2d58c96f1b4 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -904,7 +904,7 @@ struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
904 * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) 904 * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags)
905 * is an implementation of setxattr handler on jffs2. 905 * is an implementation of setxattr handler on jffs2.
906 * -------------------------------------------------- */ 906 * -------------------------------------------------- */
907struct xattr_handler *jffs2_xattr_handlers[] = { 907const struct xattr_handler *jffs2_xattr_handlers[] = {
908 &jffs2_user_xattr_handler, 908 &jffs2_user_xattr_handler,
909#ifdef CONFIG_JFFS2_FS_SECURITY 909#ifdef CONFIG_JFFS2_FS_SECURITY
910 &jffs2_security_xattr_handler, 910 &jffs2_security_xattr_handler,
@@ -917,8 +917,8 @@ struct xattr_handler *jffs2_xattr_handlers[] = {
917 NULL 917 NULL
918}; 918};
919 919
920static struct xattr_handler *xprefix_to_handler(int xprefix) { 920static const struct xattr_handler *xprefix_to_handler(int xprefix) {
921 struct xattr_handler *ret; 921 const struct xattr_handler *ret;
922 922
923 switch (xprefix) { 923 switch (xprefix) {
924 case JFFS2_XPREFIX_USER: 924 case JFFS2_XPREFIX_USER:
@@ -955,7 +955,7 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
955 struct jffs2_inode_cache *ic = f->inocache; 955 struct jffs2_inode_cache *ic = f->inocache;
956 struct jffs2_xattr_ref *ref, **pref; 956 struct jffs2_xattr_ref *ref, **pref;
957 struct jffs2_xattr_datum *xd; 957 struct jffs2_xattr_datum *xd;
958 struct xattr_handler *xhandle; 958 const struct xattr_handler *xhandle;
959 ssize_t len, rc; 959 ssize_t len, rc;
960 int retry = 0; 960 int retry = 0;
961 961
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 6e3b5ddfb7ab..cf4f5759b42b 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -93,9 +93,9 @@ extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname
93extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, 93extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
94 const char *buffer, size_t size, int flags); 94 const char *buffer, size_t size, int flags);
95 95
96extern struct xattr_handler *jffs2_xattr_handlers[]; 96extern const struct xattr_handler *jffs2_xattr_handlers[];
97extern struct xattr_handler jffs2_user_xattr_handler; 97extern const struct xattr_handler jffs2_user_xattr_handler;
98extern struct xattr_handler jffs2_trusted_xattr_handler; 98extern const struct xattr_handler jffs2_trusted_xattr_handler;
99 99
100extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); 100extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
101#define jffs2_getxattr generic_getxattr 101#define jffs2_getxattr generic_getxattr
@@ -122,7 +122,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
122 122
123#ifdef CONFIG_JFFS2_FS_SECURITY 123#ifdef CONFIG_JFFS2_FS_SECURITY
124extern int jffs2_init_security(struct inode *inode, struct inode *dir); 124extern int jffs2_init_security(struct inode *inode, struct inode *dir);
125extern struct xattr_handler jffs2_security_xattr_handler; 125extern const struct xattr_handler jffs2_security_xattr_handler;
126#else 126#else
127#define jffs2_init_security(inode,dir) (0) 127#define jffs2_init_security(inode,dir) (0)
128#endif /* CONFIG_JFFS2_FS_SECURITY */ 128#endif /* CONFIG_JFFS2_FS_SECURITY */
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 3e5a5e356e05..1c868194c504 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -47,7 +47,7 @@ static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
47 return retlen; 47 return retlen;
48} 48}
49 49
50struct xattr_handler jffs2_trusted_xattr_handler = { 50const struct xattr_handler jffs2_trusted_xattr_handler = {
51 .prefix = XATTR_TRUSTED_PREFIX, 51 .prefix = XATTR_TRUSTED_PREFIX,
52 .list = jffs2_trusted_listxattr, 52 .list = jffs2_trusted_listxattr,
53 .set = jffs2_trusted_setxattr, 53 .set = jffs2_trusted_setxattr,
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8544af67dffe..916b5c966039 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -47,7 +47,7 @@ static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
47 return retlen; 47 return retlen;
48} 48}
49 49
50struct xattr_handler jffs2_user_xattr_handler = { 50const struct xattr_handler jffs2_user_xattr_handler = {
51 .prefix = XATTR_USER_PREFIX, 51 .prefix = XATTR_USER_PREFIX,
52 .list = jffs2_user_listxattr, 52 .list = jffs2_user_listxattr,
53 .set = jffs2_user_setxattr, 53 .set = jffs2_user_setxattr,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 14ba982b3f24..85d9ec659225 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
98 if (rc) 98 if (rc)
99 return rc; 99 return rc;
100 100
101 if (iattr->ia_valid & ATTR_SIZE) 101 if (is_quota_modification(inode, iattr))
102 dquot_initialize(inode); 102 dquot_initialize(inode);
103 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 103 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
104 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 104 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 9e2f6a721668..c92ea3b3ea5e 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2438,7 +2438,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2438 2438
2439 /* check if this is a control page update for an allocation. 2439 /* check if this is a control page update for an allocation.
2440 * if so, update the leaf to reflect the new leaf value using 2440 * if so, update the leaf to reflect the new leaf value using
2441 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate 2441 * dbSplit(); otherwise (deallocation), use dbJoin() to update
2442 * the leaf with the new value. in addition to updating the 2442 * the leaf with the new value. in addition to updating the
2443 * leaf, dbSplit() will also split the binary buddy system of 2443 * leaf, dbSplit() will also split the binary buddy system of
2444 * the leaves, if required, and bubble new values within the 2444 * the leaves, if required, and bubble new values within the
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 829921b67765..2686531e235a 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -98,14 +98,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
98 goto fail_unlock; 98 goto fail_unlock;
99 } 99 }
100 100
101 inode->i_uid = current_fsuid(); 101 inode_init_owner(inode, parent, mode);
102 if (parent->i_mode & S_ISGID) {
103 inode->i_gid = parent->i_gid;
104 if (S_ISDIR(mode))
105 mode |= S_ISGID;
106 } else
107 inode->i_gid = current_fsgid();
108
109 /* 102 /*
110 * New inodes need to save sane values on disk when 103 * New inodes need to save sane values on disk when
111 * uid & gid mount options are used 104 * uid & gid mount options are used
@@ -121,7 +114,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
121 if (rc) 114 if (rc)
122 goto fail_drop; 115 goto fail_drop;
123 116
124 inode->i_mode = mode;
125 /* inherit flags from parent */ 117 /* inherit flags from parent */
126 jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; 118 jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT;
127 119
@@ -134,7 +126,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
134 if (S_ISLNK(mode)) 126 if (S_ISLNK(mode))
135 jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); 127 jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
136 } 128 }
137 jfs_inode->mode2 |= mode; 129 jfs_inode->mode2 |= inode->i_mode;
138 130
139 inode->i_blocks = 0; 131 inode->i_blocks = 0;
140 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 132 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/libfs.c b/fs/libfs.c
index ea9a6cc9b35c..232bea425b09 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -547,6 +547,40 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
547} 547}
548 548
549/** 549/**
550 * simple_write_to_buffer - copy data from user space to the buffer
551 * @to: the buffer to write to
552 * @available: the size of the buffer
553 * @ppos: the current position in the buffer
554 * @from: the user space buffer to read from
555 * @count: the maximum number of bytes to read
556 *
557 * The simple_write_to_buffer() function reads up to @count bytes from the user
558 * space address starting at @from into the buffer @to at offset @ppos.
559 *
560 * On success, the number of bytes written is returned and the offset @ppos is
561 * advanced by this number, or negative value is returned on error.
562 **/
563ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
564 const void __user *from, size_t count)
565{
566 loff_t pos = *ppos;
567 size_t res;
568
569 if (pos < 0)
570 return -EINVAL;
571 if (pos >= available || !count)
572 return 0;
573 if (count > available - pos)
574 count = available - pos;
575 res = copy_from_user(to + pos, from, count);
576 if (res == count)
577 return -EFAULT;
578 count -= res;
579 *ppos = pos + count;
580 return count;
581}
582
583/**
550 * memory_read_from_buffer - copy data from the buffer 584 * memory_read_from_buffer - copy data from the buffer
551 * @to: the kernel space buffer to read to 585 * @to: the kernel space buffer to read to
552 * @count: the maximum number of bytes to read 586 * @count: the maximum number of bytes to read
@@ -864,6 +898,7 @@ EXPORT_SYMBOL(simple_statfs);
864EXPORT_SYMBOL(simple_sync_file); 898EXPORT_SYMBOL(simple_sync_file);
865EXPORT_SYMBOL(simple_unlink); 899EXPORT_SYMBOL(simple_unlink);
866EXPORT_SYMBOL(simple_read_from_buffer); 900EXPORT_SYMBOL(simple_read_from_buffer);
901EXPORT_SYMBOL(simple_write_to_buffer);
867EXPORT_SYMBOL(memory_read_from_buffer); 902EXPORT_SYMBOL(memory_read_from_buffer);
868EXPORT_SYMBOL(simple_transaction_set); 903EXPORT_SYMBOL(simple_transaction_set);
869EXPORT_SYMBOL(simple_transaction_get); 904EXPORT_SYMBOL(simple_transaction_get);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 243c00071f76..9bd2ce2a3040 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -303,6 +303,11 @@ static void bdev_put_device(struct super_block *sb)
303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); 303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
307{
308 return 0;
309}
310
306static const struct logfs_device_ops bd_devops = { 311static const struct logfs_device_ops bd_devops = {
307 .find_first_sb = bdev_find_first_sb, 312 .find_first_sb = bdev_find_first_sb,
308 .find_last_sb = bdev_find_last_sb, 313 .find_last_sb = bdev_find_last_sb,
@@ -310,6 +315,7 @@ static const struct logfs_device_ops bd_devops = {
310 .readpage = bdev_readpage, 315 .readpage = bdev_readpage,
311 .writeseg = bdev_writeseg, 316 .writeseg = bdev_writeseg,
312 .erase = bdev_erase, 317 .erase = bdev_erase,
318 .can_write_buf = bdev_can_write_buf,
313 .sync = bdev_sync, 319 .sync = bdev_sync,
314 .put_device = bdev_put_device, 320 .put_device = bdev_put_device,
315}; 321};
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index cafb6ef2e05b..a85d47d13e4b 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -9,6 +9,7 @@
9#include <linux/completion.h> 9#include <linux/completion.h>
10#include <linux/mount.h> 10#include <linux/mount.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/slab.h>
12 13
13#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
14 15
@@ -126,7 +127,8 @@ static int mtd_readpage(void *_sb, struct page *page)
126 127
127 err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, 128 err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
128 page_address(page)); 129 page_address(page));
129 if (err == -EUCLEAN) { 130 if (err == -EUCLEAN || err == -EBADMSG) {
131 /* -EBADMSG happens regularly on power failures */
130 err = 0; 132 err = 0;
131 /* FIXME: force GC this segment */ 133 /* FIXME: force GC this segment */
132 } 134 }
@@ -233,12 +235,32 @@ static void mtd_put_device(struct super_block *sb)
233 put_mtd_device(logfs_super(sb)->s_mtd); 235 put_mtd_device(logfs_super(sb)->s_mtd);
234} 236}
235 237
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
239{
240 struct logfs_super *super = logfs_super(sb);
241 void *buf;
242 int err;
243
244 buf = kmalloc(super->s_writesize, GFP_KERNEL);
245 if (!buf)
246 return -ENOMEM;
247 err = mtd_read(sb, ofs, super->s_writesize, buf);
248 if (err)
249 goto out;
250 if (memchr_inv(buf, 0xff, super->s_writesize))
251 err = -EIO;
252 kfree(buf);
253out:
254 return err;
255}
256
236static const struct logfs_device_ops mtd_devops = { 257static const struct logfs_device_ops mtd_devops = {
237 .find_first_sb = mtd_find_first_sb, 258 .find_first_sb = mtd_find_first_sb,
238 .find_last_sb = mtd_find_last_sb, 259 .find_last_sb = mtd_find_last_sb,
239 .readpage = mtd_readpage, 260 .readpage = mtd_readpage,
240 .writeseg = mtd_writeseg, 261 .writeseg = mtd_writeseg,
241 .erase = mtd_erase, 262 .erase = mtd_erase,
263 .can_write_buf = mtd_can_write_buf,
242 .sync = mtd_sync, 264 .sync = mtd_sync,
243 .put_device = mtd_put_device, 265 .put_device = mtd_put_device,
244}; 266};
@@ -250,5 +272,7 @@ int logfs_get_sb_mtd(struct file_system_type *type, int flags,
250 const struct logfs_device_ops *devops = &mtd_devops; 272 const struct logfs_device_ops *devops = &mtd_devops;
251 273
252 mtd = get_mtd_device(NULL, mtdnr); 274 mtd = get_mtd_device(NULL, mtdnr);
275 if (IS_ERR(mtd))
276 return PTR_ERR(mtd);
253 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); 277 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
254} 278}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 2396a85c0f55..72d1893ddd36 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -12,7 +12,7 @@
12 * Atomic dir operations 12 * Atomic dir operations
13 * 13 *
14 * Directory operations are by default not atomic. Dentries and Inodes are 14 * Directory operations are by default not atomic. Dentries and Inodes are
15 * created/removed/altered in seperate operations. Therefore we need to do 15 * created/removed/altered in separate operations. Therefore we need to do
16 * a small amount of journaling. 16 * a small amount of journaling.
17 * 17 *
18 * Create, link, mkdir, mknod and symlink all share the same function to do 18 * Create, link, mkdir, mknod and symlink all share the same function to do
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 370f367a933e..0de524071870 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -161,7 +161,17 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
161 161
162static void logfs_invalidatepage(struct page *page, unsigned long offset) 162static void logfs_invalidatepage(struct page *page, unsigned long offset)
163{ 163{
164 move_page_to_btree(page); 164 struct logfs_block *block = logfs_block(page);
165
166 if (block->reserved_bytes) {
167 struct super_block *sb = page->mapping->host->i_sb;
168 struct logfs_super *super = logfs_super(sb);
169
170 super->s_dirty_pages -= block->reserved_bytes;
171 block->ops->free_block(sb, block);
172 BUG_ON(bitmap_weight(block->alias_map, LOGFS_BLOCK_FACTOR));
173 } else
174 move_page_to_btree(page);
165 BUG_ON(PagePrivate(page) || page->private); 175 BUG_ON(PagePrivate(page) || page->private);
166} 176}
167 177
@@ -212,10 +222,8 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
212int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) 222int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
213{ 223{
214 struct super_block *sb = dentry->d_inode->i_sb; 224 struct super_block *sb = dentry->d_inode->i_sb;
215 struct logfs_super *super = logfs_super(sb);
216 225
217 /* FIXME: write anchor */ 226 logfs_write_anchor(sb);
218 super->s_devops->sync(sb);
219 return 0; 227 return 0;
220} 228}
221 229
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 76c242fbe1b0..caa4419285dc 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -122,7 +122,7 @@ static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
122 logfs_safe_iput(inode, cookie); 122 logfs_safe_iput(inode, cookie);
123} 123}
124 124
125static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist) 125static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
126{ 126{
127 struct logfs_super *super = logfs_super(sb); 127 struct logfs_super *super = logfs_super(sb);
128 struct logfs_segment_header sh; 128 struct logfs_segment_header sh;
@@ -401,7 +401,7 @@ static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
401 segno, (u64)segno << super->s_segshift, 401 segno, (u64)segno << super->s_segshift,
402 dist, no_free_segments(sb), valid, 402 dist, no_free_segments(sb), valid,
403 super->s_free_bytes); 403 super->s_free_bytes);
404 cleaned = logfs_gc_segment(sb, segno, dist); 404 cleaned = logfs_gc_segment(sb, segno);
405 log_gc("GC segment #%02x complete - now %x valid\n", segno, 405 log_gc("GC segment #%02x complete - now %x valid\n", segno,
406 valid - cleaned); 406 valid - cleaned);
407 BUG_ON(cleaned != valid); 407 BUG_ON(cleaned != valid);
@@ -632,38 +632,31 @@ static int check_area(struct super_block *sb, int i)
632{ 632{
633 struct logfs_super *super = logfs_super(sb); 633 struct logfs_super *super = logfs_super(sb);
634 struct logfs_area *area = super->s_area[i]; 634 struct logfs_area *area = super->s_area[i];
635 struct logfs_object_header oh; 635 gc_level_t gc_level;
636 u32 cleaned, valid, ec;
636 u32 segno = area->a_segno; 637 u32 segno = area->a_segno;
637 u32 ofs = area->a_used_bytes; 638 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
638 __be32 crc;
639 int err;
640 639
641 if (!area->a_is_open) 640 if (!area->a_is_open)
642 return 0; 641 return 0;
643 642
644 for (ofs = area->a_used_bytes; 643 if (super->s_devops->can_write_buf(sb, ofs) == 0)
645 ofs <= super->s_segsize - sizeof(oh); 644 return 0;
646 ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
647 err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
648 if (err)
649 return err;
650
651 if (!memchr_inv(&oh, 0xff, sizeof(oh)))
652 break;
653 645
654 crc = logfs_crc32(&oh, sizeof(oh) - 4, 4); 646 printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
655 if (crc != oh.crc) { 647 /*
656 printk(KERN_INFO "interrupted header at %llx\n", 648 * The device cannot write back the write buffer. Most likely the
657 dev_ofs(sb, segno, ofs)); 649 * wbuf was already written out and the system crashed at some point
658 return 0; 650 * before the journal commit happened. In that case we wouldn't have
659 } 651 * to do anything. But if the crash happened before the wbuf was
660 } 652 * written out correctly, we must GC this segment. So assume the
661 if (ofs != area->a_used_bytes) { 653 * worst and always do the GC run.
662 printk(KERN_INFO "%x bytes unaccounted data found at %llx\n", 654 */
663 ofs - area->a_used_bytes, 655 area->a_is_open = 0;
664 dev_ofs(sb, segno, area->a_used_bytes)); 656 valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
665 area->a_used_bytes = ofs; 657 cleaned = logfs_gc_segment(sb, segno);
666 } 658 if (cleaned != valid)
659 return -EIO;
667 return 0; 660 return 0;
668} 661}
669 662
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 14ed27274da2..f602e230e162 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -193,6 +193,7 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
193 inode->i_ctime = CURRENT_TIME; 193 inode->i_ctime = CURRENT_TIME;
194 inode->i_mtime = CURRENT_TIME; 194 inode->i_mtime = CURRENT_TIME;
195 inode->i_nlink = 1; 195 inode->i_nlink = 1;
196 li->li_refcount = 1;
196 INIT_LIST_HEAD(&li->li_freeing_list); 197 INIT_LIST_HEAD(&li->li_freeing_list);
197 198
198 for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) 199 for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
@@ -326,7 +327,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
326 u64 ino; 327 u64 ino;
327 328
328 mutex_lock(&super->s_journal_mutex); 329 mutex_lock(&super->s_journal_mutex);
329 ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino); 330 ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino + 1);
330 super->s_last_ino = ino; 331 super->s_last_ino = ino;
331 super->s_inos_till_wrap--; 332 super->s_inos_till_wrap--;
332 if (super->s_inos_till_wrap < 0) { 333 if (super->s_inos_till_wrap < 0) {
@@ -357,14 +358,7 @@ struct inode *logfs_new_inode(struct inode *dir, int mode)
357 inode->i_mode = mode; 358 inode->i_mode = mode;
358 logfs_set_ino_generation(sb, inode); 359 logfs_set_ino_generation(sb, inode);
359 360
360 inode->i_uid = current_fsuid(); 361 inode_init_owner(inode, dir, mode);
361 inode->i_gid = current_fsgid();
362 if (dir->i_mode & S_ISGID) {
363 inode->i_gid = dir->i_gid;
364 if (S_ISDIR(mode))
365 inode->i_mode |= S_ISGID;
366 }
367
368 logfs_inode_setops(inode); 362 logfs_inode_setops(inode);
369 insert_inode_hash(inode); 363 insert_inode_hash(inode);
370 364
@@ -386,8 +380,7 @@ static void logfs_init_once(void *_li)
386 380
387static int logfs_sync_fs(struct super_block *sb, int wait) 381static int logfs_sync_fs(struct super_block *sb, int wait)
388{ 382{
389 /* FIXME: write anchor */ 383 logfs_write_anchor(sb);
390 logfs_super(sb)->s_devops->sync(sb);
391 return 0; 384 return 0;
392} 385}
393 386
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index fb0a613f885b..4b0e0616b357 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -132,10 +132,9 @@ static int read_area(struct super_block *sb, struct logfs_je_area *a)
132 132
133 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); 133 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
134 if (super->s_writesize > 1) 134 if (super->s_writesize > 1)
135 logfs_buf_recover(area, ofs, a + 1, super->s_writesize); 135 return logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
136 else 136 else
137 logfs_buf_recover(area, ofs, NULL, 0); 137 return logfs_buf_recover(area, ofs, NULL, 0);
138 return 0;
139} 138}
140 139
141static void *unpack(void *from, void *to) 140static void *unpack(void *from, void *to)
@@ -245,7 +244,7 @@ static int read_je(struct super_block *sb, u64 ofs)
245 read_erasecount(sb, unpack(jh, scratch)); 244 read_erasecount(sb, unpack(jh, scratch));
246 break; 245 break;
247 case JE_AREA: 246 case JE_AREA:
248 read_area(sb, unpack(jh, scratch)); 247 err = read_area(sb, unpack(jh, scratch));
249 break; 248 break;
250 case JE_OBJ_ALIAS: 249 case JE_OBJ_ALIAS:
251 err = logfs_load_object_aliases(sb, unpack(jh, scratch), 250 err = logfs_load_object_aliases(sb, unpack(jh, scratch),
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 0a3df1a0c936..1a9db84f8d8f 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -144,6 +144,7 @@ struct logfs_area_ops {
144 * @erase: erase one segment 144 * @erase: erase one segment
145 * @read: read from the device 145 * @read: read from the device
146 * @erase: erase part of the device 146 * @erase: erase part of the device
147 * @can_write_buf: decide whether wbuf can be written to ofs
147 */ 148 */
148struct logfs_device_ops { 149struct logfs_device_ops {
149 struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs); 150 struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
@@ -153,6 +154,7 @@ struct logfs_device_ops {
153 void (*writeseg)(struct super_block *sb, u64 ofs, size_t len); 154 void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
154 int (*erase)(struct super_block *sb, loff_t ofs, size_t len, 155 int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
155 int ensure_write); 156 int ensure_write);
157 int (*can_write_buf)(struct super_block *sb, u64 ofs);
156 void (*sync)(struct super_block *sb); 158 void (*sync)(struct super_block *sb);
157 void (*put_device)(struct super_block *sb); 159 void (*put_device)(struct super_block *sb);
158}; 160};
@@ -394,6 +396,7 @@ struct logfs_super {
394 int s_lock_count; 396 int s_lock_count;
395 mempool_t *s_block_pool; /* struct logfs_block pool */ 397 mempool_t *s_block_pool; /* struct logfs_block pool */
396 mempool_t *s_shadow_pool; /* struct logfs_shadow pool */ 398 mempool_t *s_shadow_pool; /* struct logfs_shadow pool */
399 struct list_head s_writeback_list; /* writeback pages */
397 /* 400 /*
398 * Space accounting: 401 * Space accounting:
399 * - s_used_bytes specifies space used to store valid data objects. 402 * - s_used_bytes specifies space used to store valid data objects.
@@ -598,19 +601,19 @@ void freeseg(struct super_block *sb, u32 segno);
598int logfs_init_areas(struct super_block *sb); 601int logfs_init_areas(struct super_block *sb);
599void logfs_cleanup_areas(struct super_block *sb); 602void logfs_cleanup_areas(struct super_block *sb);
600int logfs_open_area(struct logfs_area *area, size_t bytes); 603int logfs_open_area(struct logfs_area *area, size_t bytes);
601void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, 604int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
602 int use_filler); 605 int use_filler);
603 606
604static inline void logfs_buf_write(struct logfs_area *area, u64 ofs, 607static inline int logfs_buf_write(struct logfs_area *area, u64 ofs,
605 void *buf, size_t len) 608 void *buf, size_t len)
606{ 609{
607 __logfs_buf_write(area, ofs, buf, len, 0); 610 return __logfs_buf_write(area, ofs, buf, len, 0);
608} 611}
609 612
610static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs, 613static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs,
611 void *buf, size_t len) 614 void *buf, size_t len)
612{ 615{
613 __logfs_buf_write(area, ofs, buf, len, 1); 616 return __logfs_buf_write(area, ofs, buf, len, 1);
614} 617}
615 618
616/* super.c */ 619/* super.c */
@@ -704,7 +707,7 @@ static inline gc_level_t expand_level(u64 ino, level_t __level)
704 u8 level = (__force u8)__level; 707 u8 level = (__force u8)__level;
705 708
706 if (ino == LOGFS_INO_MASTER) { 709 if (ino == LOGFS_INO_MASTER) {
707 /* ifile has seperate areas */ 710 /* ifile has separate areas */
708 level += LOGFS_MAX_LEVELS; 711 level += LOGFS_MAX_LEVELS;
709 } 712 }
710 return (__force gc_level_t)level; 713 return (__force gc_level_t)level;
diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h
index f674725663fe..ae960519c54a 100644
--- a/fs/logfs/logfs_abi.h
+++ b/fs/logfs/logfs_abi.h
@@ -50,9 +50,9 @@ static inline void check_##type(void) \
50 * 12 - gc recycled blocks, long-lived data 50 * 12 - gc recycled blocks, long-lived data
51 * 13 - replacement blocks, short-lived data 51 * 13 - replacement blocks, short-lived data
52 * 52 *
53 * Levels 1-11 are necessary for robust gc operations and help seperate 53 * Levels 1-11 are necessary for robust gc operations and help separate
54 * short-lived metadata from longer-lived file data. In the future, 54 * short-lived metadata from longer-lived file data. In the future,
55 * file data should get seperated into several segments based on simple 55 * file data should get separated into several segments based on simple
56 * heuristics. Old data recycled during gc operation is expected to be 56 * heuristics. Old data recycled during gc operation is expected to be
57 * long-lived. New data is of uncertain life expectancy. New data 57 * long-lived. New data is of uncertain life expectancy. New data
58 * used to replace older blocks in existing files is expected to be 58 * used to replace older blocks in existing files is expected to be
@@ -117,7 +117,7 @@ static inline void check_##type(void) \
117#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED) 117#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED)
118 118
119/* 119/*
120 * LogFS needs to seperate data into levels. Each level is defined as the 120 * LogFS needs to separate data into levels. Each level is defined as the
121 * maximal possible distance from the master inode (inode of the inode file). 121 * maximal possible distance from the master inode (inode of the inode file).
122 * Data blocks reside on level 0, 1x indirect block on level 1, etc. 122 * Data blocks reside on level 0, 1x indirect block on level 1, etc.
123 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11. 123 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
@@ -204,7 +204,7 @@ SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
204 * @ds_crc: crc32 of structure starting with the next field 204 * @ds_crc: crc32 of structure starting with the next field
205 * @ds_ifile_levels: maximum number of levels for ifile 205 * @ds_ifile_levels: maximum number of levels for ifile
206 * @ds_iblock_levels: maximum number of levels for regular files 206 * @ds_iblock_levels: maximum number of levels for regular files
207 * @ds_data_levels: number of seperate levels for data 207 * @ds_data_levels: number of separate levels for data
208 * @pad0: reserved, must be 0 208 * @pad0: reserved, must be 0
209 * @ds_feature_incompat: incompatible filesystem features 209 * @ds_feature_incompat: incompatible filesystem features
210 * @ds_feature_ro_compat: read-only compatible filesystem features 210 * @ds_feature_ro_compat: read-only compatible filesystem features
@@ -456,7 +456,7 @@ enum logfs_vim {
456 * @vim: life expectancy of data 456 * @vim: life expectancy of data
457 * 457 *
458 * "Areas" are segments currently being used for writing. There is at least 458 * "Areas" are segments currently being used for writing. There is at least
459 * one area per GC level. Several may be used to seperate long-living from 459 * one area per GC level. Several may be used to separate long-living from
460 * short-living data. If an area with unknown vim is encountered, it can 460 * short-living data. If an area with unknown vim is encountered, it can
461 * simply be closed. 461 * simply be closed.
462 * The write buffer immediately follow this header. 462 * The write buffer immediately follow this header.
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 3159db6958e5..0718d112a1a5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -892,6 +892,8 @@ u64 logfs_seek_hole(struct inode *inode, u64 bix)
892 return bix; 892 return bix;
893 else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED) 893 else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED)
894 bix = maxbix(li->li_height); 894 bix = maxbix(li->li_height);
895 else if (bix >= maxbix(li->li_height))
896 return bix;
895 else { 897 else {
896 bix = seek_holedata_loop(inode, bix, 0); 898 bix = seek_holedata_loop(inode, bix, 0);
897 if (bix < maxbix(li->li_height)) 899 if (bix < maxbix(li->li_height))
@@ -1093,17 +1095,25 @@ static int logfs_reserve_bytes(struct inode *inode, int bytes)
1093int get_page_reserve(struct inode *inode, struct page *page) 1095int get_page_reserve(struct inode *inode, struct page *page)
1094{ 1096{
1095 struct logfs_super *super = logfs_super(inode->i_sb); 1097 struct logfs_super *super = logfs_super(inode->i_sb);
1098 struct logfs_block *block = logfs_block(page);
1096 int ret; 1099 int ret;
1097 1100
1098 if (logfs_block(page) && logfs_block(page)->reserved_bytes) 1101 if (block && block->reserved_bytes)
1099 return 0; 1102 return 0;
1100 1103
1101 logfs_get_wblocks(inode->i_sb, page, WF_LOCK); 1104 logfs_get_wblocks(inode->i_sb, page, WF_LOCK);
1102 ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE); 1105 while ((ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE)) &&
1106 !list_empty(&super->s_writeback_list)) {
1107 block = list_entry(super->s_writeback_list.next,
1108 struct logfs_block, alias_list);
1109 block->ops->write_block(block);
1110 }
1103 if (!ret) { 1111 if (!ret) {
1104 alloc_data_block(inode, page); 1112 alloc_data_block(inode, page);
1105 logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE; 1113 block = logfs_block(page);
1114 block->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
1106 super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE; 1115 super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE;
1116 list_move_tail(&block->alias_list, &super->s_writeback_list);
1107 } 1117 }
1108 logfs_put_wblocks(inode->i_sb, page, WF_LOCK); 1118 logfs_put_wblocks(inode->i_sb, page, WF_LOCK);
1109 return ret; 1119 return ret;
@@ -1861,7 +1871,7 @@ int logfs_truncate(struct inode *inode, u64 target)
1861 size = target; 1871 size = target;
1862 1872
1863 logfs_get_wblocks(sb, NULL, 1); 1873 logfs_get_wblocks(sb, NULL, 1);
1864 err = __logfs_truncate(inode, target); 1874 err = __logfs_truncate(inode, size);
1865 if (!err) 1875 if (!err)
1866 err = __logfs_write_inode(inode, 0); 1876 err = __logfs_write_inode(inode, 0);
1867 logfs_put_wblocks(sb, NULL, 1); 1877 logfs_put_wblocks(sb, NULL, 1);
@@ -2249,6 +2259,7 @@ int logfs_init_rw(struct super_block *sb)
2249 int min_fill = 3 * super->s_no_blocks; 2259 int min_fill = 3 * super->s_no_blocks;
2250 2260
2251 INIT_LIST_HEAD(&super->s_object_alias); 2261 INIT_LIST_HEAD(&super->s_object_alias);
2262 INIT_LIST_HEAD(&super->s_writeback_list);
2252 mutex_init(&super->s_write_mutex); 2263 mutex_init(&super->s_write_mutex);
2253 super->s_block_pool = mempool_create_kmalloc_pool(min_fill, 2264 super->s_block_pool = mempool_create_kmalloc_pool(min_fill,
2254 sizeof(struct logfs_block)); 2265 sizeof(struct logfs_block));
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index f77ce2b470ba..a9657afb70ad 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -67,7 +67,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
67 return page; 67 return page;
68} 68}
69 69
70void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, 70int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
71 int use_filler) 71 int use_filler)
72{ 72{
73 pgoff_t index = ofs >> PAGE_SHIFT; 73 pgoff_t index = ofs >> PAGE_SHIFT;
@@ -81,8 +81,10 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
81 copylen = min((ulong)len, PAGE_SIZE - offset); 81 copylen = min((ulong)len, PAGE_SIZE - offset);
82 82
83 page = get_mapping_page(area->a_sb, index, use_filler); 83 page = get_mapping_page(area->a_sb, index, use_filler);
84 SetPageUptodate(page); 84 if (IS_ERR(page))
85 return PTR_ERR(page);
85 BUG_ON(!page); /* FIXME: reserve a pool */ 86 BUG_ON(!page); /* FIXME: reserve a pool */
87 SetPageUptodate(page);
86 memcpy(page_address(page) + offset, buf, copylen); 88 memcpy(page_address(page) + offset, buf, copylen);
87 SetPagePrivate(page); 89 SetPagePrivate(page);
88 page_cache_release(page); 90 page_cache_release(page);
@@ -92,6 +94,7 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
92 offset = 0; 94 offset = 0;
93 index++; 95 index++;
94 } while (len); 96 } while (len);
97 return 0;
95} 98}
96 99
97static void pad_partial_page(struct logfs_area *area) 100static void pad_partial_page(struct logfs_area *area)
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d7c23ed8349a..d651e10a1e9c 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -138,10 +138,14 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
138 sb->s_fs_info = super; 138 sb->s_fs_info = super;
139 sb->s_mtd = super->s_mtd; 139 sb->s_mtd = super->s_mtd;
140 sb->s_bdev = super->s_bdev; 140 sb->s_bdev = super->s_bdev;
141#ifdef CONFIG_BLOCK
141 if (sb->s_bdev) 142 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; 143 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
144#endif
145#ifdef CONFIG_MTD
143 if (sb->s_mtd) 146 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info; 147 sb->s_bdi = sb->s_mtd->backing_dev_info;
148#endif
145 return 0; 149 return 0;
146} 150}
147 151
@@ -382,7 +386,7 @@ static struct page *find_super_block(struct super_block *sb)
382 if (!first || IS_ERR(first)) 386 if (!first || IS_ERR(first))
383 return NULL; 387 return NULL;
384 last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]); 388 last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
385 if (!last || IS_ERR(first)) { 389 if (!last || IS_ERR(last)) {
386 page_cache_release(first); 390 page_cache_release(first);
387 return NULL; 391 return NULL;
388 } 392 }
@@ -413,7 +417,7 @@ static int __logfs_read_sb(struct super_block *sb)
413 417
414 page = find_super_block(sb); 418 page = find_super_block(sb);
415 if (!page) 419 if (!page)
416 return -EIO; 420 return -EINVAL;
417 421
418 ds = page_address(page); 422 ds = page_address(page);
419 super->s_size = be64_to_cpu(ds->ds_filesystem_size); 423 super->s_size = be64_to_cpu(ds->ds_filesystem_size);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 6ac693faae49..482779fe4e7c 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -221,7 +221,7 @@ void minix_free_inode(struct inode * inode)
221 clear_inode(inode); /* clear in-memory copy */ 221 clear_inode(inode); /* clear in-memory copy */
222} 222}
223 223
224struct inode * minix_new_inode(const struct inode * dir, int * error) 224struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
225{ 225{
226 struct super_block *sb = dir->i_sb; 226 struct super_block *sb = dir->i_sb;
227 struct minix_sb_info *sbi = minix_sb(sb); 227 struct minix_sb_info *sbi = minix_sb(sb);
@@ -263,8 +263,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
263 iput(inode); 263 iput(inode);
264 return NULL; 264 return NULL;
265 } 265 }
266 inode->i_uid = current_fsuid(); 266 inode_init_owner(inode, dir, mode);
267 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid();
268 inode->i_ino = j; 267 inode->i_ino = j;
269 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 268 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
270 inode->i_blocks = 0; 269 inode->i_blocks = 0;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 9dcf95b42116..111f34ee9e3b 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -46,7 +46,7 @@ struct minix_sb_info {
46extern struct inode *minix_iget(struct super_block *, unsigned long); 46extern struct inode *minix_iget(struct super_block *, unsigned long);
47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); 47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); 48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
49extern struct inode * minix_new_inode(const struct inode * dir, int * error); 49extern struct inode * minix_new_inode(const struct inode *, int, int *);
50extern void minix_free_inode(struct inode * inode); 50extern void minix_free_inode(struct inode * inode);
51extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); 51extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi);
52extern int minix_new_block(struct inode * inode); 52extern int minix_new_block(struct inode * inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 32b131cd6121..e20ee85955d1 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -46,10 +46,9 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_
46 if (!old_valid_dev(rdev)) 46 if (!old_valid_dev(rdev))
47 return -EINVAL; 47 return -EINVAL;
48 48
49 inode = minix_new_inode(dir, &error); 49 inode = minix_new_inode(dir, mode, &error);
50 50
51 if (inode) { 51 if (inode) {
52 inode->i_mode = mode;
53 minix_set_inode(inode, rdev); 52 minix_set_inode(inode, rdev);
54 mark_inode_dirty(inode); 53 mark_inode_dirty(inode);
55 error = add_nondir(dentry, inode); 54 error = add_nondir(dentry, inode);
@@ -73,11 +72,10 @@ static int minix_symlink(struct inode * dir, struct dentry *dentry,
73 if (i > dir->i_sb->s_blocksize) 72 if (i > dir->i_sb->s_blocksize)
74 goto out; 73 goto out;
75 74
76 inode = minix_new_inode(dir, &err); 75 inode = minix_new_inode(dir, S_IFLNK | 0777, &err);
77 if (!inode) 76 if (!inode)
78 goto out; 77 goto out;
79 78
80 inode->i_mode = S_IFLNK | 0777;
81 minix_set_inode(inode, 0); 79 minix_set_inode(inode, 0);
82 err = page_symlink(inode, symname, i); 80 err = page_symlink(inode, symname, i);
83 if (err) 81 if (err)
@@ -117,13 +115,10 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
117 115
118 inode_inc_link_count(dir); 116 inode_inc_link_count(dir);
119 117
120 inode = minix_new_inode(dir, &err); 118 inode = minix_new_inode(dir, mode, &err);
121 if (!inode) 119 if (!inode)
122 goto out_dir; 120 goto out_dir;
123 121
124 inode->i_mode = S_IFDIR | mode;
125 if (dir->i_mode & S_ISGID)
126 inode->i_mode |= S_ISGID;
127 minix_set_inode(inode, 0); 122 minix_set_inode(inode, 0);
128 123
129 inode_inc_link_count(inode); 124 inode_inc_link_count(inode);
diff --git a/fs/namei.c b/fs/namei.c
index b86b96fe1dc3..48e1f60520ea 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -523,9 +523,10 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
523static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 523static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
524{ 524{
525 dput(nd->path.dentry); 525 dput(nd->path.dentry);
526 if (nd->path.mnt != path->mnt) 526 if (nd->path.mnt != path->mnt) {
527 mntput(nd->path.mnt); 527 mntput(nd->path.mnt);
528 nd->path.mnt = path->mnt; 528 nd->path.mnt = path->mnt;
529 }
529 nd->path.dentry = path->dentry; 530 nd->path.dentry = path->dentry;
530} 531}
531 532
diff --git a/fs/namespace.c b/fs/namespace.c
index f20cb57d1067..88058de59c7c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -628,7 +628,6 @@ repeat:
628 mnt->mnt_pinned = 0; 628 mnt->mnt_pinned = 0;
629 spin_unlock(&vfsmount_lock); 629 spin_unlock(&vfsmount_lock);
630 acct_auto_close_mnt(mnt); 630 acct_auto_close_mnt(mnt);
631 security_sb_umount_close(mnt);
632 goto repeat; 631 goto repeat;
633 } 632 }
634} 633}
@@ -1117,8 +1116,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
1117 retval = 0; 1116 retval = 0;
1118 } 1117 }
1119 spin_unlock(&vfsmount_lock); 1118 spin_unlock(&vfsmount_lock);
1120 if (retval)
1121 security_sb_umount_busy(mnt);
1122 up_write(&namespace_sem); 1119 up_write(&namespace_sem);
1123 release_mounts(&umount_list); 1120 release_mounts(&umount_list);
1124 return retval; 1121 return retval;
@@ -1435,17 +1432,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
1435 if (cant_mount(path->dentry)) 1432 if (cant_mount(path->dentry))
1436 goto out_unlock; 1433 goto out_unlock;
1437 1434
1438 err = security_sb_check_sb(mnt, path);
1439 if (err)
1440 goto out_unlock;
1441
1442 err = -ENOENT;
1443 if (!d_unlinked(path->dentry)) 1435 if (!d_unlinked(path->dentry))
1444 err = attach_recursive_mnt(mnt, path, NULL); 1436 err = attach_recursive_mnt(mnt, path, NULL);
1445out_unlock: 1437out_unlock:
1446 mutex_unlock(&path->dentry->d_inode->i_mutex); 1438 mutex_unlock(&path->dentry->d_inode->i_mutex);
1447 if (!err)
1448 security_sb_post_addmount(mnt, path);
1449 return err; 1439 return err;
1450} 1440}
1451 1441
@@ -1581,8 +1571,6 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1581 } 1571 }
1582 up_write(&sb->s_umount); 1572 up_write(&sb->s_umount);
1583 if (!err) { 1573 if (!err) {
1584 security_sb_post_remount(path->mnt, flags, data);
1585
1586 spin_lock(&vfsmount_lock); 1574 spin_lock(&vfsmount_lock);
1587 touch_mnt_namespace(path->mnt->mnt_ns); 1575 touch_mnt_namespace(path->mnt->mnt_ns);
1588 spin_unlock(&vfsmount_lock); 1576 spin_unlock(&vfsmount_lock);
@@ -2277,7 +2265,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2277 touch_mnt_namespace(current->nsproxy->mnt_ns); 2265 touch_mnt_namespace(current->nsproxy->mnt_ns);
2278 spin_unlock(&vfsmount_lock); 2266 spin_unlock(&vfsmount_lock);
2279 chroot_fs_refs(&root, &new); 2267 chroot_fs_refs(&root, &new);
2280 security_sb_post_pivotroot(&root, &new);
2281 error = 0; 2268 error = 0;
2282 path_put(&root_parent); 2269 path_put(&root_parent);
2283 path_put(&parent_path); 2270 path_put(&parent_path);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 7edfcd4d5e52..92dde6f8d893 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -51,7 +51,7 @@ const struct file_operations ncp_dir_operations =
51{ 51{
52 .read = generic_read_dir, 52 .read = generic_read_dir,
53 .readdir = ncp_readdir, 53 .readdir = ncp_readdir,
54 .ioctl = ncp_ioctl, 54 .unlocked_ioctl = ncp_ioctl,
55#ifdef CONFIG_COMPAT 55#ifdef CONFIG_COMPAT
56 .compat_ioctl = ncp_compat_ioctl, 56 .compat_ioctl = ncp_compat_ioctl,
57#endif 57#endif
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 1daabb90e0a5..b93870892892 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -295,7 +295,7 @@ const struct file_operations ncp_file_operations =
295 .llseek = ncp_remote_llseek, 295 .llseek = ncp_remote_llseek,
296 .read = ncp_file_read, 296 .read = ncp_file_read,
297 .write = ncp_file_write, 297 .write = ncp_file_write,
298 .ioctl = ncp_ioctl, 298 .unlocked_ioctl = ncp_ioctl,
299#ifdef CONFIG_COMPAT 299#ifdef CONFIG_COMPAT
300 .compat_ioctl = ncp_compat_ioctl, 300 .compat_ioctl = ncp_compat_ioctl,
301#endif 301#endif
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60a5e2864ea8..023c03d02070 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -20,6 +20,7 @@
20#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/smp_lock.h>
23 24
24#include <linux/ncp_fs.h> 25#include <linux/ncp_fs.h>
25 26
@@ -261,9 +262,9 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
261} 262}
262#endif /* CONFIG_NCPFS_NLS */ 263#endif /* CONFIG_NCPFS_NLS */
263 264
264static int __ncp_ioctl(struct inode *inode, struct file *filp, 265static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
265 unsigned int cmd, unsigned long arg)
266{ 266{
267 struct inode *inode = filp->f_dentry->d_inode;
267 struct ncp_server *server = NCP_SERVER(inode); 268 struct ncp_server *server = NCP_SERVER(inode);
268 int result; 269 int result;
269 struct ncp_ioctl_request request; 270 struct ncp_ioctl_request request;
@@ -841,11 +842,11 @@ static int ncp_ioctl_need_write(unsigned int cmd)
841 } 842 }
842} 843}
843 844
844int ncp_ioctl(struct inode *inode, struct file *filp, 845long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
845 unsigned int cmd, unsigned long arg)
846{ 846{
847 int ret; 847 long ret;
848 848
849 lock_kernel();
849 if (ncp_ioctl_need_write(cmd)) { 850 if (ncp_ioctl_need_write(cmd)) {
850 /* 851 /*
851 * inside the ioctl(), any failures which 852 * inside the ioctl(), any failures which
@@ -853,24 +854,28 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
853 * -EACCESS, so it seems consistent to keep 854 * -EACCESS, so it seems consistent to keep
854 * that here. 855 * that here.
855 */ 856 */
856 if (mnt_want_write(filp->f_path.mnt)) 857 if (mnt_want_write(filp->f_path.mnt)) {
857 return -EACCES; 858 ret = -EACCES;
859 goto out;
860 }
858 } 861 }
859 ret = __ncp_ioctl(inode, filp, cmd, arg); 862 ret = __ncp_ioctl(filp, cmd, arg);
860 if (ncp_ioctl_need_write(cmd)) 863 if (ncp_ioctl_need_write(cmd))
861 mnt_drop_write(filp->f_path.mnt); 864 mnt_drop_write(filp->f_path.mnt);
865
866out:
867 unlock_kernel();
862 return ret; 868 return ret;
863} 869}
864 870
865#ifdef CONFIG_COMPAT 871#ifdef CONFIG_COMPAT
866long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 872long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
867{ 873{
868 struct inode *inode = file->f_path.dentry->d_inode; 874 long ret;
869 int ret;
870 875
871 lock_kernel(); 876 lock_kernel();
872 arg = (unsigned long) compat_ptr(arg); 877 arg = (unsigned long) compat_ptr(arg);
873 ret = ncp_ioctl(inode, file, cmd, arg); 878 ret = ncp_ioctl(file, cmd, arg);
874 unlock_kernel(); 879 unlock_kernel();
875 return ret; 880 return ret;
876} 881}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index acc9c4943b84..7ec9b34a59f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -934,7 +934,6 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
934 } 934 }
935 935
936 fsinfo.fattr = fattr; 936 fsinfo.fattr = fattr;
937 nfs_fattr_init(fattr);
938 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 937 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
939 if (error < 0) 938 if (error < 0)
940 goto out_error; 939 goto out_error;
@@ -1047,13 +1046,18 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1047 struct nfs_fh *mntfh) 1046 struct nfs_fh *mntfh)
1048{ 1047{
1049 struct nfs_server *server; 1048 struct nfs_server *server;
1050 struct nfs_fattr fattr; 1049 struct nfs_fattr *fattr;
1051 int error; 1050 int error;
1052 1051
1053 server = nfs_alloc_server(); 1052 server = nfs_alloc_server();
1054 if (!server) 1053 if (!server)
1055 return ERR_PTR(-ENOMEM); 1054 return ERR_PTR(-ENOMEM);
1056 1055
1056 error = -ENOMEM;
1057 fattr = nfs_alloc_fattr();
1058 if (fattr == NULL)
1059 goto error;
1060
1057 /* Get a client representation */ 1061 /* Get a client representation */
1058 error = nfs_init_server(server, data); 1062 error = nfs_init_server(server, data);
1059 if (error < 0) 1063 if (error < 0)
@@ -1064,7 +1068,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1064 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1068 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1065 1069
1066 /* Probe the root fh to retrieve its FSID */ 1070 /* Probe the root fh to retrieve its FSID */
1067 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1071 error = nfs_probe_fsinfo(server, mntfh, fattr);
1068 if (error < 0) 1072 if (error < 0)
1069 goto error; 1073 goto error;
1070 if (server->nfs_client->rpc_ops->version == 3) { 1074 if (server->nfs_client->rpc_ops->version == 3) {
@@ -1077,14 +1081,14 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1077 server->namelen = NFS2_MAXNAMLEN; 1081 server->namelen = NFS2_MAXNAMLEN;
1078 } 1082 }
1079 1083
1080 if (!(fattr.valid & NFS_ATTR_FATTR)) { 1084 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1081 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); 1085 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
1082 if (error < 0) { 1086 if (error < 0) {
1083 dprintk("nfs_create_server: getattr error = %d\n", -error); 1087 dprintk("nfs_create_server: getattr error = %d\n", -error);
1084 goto error; 1088 goto error;
1085 } 1089 }
1086 } 1090 }
1087 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); 1091 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
1088 1092
1089 dprintk("Server FSID: %llx:%llx\n", 1093 dprintk("Server FSID: %llx:%llx\n",
1090 (unsigned long long) server->fsid.major, 1094 (unsigned long long) server->fsid.major,
@@ -1096,9 +1100,11 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1096 spin_unlock(&nfs_client_lock); 1100 spin_unlock(&nfs_client_lock);
1097 1101
1098 server->mount_time = jiffies; 1102 server->mount_time = jiffies;
1103 nfs_free_fattr(fattr);
1099 return server; 1104 return server;
1100 1105
1101error: 1106error:
1107 nfs_free_fattr(fattr);
1102 nfs_free_server(server); 1108 nfs_free_server(server);
1103 return ERR_PTR(error); 1109 return ERR_PTR(error);
1104} 1110}
@@ -1340,7 +1346,7 @@ error:
1340struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, 1346struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1341 struct nfs_fh *mntfh) 1347 struct nfs_fh *mntfh)
1342{ 1348{
1343 struct nfs_fattr fattr; 1349 struct nfs_fattr *fattr;
1344 struct nfs_server *server; 1350 struct nfs_server *server;
1345 int error; 1351 int error;
1346 1352
@@ -1350,6 +1356,11 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1350 if (!server) 1356 if (!server)
1351 return ERR_PTR(-ENOMEM); 1357 return ERR_PTR(-ENOMEM);
1352 1358
1359 error = -ENOMEM;
1360 fattr = nfs_alloc_fattr();
1361 if (fattr == NULL)
1362 goto error;
1363
1353 /* set up the general RPC client */ 1364 /* set up the general RPC client */
1354 error = nfs4_init_server(server, data); 1365 error = nfs4_init_server(server, data);
1355 if (error < 0) 1366 if (error < 0)
@@ -1364,7 +1375,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1364 goto error; 1375 goto error;
1365 1376
1366 /* Probe the root fh to retrieve its FSID */ 1377 /* Probe the root fh to retrieve its FSID */
1367 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); 1378 error = nfs4_get_rootfh(server, mntfh);
1368 if (error < 0) 1379 if (error < 0)
1369 goto error; 1380 goto error;
1370 1381
@@ -1375,7 +1386,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1375 1386
1376 nfs4_session_set_rwsize(server); 1387 nfs4_session_set_rwsize(server);
1377 1388
1378 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1389 error = nfs_probe_fsinfo(server, mntfh, fattr);
1379 if (error < 0) 1390 if (error < 0)
1380 goto error; 1391 goto error;
1381 1392
@@ -1389,9 +1400,11 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1389 1400
1390 server->mount_time = jiffies; 1401 server->mount_time = jiffies;
1391 dprintk("<-- nfs4_create_server() = %p\n", server); 1402 dprintk("<-- nfs4_create_server() = %p\n", server);
1403 nfs_free_fattr(fattr);
1392 return server; 1404 return server;
1393 1405
1394error: 1406error:
1407 nfs_free_fattr(fattr);
1395 nfs_free_server(server); 1408 nfs_free_server(server);
1396 dprintk("<-- nfs4_create_server() = error %d\n", error); 1409 dprintk("<-- nfs4_create_server() = error %d\n", error);
1397 return ERR_PTR(error); 1410 return ERR_PTR(error);
@@ -1405,7 +1418,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1405{ 1418{
1406 struct nfs_client *parent_client; 1419 struct nfs_client *parent_client;
1407 struct nfs_server *server, *parent_server; 1420 struct nfs_server *server, *parent_server;
1408 struct nfs_fattr fattr; 1421 struct nfs_fattr *fattr;
1409 int error; 1422 int error;
1410 1423
1411 dprintk("--> nfs4_create_referral_server()\n"); 1424 dprintk("--> nfs4_create_referral_server()\n");
@@ -1414,6 +1427,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1414 if (!server) 1427 if (!server)
1415 return ERR_PTR(-ENOMEM); 1428 return ERR_PTR(-ENOMEM);
1416 1429
1430 error = -ENOMEM;
1431 fattr = nfs_alloc_fattr();
1432 if (fattr == NULL)
1433 goto error;
1434
1417 parent_server = NFS_SB(data->sb); 1435 parent_server = NFS_SB(data->sb);
1418 parent_client = parent_server->nfs_client; 1436 parent_client = parent_server->nfs_client;
1419 1437
@@ -1443,12 +1461,12 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1443 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1461 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1444 1462
1445 /* Probe the root fh to retrieve its FSID and filehandle */ 1463 /* Probe the root fh to retrieve its FSID and filehandle */
1446 error = nfs4_path_walk(server, mntfh, data->mnt_path); 1464 error = nfs4_get_rootfh(server, mntfh);
1447 if (error < 0) 1465 if (error < 0)
1448 goto error; 1466 goto error;
1449 1467
1450 /* probe the filesystem info for this server filesystem */ 1468 /* probe the filesystem info for this server filesystem */
1451 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1469 error = nfs_probe_fsinfo(server, mntfh, fattr);
1452 if (error < 0) 1470 if (error < 0)
1453 goto error; 1471 goto error;
1454 1472
@@ -1466,10 +1484,12 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1466 1484
1467 server->mount_time = jiffies; 1485 server->mount_time = jiffies;
1468 1486
1487 nfs_free_fattr(fattr);
1469 dprintk("<-- nfs_create_referral_server() = %p\n", server); 1488 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1470 return server; 1489 return server;
1471 1490
1472error: 1491error:
1492 nfs_free_fattr(fattr);
1473 nfs_free_server(server); 1493 nfs_free_server(server);
1474 dprintk("<-- nfs4_create_referral_server() = error %d\n", error); 1494 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1475 return ERR_PTR(error); 1495 return ERR_PTR(error);
@@ -1485,7 +1505,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1485 struct nfs_fattr *fattr) 1505 struct nfs_fattr *fattr)
1486{ 1506{
1487 struct nfs_server *server; 1507 struct nfs_server *server;
1488 struct nfs_fattr fattr_fsinfo; 1508 struct nfs_fattr *fattr_fsinfo;
1489 int error; 1509 int error;
1490 1510
1491 dprintk("--> nfs_clone_server(,%llx:%llx,)\n", 1511 dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
@@ -1496,6 +1516,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1496 if (!server) 1516 if (!server)
1497 return ERR_PTR(-ENOMEM); 1517 return ERR_PTR(-ENOMEM);
1498 1518
1519 error = -ENOMEM;
1520 fattr_fsinfo = nfs_alloc_fattr();
1521 if (fattr_fsinfo == NULL)
1522 goto out_free_server;
1523
1499 /* Copy data from the source */ 1524 /* Copy data from the source */
1500 server->nfs_client = source->nfs_client; 1525 server->nfs_client = source->nfs_client;
1501 atomic_inc(&server->nfs_client->cl_count); 1526 atomic_inc(&server->nfs_client->cl_count);
@@ -1512,7 +1537,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1512 nfs_init_server_aclclient(server); 1537 nfs_init_server_aclclient(server);
1513 1538
1514 /* probe the filesystem info for this server filesystem */ 1539 /* probe the filesystem info for this server filesystem */
1515 error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); 1540 error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
1516 if (error < 0) 1541 if (error < 0)
1517 goto out_free_server; 1542 goto out_free_server;
1518 1543
@@ -1534,10 +1559,12 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1534 1559
1535 server->mount_time = jiffies; 1560 server->mount_time = jiffies;
1536 1561
1562 nfs_free_fattr(fattr_fsinfo);
1537 dprintk("<-- nfs_clone_server() = %p\n", server); 1563 dprintk("<-- nfs_clone_server() = %p\n", server);
1538 return server; 1564 return server;
1539 1565
1540out_free_server: 1566out_free_server:
1567 nfs_free_fattr(fattr_fsinfo);
1541 nfs_free_server(server); 1568 nfs_free_server(server);
1542 dprintk("<-- nfs_clone_server() = error %d\n", error); 1569 dprintk("<-- nfs_clone_server() = error %d\n", error);
1543 return ERR_PTR(error); 1570 return ERR_PTR(error);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ea61d26e7871..301634543974 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
213 struct nfs_delegation *freeme = NULL; 213 struct nfs_delegation *freeme = NULL;
214 int status = 0; 214 int status = 0;
215 215
216 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); 216 delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
217 if (delegation == NULL) 217 if (delegation == NULL)
218 return -ENOMEM; 218 return -ENOMEM;
219 memcpy(delegation->stateid.data, res->delegation.data, 219 memcpy(delegation->stateid.data, res->delegation.data,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a7bb5c694aa3..ee9a179ebdf3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -530,9 +530,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
530 nfs_readdir_descriptor_t my_desc, 530 nfs_readdir_descriptor_t my_desc,
531 *desc = &my_desc; 531 *desc = &my_desc;
532 struct nfs_entry my_entry; 532 struct nfs_entry my_entry;
533 struct nfs_fh fh; 533 int res = -ENOMEM;
534 struct nfs_fattr fattr;
535 long res;
536 534
537 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 535 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
538 dentry->d_parent->d_name.name, dentry->d_name.name, 536 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -554,9 +552,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
554 552
555 my_entry.cookie = my_entry.prev_cookie = 0; 553 my_entry.cookie = my_entry.prev_cookie = 0;
556 my_entry.eof = 0; 554 my_entry.eof = 0;
557 my_entry.fh = &fh; 555 my_entry.fh = nfs_alloc_fhandle();
558 my_entry.fattr = &fattr; 556 my_entry.fattr = nfs_alloc_fattr();
559 nfs_fattr_init(&fattr); 557 if (my_entry.fh == NULL || my_entry.fattr == NULL)
558 goto out_alloc_failed;
559
560 desc->entry = &my_entry; 560 desc->entry = &my_entry;
561 561
562 nfs_block_sillyrename(dentry); 562 nfs_block_sillyrename(dentry);
@@ -598,7 +598,10 @@ out:
598 nfs_unblock_sillyrename(dentry); 598 nfs_unblock_sillyrename(dentry);
599 if (res > 0) 599 if (res > 0)
600 res = 0; 600 res = 0;
601 dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", 601out_alloc_failed:
602 nfs_free_fattr(my_entry.fattr);
603 nfs_free_fhandle(my_entry.fh);
604 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
602 dentry->d_parent->d_name.name, dentry->d_name.name, 605 dentry->d_parent->d_name.name, dentry->d_name.name,
603 res); 606 res);
604 return res; 607 return res;
@@ -776,9 +779,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
776 struct inode *dir; 779 struct inode *dir;
777 struct inode *inode; 780 struct inode *inode;
778 struct dentry *parent; 781 struct dentry *parent;
782 struct nfs_fh *fhandle = NULL;
783 struct nfs_fattr *fattr = NULL;
779 int error; 784 int error;
780 struct nfs_fh fhandle;
781 struct nfs_fattr fattr;
782 785
783 parent = dget_parent(dentry); 786 parent = dget_parent(dentry);
784 dir = parent->d_inode; 787 dir = parent->d_inode;
@@ -811,14 +814,22 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
811 if (NFS_STALE(inode)) 814 if (NFS_STALE(inode))
812 goto out_bad; 815 goto out_bad;
813 816
814 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 817 error = -ENOMEM;
818 fhandle = nfs_alloc_fhandle();
819 fattr = nfs_alloc_fattr();
820 if (fhandle == NULL || fattr == NULL)
821 goto out_error;
822
823 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
815 if (error) 824 if (error)
816 goto out_bad; 825 goto out_bad;
817 if (nfs_compare_fh(NFS_FH(inode), &fhandle)) 826 if (nfs_compare_fh(NFS_FH(inode), fhandle))
818 goto out_bad; 827 goto out_bad;
819 if ((error = nfs_refresh_inode(inode, &fattr)) != 0) 828 if ((error = nfs_refresh_inode(inode, fattr)) != 0)
820 goto out_bad; 829 goto out_bad;
821 830
831 nfs_free_fattr(fattr);
832 nfs_free_fhandle(fhandle);
822out_set_verifier: 833out_set_verifier:
823 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 834 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
824 out_valid: 835 out_valid:
@@ -842,11 +853,21 @@ out_zap_parent:
842 shrink_dcache_parent(dentry); 853 shrink_dcache_parent(dentry);
843 } 854 }
844 d_drop(dentry); 855 d_drop(dentry);
856 nfs_free_fattr(fattr);
857 nfs_free_fhandle(fhandle);
845 dput(parent); 858 dput(parent);
846 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", 859 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
847 __func__, dentry->d_parent->d_name.name, 860 __func__, dentry->d_parent->d_name.name,
848 dentry->d_name.name); 861 dentry->d_name.name);
849 return 0; 862 return 0;
863out_error:
864 nfs_free_fattr(fattr);
865 nfs_free_fhandle(fhandle);
866 dput(parent);
867 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
868 __func__, dentry->d_parent->d_name.name,
869 dentry->d_name.name, error);
870 return error;
850} 871}
851 872
852/* 873/*
@@ -911,9 +932,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
911 struct dentry *res; 932 struct dentry *res;
912 struct dentry *parent; 933 struct dentry *parent;
913 struct inode *inode = NULL; 934 struct inode *inode = NULL;
935 struct nfs_fh *fhandle = NULL;
936 struct nfs_fattr *fattr = NULL;
914 int error; 937 int error;
915 struct nfs_fh fhandle;
916 struct nfs_fattr fattr;
917 938
918 dfprintk(VFS, "NFS: lookup(%s/%s)\n", 939 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
919 dentry->d_parent->d_name.name, dentry->d_name.name); 940 dentry->d_parent->d_name.name, dentry->d_name.name);
@@ -923,7 +944,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
923 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 944 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
924 goto out; 945 goto out;
925 946
926 res = ERR_PTR(-ENOMEM);
927 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 947 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
928 948
929 /* 949 /*
@@ -936,17 +956,23 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
936 goto out; 956 goto out;
937 } 957 }
938 958
959 res = ERR_PTR(-ENOMEM);
960 fhandle = nfs_alloc_fhandle();
961 fattr = nfs_alloc_fattr();
962 if (fhandle == NULL || fattr == NULL)
963 goto out;
964
939 parent = dentry->d_parent; 965 parent = dentry->d_parent;
940 /* Protect against concurrent sillydeletes */ 966 /* Protect against concurrent sillydeletes */
941 nfs_block_sillyrename(parent); 967 nfs_block_sillyrename(parent);
942 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 968 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
943 if (error == -ENOENT) 969 if (error == -ENOENT)
944 goto no_entry; 970 goto no_entry;
945 if (error < 0) { 971 if (error < 0) {
946 res = ERR_PTR(error); 972 res = ERR_PTR(error);
947 goto out_unblock_sillyrename; 973 goto out_unblock_sillyrename;
948 } 974 }
949 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); 975 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
950 res = (struct dentry *)inode; 976 res = (struct dentry *)inode;
951 if (IS_ERR(res)) 977 if (IS_ERR(res))
952 goto out_unblock_sillyrename; 978 goto out_unblock_sillyrename;
@@ -962,6 +988,8 @@ no_entry:
962out_unblock_sillyrename: 988out_unblock_sillyrename:
963 nfs_unblock_sillyrename(parent); 989 nfs_unblock_sillyrename(parent);
964out: 990out:
991 nfs_free_fattr(fattr);
992 nfs_free_fhandle(fhandle);
965 return res; 993 return res;
966} 994}
967 995
@@ -1669,28 +1697,33 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
1669 smp_mb__after_atomic_dec(); 1697 smp_mb__after_atomic_dec();
1670} 1698}
1671 1699
1700static void nfs_access_free_list(struct list_head *head)
1701{
1702 struct nfs_access_entry *cache;
1703
1704 while (!list_empty(head)) {
1705 cache = list_entry(head->next, struct nfs_access_entry, lru);
1706 list_del(&cache->lru);
1707 nfs_access_free_entry(cache);
1708 }
1709}
1710
1672int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 1711int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1673{ 1712{
1674 LIST_HEAD(head); 1713 LIST_HEAD(head);
1675 struct nfs_inode *nfsi; 1714 struct nfs_inode *nfsi;
1676 struct nfs_access_entry *cache; 1715 struct nfs_access_entry *cache;
1677 1716
1678restart: 1717 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1718 return (nr_to_scan == 0) ? 0 : -1;
1719
1679 spin_lock(&nfs_access_lru_lock); 1720 spin_lock(&nfs_access_lru_lock);
1680 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1721 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1681 struct rw_semaphore *s_umount;
1682 struct inode *inode; 1722 struct inode *inode;
1683 1723
1684 if (nr_to_scan-- == 0) 1724 if (nr_to_scan-- == 0)
1685 break; 1725 break;
1686 s_umount = &nfsi->vfs_inode.i_sb->s_umount; 1726 inode = &nfsi->vfs_inode;
1687 if (!down_read_trylock(s_umount))
1688 continue;
1689 inode = igrab(&nfsi->vfs_inode);
1690 if (inode == NULL) {
1691 up_read(s_umount);
1692 continue;
1693 }
1694 spin_lock(&inode->i_lock); 1727 spin_lock(&inode->i_lock);
1695 if (list_empty(&nfsi->access_cache_entry_lru)) 1728 if (list_empty(&nfsi->access_cache_entry_lru))
1696 goto remove_lru_entry; 1729 goto remove_lru_entry;
@@ -1704,61 +1737,47 @@ restart:
1704 else { 1737 else {
1705remove_lru_entry: 1738remove_lru_entry:
1706 list_del_init(&nfsi->access_cache_inode_lru); 1739 list_del_init(&nfsi->access_cache_inode_lru);
1740 smp_mb__before_clear_bit();
1707 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 1741 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1742 smp_mb__after_clear_bit();
1708 } 1743 }
1709 spin_unlock(&inode->i_lock);
1710 spin_unlock(&nfs_access_lru_lock);
1711 iput(inode);
1712 up_read(s_umount);
1713 goto restart;
1714 } 1744 }
1715 spin_unlock(&nfs_access_lru_lock); 1745 spin_unlock(&nfs_access_lru_lock);
1716 while (!list_empty(&head)) { 1746 nfs_access_free_list(&head);
1717 cache = list_entry(head.next, struct nfs_access_entry, lru);
1718 list_del(&cache->lru);
1719 nfs_access_free_entry(cache);
1720 }
1721 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; 1747 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
1722} 1748}
1723 1749
1724static void __nfs_access_zap_cache(struct inode *inode) 1750static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
1725{ 1751{
1726 struct nfs_inode *nfsi = NFS_I(inode);
1727 struct rb_root *root_node = &nfsi->access_cache; 1752 struct rb_root *root_node = &nfsi->access_cache;
1728 struct rb_node *n, *dispose = NULL; 1753 struct rb_node *n;
1729 struct nfs_access_entry *entry; 1754 struct nfs_access_entry *entry;
1730 1755
1731 /* Unhook entries from the cache */ 1756 /* Unhook entries from the cache */
1732 while ((n = rb_first(root_node)) != NULL) { 1757 while ((n = rb_first(root_node)) != NULL) {
1733 entry = rb_entry(n, struct nfs_access_entry, rb_node); 1758 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1734 rb_erase(n, root_node); 1759 rb_erase(n, root_node);
1735 list_del(&entry->lru); 1760 list_move(&entry->lru, head);
1736 n->rb_left = dispose;
1737 dispose = n;
1738 } 1761 }
1739 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; 1762 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1740 spin_unlock(&inode->i_lock);
1741
1742 /* Now kill them all! */
1743 while (dispose != NULL) {
1744 n = dispose;
1745 dispose = n->rb_left;
1746 nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
1747 }
1748} 1763}
1749 1764
1750void nfs_access_zap_cache(struct inode *inode) 1765void nfs_access_zap_cache(struct inode *inode)
1751{ 1766{
1767 LIST_HEAD(head);
1768
1769 if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
1770 return;
1752 /* Remove from global LRU init */ 1771 /* Remove from global LRU init */
1753 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { 1772 spin_lock(&nfs_access_lru_lock);
1754 spin_lock(&nfs_access_lru_lock); 1773 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
1755 list_del_init(&NFS_I(inode)->access_cache_inode_lru); 1774 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
1756 spin_unlock(&nfs_access_lru_lock);
1757 }
1758 1775
1759 spin_lock(&inode->i_lock); 1776 spin_lock(&inode->i_lock);
1760 /* This will release the spinlock */ 1777 __nfs_access_zap_cache(NFS_I(inode), &head);
1761 __nfs_access_zap_cache(inode); 1778 spin_unlock(&inode->i_lock);
1779 spin_unlock(&nfs_access_lru_lock);
1780 nfs_access_free_list(&head);
1762} 1781}
1763 1782
1764static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) 1783static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
@@ -1809,8 +1828,8 @@ out_stale:
1809 nfs_access_free_entry(cache); 1828 nfs_access_free_entry(cache);
1810 return -ENOENT; 1829 return -ENOENT;
1811out_zap: 1830out_zap:
1812 /* This will release the spinlock */ 1831 spin_unlock(&inode->i_lock);
1813 __nfs_access_zap_cache(inode); 1832 nfs_access_zap_cache(inode);
1814 return -ENOENT; 1833 return -ENOENT;
1815} 1834}
1816 1835
@@ -1865,9 +1884,11 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s
1865 smp_mb__after_atomic_inc(); 1884 smp_mb__after_atomic_inc();
1866 1885
1867 /* Add inode to global LRU list */ 1886 /* Add inode to global LRU list */
1868 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { 1887 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
1869 spin_lock(&nfs_access_lru_lock); 1888 spin_lock(&nfs_access_lru_lock);
1870 list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); 1889 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
1890 list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
1891 &nfs_access_lru_list);
1871 spin_unlock(&nfs_access_lru_lock); 1892 spin_unlock(&nfs_access_lru_lock);
1872 } 1893 }
1873} 1894}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8d965bddb87e..cac96bcc91e4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -161,14 +161,17 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
161 struct nfs_server *server = NFS_SERVER(inode); 161 struct nfs_server *server = NFS_SERVER(inode);
162 struct nfs_inode *nfsi = NFS_I(inode); 162 struct nfs_inode *nfsi = NFS_I(inode);
163 163
164 if (server->flags & NFS_MOUNT_NOAC) 164 if (nfs_have_delegated_attributes(inode))
165 goto force_reval; 165 goto out_noreval;
166
166 if (filp->f_flags & O_DIRECT) 167 if (filp->f_flags & O_DIRECT)
167 goto force_reval; 168 goto force_reval;
168 if (nfsi->npages != 0) 169 if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
169 return 0; 170 goto force_reval;
170 if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) 171 if (nfs_attribute_timeout(inode))
171 return 0; 172 goto force_reval;
173out_noreval:
174 return 0;
172force_reval: 175force_reval:
173 return __nfs_revalidate_inode(server, inode); 176 return __nfs_revalidate_inode(server, inode);
174} 177}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index a6b16ed93229..ce153a6b3aec 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -467,7 +467,8 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
467 struct list_head *pages, 467 struct list_head *pages,
468 unsigned *nr_pages) 468 unsigned *nr_pages)
469{ 469{
470 int ret, npages = *nr_pages; 470 unsigned npages = *nr_pages;
471 int ret;
471 472
472 dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", 473 dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
473 NFS_I(inode)->fscache, npages, inode); 474 NFS_I(inode)->fscache, npages, inode);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b35d2a616066..7428f7d6273b 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -78,159 +78,94 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
78{ 78{
79 struct nfs_server *server = NFS_SB(sb); 79 struct nfs_server *server = NFS_SB(sb);
80 struct nfs_fsinfo fsinfo; 80 struct nfs_fsinfo fsinfo;
81 struct nfs_fattr fattr; 81 struct dentry *ret;
82 struct dentry *mntroot;
83 struct inode *inode; 82 struct inode *inode;
84 int error; 83 int error;
85 84
86 /* get the actual root for this mount */ 85 /* get the actual root for this mount */
87 fsinfo.fattr = &fattr; 86 fsinfo.fattr = nfs_alloc_fattr();
87 if (fsinfo.fattr == NULL)
88 return ERR_PTR(-ENOMEM);
88 89
89 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 90 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
90 if (error < 0) { 91 if (error < 0) {
91 dprintk("nfs_get_root: getattr error = %d\n", -error); 92 dprintk("nfs_get_root: getattr error = %d\n", -error);
92 return ERR_PTR(error); 93 ret = ERR_PTR(error);
94 goto out;
93 } 95 }
94 96
95 inode = nfs_fhget(sb, mntfh, fsinfo.fattr); 97 inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
96 if (IS_ERR(inode)) { 98 if (IS_ERR(inode)) {
97 dprintk("nfs_get_root: get root inode failed\n"); 99 dprintk("nfs_get_root: get root inode failed\n");
98 return ERR_CAST(inode); 100 ret = ERR_CAST(inode);
101 goto out;
99 } 102 }
100 103
101 error = nfs_superblock_set_dummy_root(sb, inode); 104 error = nfs_superblock_set_dummy_root(sb, inode);
102 if (error != 0) 105 if (error != 0) {
103 return ERR_PTR(error); 106 ret = ERR_PTR(error);
107 goto out;
108 }
104 109
105 /* root dentries normally start off anonymous and get spliced in later 110 /* root dentries normally start off anonymous and get spliced in later
106 * if the dentry tree reaches them; however if the dentry already 111 * if the dentry tree reaches them; however if the dentry already
107 * exists, we'll pick it up at this point and use it as the root 112 * exists, we'll pick it up at this point and use it as the root
108 */ 113 */
109 mntroot = d_obtain_alias(inode); 114 ret = d_obtain_alias(inode);
110 if (IS_ERR(mntroot)) { 115 if (IS_ERR(ret)) {
111 dprintk("nfs_get_root: get root dentry failed\n"); 116 dprintk("nfs_get_root: get root dentry failed\n");
112 return mntroot; 117 goto out;
113 } 118 }
114 119
115 security_d_instantiate(mntroot, inode); 120 security_d_instantiate(ret, inode);
116
117 if (!mntroot->d_op)
118 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
119 121
120 return mntroot; 122 if (ret->d_op == NULL)
123 ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
124out:
125 nfs_free_fattr(fsinfo.fattr);
126 return ret;
121} 127}
122 128
123#ifdef CONFIG_NFS_V4 129#ifdef CONFIG_NFS_V4
124 130
125/* 131int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
126 * Do a simple pathwalk from the root FH of the server to the nominated target
127 * of the mountpoint
128 * - give error on symlinks
129 * - give error on ".." occurring in the path
130 * - follow traversals
131 */
132int nfs4_path_walk(struct nfs_server *server,
133 struct nfs_fh *mntfh,
134 const char *path)
135{ 132{
136 struct nfs_fsinfo fsinfo; 133 struct nfs_fsinfo fsinfo;
137 struct nfs_fattr fattr; 134 int ret = -ENOMEM;
138 struct nfs_fh lastfh;
139 struct qstr name;
140 int ret;
141 135
142 dprintk("--> nfs4_path_walk(,,%s)\n", path); 136 dprintk("--> nfs4_get_rootfh()\n");
143 137
144 fsinfo.fattr = &fattr; 138 fsinfo.fattr = nfs_alloc_fattr();
145 nfs_fattr_init(&fattr); 139 if (fsinfo.fattr == NULL)
146 140 goto out;
147 /* Eat leading slashes */
148 while (*path == '/')
149 path++;
150 141
151 /* Start by getting the root filehandle from the server */ 142 /* Start by getting the root filehandle from the server */
152 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 143 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
153 if (ret < 0) { 144 if (ret < 0) {
154 dprintk("nfs4_get_root: getroot error = %d\n", -ret); 145 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
155 return ret; 146 goto out;
156 } 147 }
157 148
158 if (!S_ISDIR(fattr.mode)) { 149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
159 printk(KERN_ERR "nfs4_get_root:" 150 || !S_ISDIR(fsinfo.fattr->mode)) {
151 printk(KERN_ERR "nfs4_get_rootfh:"
160 " getroot encountered non-directory\n"); 152 " getroot encountered non-directory\n");
161 return -ENOTDIR; 153 ret = -ENOTDIR;
154 goto out;
162 } 155 }
163 156
164 /* FIXME: It is quite valid for the server to return a referral here */ 157 if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
165 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { 158 printk(KERN_ERR "nfs4_get_rootfh:"
166 printk(KERN_ERR "nfs4_get_root:"
167 " getroot obtained referral\n"); 159 " getroot obtained referral\n");
168 return -EREMOTE; 160 ret = -EREMOTE;
169 } 161 goto out;
170
171next_component:
172 dprintk("Next: %s\n", path);
173
174 /* extract the next bit of the path */
175 if (!*path)
176 goto path_walk_complete;
177
178 name.name = path;
179 while (*path && *path != '/')
180 path++;
181 name.len = path - (const char *) name.name;
182
183 if (name.len > NFS4_MAXNAMLEN)
184 return -ENAMETOOLONG;
185
186eat_dot_dir:
187 while (*path == '/')
188 path++;
189
190 if (path[0] == '.' && (path[1] == '/' || !path[1])) {
191 path += 2;
192 goto eat_dot_dir;
193 }
194
195 /* FIXME: Why shouldn't the user be able to use ".." in the path? */
196 if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
197 ) {
198 printk(KERN_ERR "nfs4_get_root:"
199 " Mount path contains reference to \"..\"\n");
200 return -EINVAL;
201 } 162 }
202 163
203 /* lookup the next FH in the sequence */ 164 memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
204 memcpy(&lastfh, mntfh, sizeof(lastfh)); 165out:
205 166 nfs_free_fattr(fsinfo.fattr);
206 dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); 167 dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
207 168 return ret;
208 ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
209 mntfh, &fattr);
210 if (ret < 0) {
211 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
212 return ret;
213 }
214
215 if (!S_ISDIR(fattr.mode)) {
216 printk(KERN_ERR "nfs4_get_root:"
217 " lookupfh encountered non-directory\n");
218 return -ENOTDIR;
219 }
220
221 /* FIXME: Referrals are quite valid here too */
222 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
223 printk(KERN_ERR "nfs4_get_root:"
224 " lookupfh obtained referral\n");
225 return -EREMOTE;
226 }
227
228 goto next_component;
229
230path_walk_complete:
231 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
232 dprintk("<-- nfs4_path_walk() = 0\n");
233 return 0;
234} 169}
235 170
236/* 171/*
@@ -239,8 +174,8 @@ path_walk_complete:
239struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 174struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
240{ 175{
241 struct nfs_server *server = NFS_SB(sb); 176 struct nfs_server *server = NFS_SB(sb);
242 struct nfs_fattr fattr; 177 struct nfs_fattr *fattr = NULL;
243 struct dentry *mntroot; 178 struct dentry *ret;
244 struct inode *inode; 179 struct inode *inode;
245 int error; 180 int error;
246 181
@@ -254,40 +189,50 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
254 return ERR_PTR(error); 189 return ERR_PTR(error);
255 } 190 }
256 191
192 fattr = nfs_alloc_fattr();
193 if (fattr == NULL)
194 return ERR_PTR(-ENOMEM);
195
257 /* get the actual root for this mount */ 196 /* get the actual root for this mount */
258 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); 197 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
259 if (error < 0) { 198 if (error < 0) {
260 dprintk("nfs_get_root: getattr error = %d\n", -error); 199 dprintk("nfs_get_root: getattr error = %d\n", -error);
261 return ERR_PTR(error); 200 ret = ERR_PTR(error);
201 goto out;
262 } 202 }
263 203
264 inode = nfs_fhget(sb, mntfh, &fattr); 204 inode = nfs_fhget(sb, mntfh, fattr);
265 if (IS_ERR(inode)) { 205 if (IS_ERR(inode)) {
266 dprintk("nfs_get_root: get root inode failed\n"); 206 dprintk("nfs_get_root: get root inode failed\n");
267 return ERR_CAST(inode); 207 ret = ERR_CAST(inode);
208 goto out;
268 } 209 }
269 210
270 error = nfs_superblock_set_dummy_root(sb, inode); 211 error = nfs_superblock_set_dummy_root(sb, inode);
271 if (error != 0) 212 if (error != 0) {
272 return ERR_PTR(error); 213 ret = ERR_PTR(error);
214 goto out;
215 }
273 216
274 /* root dentries normally start off anonymous and get spliced in later 217 /* root dentries normally start off anonymous and get spliced in later
275 * if the dentry tree reaches them; however if the dentry already 218 * if the dentry tree reaches them; however if the dentry already
276 * exists, we'll pick it up at this point and use it as the root 219 * exists, we'll pick it up at this point and use it as the root
277 */ 220 */
278 mntroot = d_obtain_alias(inode); 221 ret = d_obtain_alias(inode);
279 if (IS_ERR(mntroot)) { 222 if (IS_ERR(ret)) {
280 dprintk("nfs_get_root: get root dentry failed\n"); 223 dprintk("nfs_get_root: get root dentry failed\n");
281 return mntroot; 224 goto out;
282 } 225 }
283 226
284 security_d_instantiate(mntroot, inode); 227 security_d_instantiate(ret, inode);
285 228
286 if (!mntroot->d_op) 229 if (ret->d_op == NULL)
287 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; 230 ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
288 231
232out:
233 nfs_free_fattr(fattr);
289 dprintk("<-- nfs4_get_root()\n"); 234 dprintk("<-- nfs4_get_root()\n");
290 return mntroot; 235 return ret;
291} 236}
292 237
293#endif /* CONFIG_NFS_V4 */ 238#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a56edca0b5..099b3518feea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -393,8 +393,8 @@ int
393nfs_setattr(struct dentry *dentry, struct iattr *attr) 393nfs_setattr(struct dentry *dentry, struct iattr *attr)
394{ 394{
395 struct inode *inode = dentry->d_inode; 395 struct inode *inode = dentry->d_inode;
396 struct nfs_fattr fattr; 396 struct nfs_fattr *fattr;
397 int error; 397 int error = -ENOMEM;
398 398
399 nfs_inc_stats(inode, NFSIOS_VFSSETATTR); 399 nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
400 400
@@ -417,14 +417,20 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
417 filemap_write_and_wait(inode->i_mapping); 417 filemap_write_and_wait(inode->i_mapping);
418 nfs_wb_all(inode); 418 nfs_wb_all(inode);
419 } 419 }
420
421 fattr = nfs_alloc_fattr();
422 if (fattr == NULL)
423 goto out;
420 /* 424 /*
421 * Return any delegations if we're going to change ACLs 425 * Return any delegations if we're going to change ACLs
422 */ 426 */
423 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) 427 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
424 nfs_inode_return_delegation(inode); 428 nfs_inode_return_delegation(inode);
425 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); 429 error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
426 if (error == 0) 430 if (error == 0)
427 nfs_refresh_inode(inode, &fattr); 431 nfs_refresh_inode(inode, fattr);
432 nfs_free_fattr(fattr);
433out:
428 return error; 434 return error;
429} 435}
430 436
@@ -682,7 +688,7 @@ int
682__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 688__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
683{ 689{
684 int status = -ESTALE; 690 int status = -ESTALE;
685 struct nfs_fattr fattr; 691 struct nfs_fattr *fattr = NULL;
686 struct nfs_inode *nfsi = NFS_I(inode); 692 struct nfs_inode *nfsi = NFS_I(inode);
687 693
688 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", 694 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
@@ -693,8 +699,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
693 if (NFS_STALE(inode)) 699 if (NFS_STALE(inode))
694 goto out; 700 goto out;
695 701
702 status = -ENOMEM;
703 fattr = nfs_alloc_fattr();
704 if (fattr == NULL)
705 goto out;
706
696 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); 707 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
697 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 708 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
698 if (status != 0) { 709 if (status != 0) {
699 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", 710 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
700 inode->i_sb->s_id, 711 inode->i_sb->s_id,
@@ -707,7 +718,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
707 goto out; 718 goto out;
708 } 719 }
709 720
710 status = nfs_refresh_inode(inode, &fattr); 721 status = nfs_refresh_inode(inode, fattr);
711 if (status) { 722 if (status) {
712 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", 723 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
713 inode->i_sb->s_id, 724 inode->i_sb->s_id,
@@ -723,6 +734,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
723 (long long)NFS_FILEID(inode)); 734 (long long)NFS_FILEID(inode));
724 735
725 out: 736 out:
737 nfs_free_fattr(fattr);
726 return status; 738 return status;
727} 739}
728 740
@@ -730,9 +742,14 @@ int nfs_attribute_timeout(struct inode *inode)
730{ 742{
731 struct nfs_inode *nfsi = NFS_I(inode); 743 struct nfs_inode *nfsi = NFS_I(inode);
732 744
745 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
746}
747
748static int nfs_attribute_cache_expired(struct inode *inode)
749{
733 if (nfs_have_delegated_attributes(inode)) 750 if (nfs_have_delegated_attributes(inode))
734 return 0; 751 return 0;
735 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); 752 return nfs_attribute_timeout(inode);
736} 753}
737 754
738/** 755/**
@@ -745,7 +762,7 @@ int nfs_attribute_timeout(struct inode *inode)
745int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 762int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
746{ 763{
747 if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) 764 if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
748 && !nfs_attribute_timeout(inode)) 765 && !nfs_attribute_cache_expired(inode))
749 return NFS_STALE(inode) ? -ESTALE : 0; 766 return NFS_STALE(inode) ? -ESTALE : 0;
750 return __nfs_revalidate_inode(server, inode); 767 return __nfs_revalidate_inode(server, inode);
751} 768}
@@ -782,7 +799,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
782 int ret = 0; 799 int ret = 0;
783 800
784 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 801 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
785 || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { 802 || nfs_attribute_cache_expired(inode)
803 || NFS_STALE(inode)) {
786 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 804 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
787 if (ret < 0) 805 if (ret < 0)
788 goto out; 806 goto out;
@@ -916,6 +934,26 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
916 fattr->gencount = nfs_inc_attr_generation_counter(); 934 fattr->gencount = nfs_inc_attr_generation_counter();
917} 935}
918 936
937struct nfs_fattr *nfs_alloc_fattr(void)
938{
939 struct nfs_fattr *fattr;
940
941 fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
942 if (fattr != NULL)
943 nfs_fattr_init(fattr);
944 return fattr;
945}
946
947struct nfs_fh *nfs_alloc_fhandle(void)
948{
949 struct nfs_fh *fh;
950
951 fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
952 if (fh != NULL)
953 fh->size = 0;
954 return fh;
955}
956
919/** 957/**
920 * nfs_inode_attrs_need_update - check if the inode attributes need updating 958 * nfs_inode_attrs_need_update - check if the inode attributes need updating
921 * @inode - pointer to inode 959 * @inode - pointer to inode
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 11f82f03c5de..d8bd619e386c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,9 +244,7 @@ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
244#ifdef CONFIG_NFS_V4 244#ifdef CONFIG_NFS_V4
245extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 245extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
246 246
247extern int nfs4_path_walk(struct nfs_server *server, 247extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
248 struct nfs_fh *mntfh,
249 const char *path);
250#endif 248#endif
251 249
252/* read.c */ 250/* read.c */
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 1d8d5c813b01..c5832487c456 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -36,14 +36,14 @@ static inline void nfs_inc_stats(const struct inode *inode,
36 36
37static inline void nfs_add_server_stats(const struct nfs_server *server, 37static inline void nfs_add_server_stats(const struct nfs_server *server,
38 enum nfs_stat_bytecounters stat, 38 enum nfs_stat_bytecounters stat,
39 unsigned long addend) 39 long addend)
40{ 40{
41 this_cpu_add(server->io_stats->bytes[stat], addend); 41 this_cpu_add(server->io_stats->bytes[stat], addend);
42} 42}
43 43
44static inline void nfs_add_stats(const struct inode *inode, 44static inline void nfs_add_stats(const struct inode *inode,
45 enum nfs_stat_bytecounters stat, 45 enum nfs_stat_bytecounters stat,
46 unsigned long addend) 46 long addend)
47{ 47{
48 nfs_add_server_stats(NFS_SERVER(inode), stat, addend); 48 nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
49} 49}
@@ -51,7 +51,7 @@ static inline void nfs_add_stats(const struct inode *inode,
51#ifdef CONFIG_NFS_FSCACHE 51#ifdef CONFIG_NFS_FSCACHE
52static inline void nfs_add_fscache_stats(struct inode *inode, 52static inline void nfs_add_fscache_stats(struct inode *inode,
53 enum nfs_stat_fscachecounters stat, 53 enum nfs_stat_fscachecounters stat,
54 unsigned long addend) 54 long addend)
55{ 55{
56 this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); 56 this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
57} 57}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7888cf36022d..db6aa3673cf3 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -105,8 +105,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
105 struct vfsmount *mnt; 105 struct vfsmount *mnt;
106 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 106 struct nfs_server *server = NFS_SERVER(dentry->d_inode);
107 struct dentry *parent; 107 struct dentry *parent;
108 struct nfs_fh fh; 108 struct nfs_fh *fh = NULL;
109 struct nfs_fattr fattr; 109 struct nfs_fattr *fattr = NULL;
110 int err; 110 int err;
111 111
112 dprintk("--> nfs_follow_mountpoint()\n"); 112 dprintk("--> nfs_follow_mountpoint()\n");
@@ -115,6 +115,12 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
115 if (IS_ROOT(dentry)) 115 if (IS_ROOT(dentry))
116 goto out_err; 116 goto out_err;
117 117
118 err = -ENOMEM;
119 fh = nfs_alloc_fhandle();
120 fattr = nfs_alloc_fattr();
121 if (fh == NULL || fattr == NULL)
122 goto out_err;
123
118 dprintk("%s: enter\n", __func__); 124 dprintk("%s: enter\n", __func__);
119 dput(nd->path.dentry); 125 dput(nd->path.dentry);
120 nd->path.dentry = dget(dentry); 126 nd->path.dentry = dget(dentry);
@@ -123,16 +129,16 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
123 parent = dget_parent(nd->path.dentry); 129 parent = dget_parent(nd->path.dentry);
124 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 130 err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
125 &nd->path.dentry->d_name, 131 &nd->path.dentry->d_name,
126 &fh, &fattr); 132 fh, fattr);
127 dput(parent); 133 dput(parent);
128 if (err != 0) 134 if (err != 0)
129 goto out_err; 135 goto out_err;
130 136
131 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) 137 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
132 mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); 138 mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
133 else 139 else
134 mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh, 140 mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
135 &fattr); 141 fattr);
136 err = PTR_ERR(mnt); 142 err = PTR_ERR(mnt);
137 if (IS_ERR(mnt)) 143 if (IS_ERR(mnt))
138 goto out_err; 144 goto out_err;
@@ -151,6 +157,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
151 nd->path.dentry = dget(mnt->mnt_root); 157 nd->path.dentry = dget(mnt->mnt_root);
152 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); 158 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
153out: 159out:
160 nfs_free_fattr(fattr);
161 nfs_free_fhandle(fh);
154 dprintk("%s: done, returned %d\n", __func__, err); 162 dprintk("%s: done, returned %d\n", __func__, err);
155 163
156 dprintk("<-- nfs_follow_mountpoint() = %d\n", err); 164 dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index d150ae0c5ecd..9f88c5f4c7e2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -185,7 +185,6 @@ static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
185struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) 185struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
186{ 186{
187 struct nfs_server *server = NFS_SERVER(inode); 187 struct nfs_server *server = NFS_SERVER(inode);
188 struct nfs_fattr fattr;
189 struct page *pages[NFSACL_MAXPAGES] = { }; 188 struct page *pages[NFSACL_MAXPAGES] = { };
190 struct nfs3_getaclargs args = { 189 struct nfs3_getaclargs args = {
191 .fh = NFS_FH(inode), 190 .fh = NFS_FH(inode),
@@ -193,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
193 .pages = pages, 192 .pages = pages,
194 }; 193 };
195 struct nfs3_getaclres res = { 194 struct nfs3_getaclres res = {
196 .fattr = &fattr, 195 0
197 }; 196 };
198 struct rpc_message msg = { 197 struct rpc_message msg = {
199 .rpc_argp = &args, 198 .rpc_argp = &args,
@@ -228,7 +227,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
228 227
229 dprintk("NFS call getacl\n"); 228 dprintk("NFS call getacl\n");
230 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; 229 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
231 nfs_fattr_init(&fattr); 230 res.fattr = nfs_alloc_fattr();
231 if (res.fattr == NULL)
232 return ERR_PTR(-ENOMEM);
233
232 status = rpc_call_sync(server->client_acl, &msg, 0); 234 status = rpc_call_sync(server->client_acl, &msg, 0);
233 dprintk("NFS reply getacl: %d\n", status); 235 dprintk("NFS reply getacl: %d\n", status);
234 236
@@ -238,7 +240,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
238 240
239 switch (status) { 241 switch (status) {
240 case 0: 242 case 0:
241 status = nfs_refresh_inode(inode, &fattr); 243 status = nfs_refresh_inode(inode, res.fattr);
242 break; 244 break;
243 case -EPFNOSUPPORT: 245 case -EPFNOSUPPORT:
244 case -EPROTONOSUPPORT: 246 case -EPROTONOSUPPORT:
@@ -278,6 +280,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
278getout: 280getout:
279 posix_acl_release(res.acl_access); 281 posix_acl_release(res.acl_access);
280 posix_acl_release(res.acl_default); 282 posix_acl_release(res.acl_default);
283 nfs_free_fattr(res.fattr);
281 284
282 if (status != 0) { 285 if (status != 0) {
283 posix_acl_release(acl); 286 posix_acl_release(acl);
@@ -290,7 +293,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
290 struct posix_acl *dfacl) 293 struct posix_acl *dfacl)
291{ 294{
292 struct nfs_server *server = NFS_SERVER(inode); 295 struct nfs_server *server = NFS_SERVER(inode);
293 struct nfs_fattr fattr; 296 struct nfs_fattr *fattr;
294 struct page *pages[NFSACL_MAXPAGES]; 297 struct page *pages[NFSACL_MAXPAGES];
295 struct nfs3_setaclargs args = { 298 struct nfs3_setaclargs args = {
296 .inode = inode, 299 .inode = inode,
@@ -335,8 +338,13 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
335 } 338 }
336 339
337 dprintk("NFS call setacl\n"); 340 dprintk("NFS call setacl\n");
341 status = -ENOMEM;
342 fattr = nfs_alloc_fattr();
343 if (fattr == NULL)
344 goto out_freepages;
345
338 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; 346 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
339 nfs_fattr_init(&fattr); 347 msg.rpc_resp = fattr;
340 status = rpc_call_sync(server->client_acl, &msg, 0); 348 status = rpc_call_sync(server->client_acl, &msg, 0);
341 nfs_access_zap_cache(inode); 349 nfs_access_zap_cache(inode);
342 nfs_zap_acl_cache(inode); 350 nfs_zap_acl_cache(inode);
@@ -344,7 +352,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
344 352
345 switch (status) { 353 switch (status) {
346 case 0: 354 case 0:
347 status = nfs_refresh_inode(inode, &fattr); 355 status = nfs_refresh_inode(inode, fattr);
348 nfs3_cache_acls(inode, acl, dfacl); 356 nfs3_cache_acls(inode, acl, dfacl);
349 break; 357 break;
350 case -EPFNOSUPPORT: 358 case -EPFNOSUPPORT:
@@ -355,6 +363,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
355 case -ENOTSUPP: 363 case -ENOTSUPP:
356 status = -EOPNOTSUPP; 364 status = -EOPNOTSUPP;
357 } 365 }
366 nfs_free_fattr(fattr);
358out_freepages: 367out_freepages:
359 while (args.npages != 0) { 368 while (args.npages != 0) {
360 args.npages--; 369 args.npages--;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e701002694e5..fabb4f2849a1 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -144,14 +144,12 @@ static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs_fattr dir_attr;
148 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
149 .fh = NFS_FH(dir), 148 .fh = NFS_FH(dir),
150 .name = name->name, 149 .name = name->name,
151 .len = name->len 150 .len = name->len
152 }; 151 };
153 struct nfs3_diropres res = { 152 struct nfs3_diropres res = {
154 .dir_attr = &dir_attr,
155 .fh = fhandle, 153 .fh = fhandle,
156 .fattr = fattr 154 .fattr = fattr
157 }; 155 };
@@ -163,29 +161,30 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
163 int status; 161 int status;
164 162
165 dprintk("NFS call lookup %s\n", name->name); 163 dprintk("NFS call lookup %s\n", name->name);
166 nfs_fattr_init(&dir_attr); 164 res.dir_attr = nfs_alloc_fattr();
165 if (res.dir_attr == NULL)
166 return -ENOMEM;
167
167 nfs_fattr_init(fattr); 168 nfs_fattr_init(fattr);
168 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 169 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
169 nfs_refresh_inode(dir, &dir_attr); 170 nfs_refresh_inode(dir, res.dir_attr);
170 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { 171 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
171 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; 172 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
172 msg.rpc_argp = fhandle; 173 msg.rpc_argp = fhandle;
173 msg.rpc_resp = fattr; 174 msg.rpc_resp = fattr;
174 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 175 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
175 } 176 }
177 nfs_free_fattr(res.dir_attr);
176 dprintk("NFS reply lookup: %d\n", status); 178 dprintk("NFS reply lookup: %d\n", status);
177 return status; 179 return status;
178} 180}
179 181
180static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) 182static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
181{ 183{
182 struct nfs_fattr fattr;
183 struct nfs3_accessargs arg = { 184 struct nfs3_accessargs arg = {
184 .fh = NFS_FH(inode), 185 .fh = NFS_FH(inode),
185 }; 186 };
186 struct nfs3_accessres res = { 187 struct nfs3_accessres res;
187 .fattr = &fattr,
188 };
189 struct rpc_message msg = { 188 struct rpc_message msg = {
190 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], 189 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
191 .rpc_argp = &arg, 190 .rpc_argp = &arg,
@@ -193,7 +192,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
193 .rpc_cred = entry->cred, 192 .rpc_cred = entry->cred,
194 }; 193 };
195 int mode = entry->mask; 194 int mode = entry->mask;
196 int status; 195 int status = -ENOMEM;
197 196
198 dprintk("NFS call access\n"); 197 dprintk("NFS call access\n");
199 198
@@ -210,9 +209,13 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
210 if (mode & MAY_EXEC) 209 if (mode & MAY_EXEC)
211 arg.access |= NFS3_ACCESS_EXECUTE; 210 arg.access |= NFS3_ACCESS_EXECUTE;
212 } 211 }
213 nfs_fattr_init(&fattr); 212
213 res.fattr = nfs_alloc_fattr();
214 if (res.fattr == NULL)
215 goto out;
216
214 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 217 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
215 nfs_refresh_inode(inode, &fattr); 218 nfs_refresh_inode(inode, res.fattr);
216 if (status == 0) { 219 if (status == 0) {
217 entry->mask = 0; 220 entry->mask = 0;
218 if (res.access & NFS3_ACCESS_READ) 221 if (res.access & NFS3_ACCESS_READ)
@@ -222,6 +225,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
222 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) 225 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
223 entry->mask |= MAY_EXEC; 226 entry->mask |= MAY_EXEC;
224 } 227 }
228 nfs_free_fattr(res.fattr);
229out:
225 dprintk("NFS reply access: %d\n", status); 230 dprintk("NFS reply access: %d\n", status);
226 return status; 231 return status;
227} 232}
@@ -229,7 +234,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
229static int nfs3_proc_readlink(struct inode *inode, struct page *page, 234static int nfs3_proc_readlink(struct inode *inode, struct page *page,
230 unsigned int pgbase, unsigned int pglen) 235 unsigned int pgbase, unsigned int pglen)
231{ 236{
232 struct nfs_fattr fattr; 237 struct nfs_fattr *fattr;
233 struct nfs3_readlinkargs args = { 238 struct nfs3_readlinkargs args = {
234 .fh = NFS_FH(inode), 239 .fh = NFS_FH(inode),
235 .pgbase = pgbase, 240 .pgbase = pgbase,
@@ -239,14 +244,19 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
239 struct rpc_message msg = { 244 struct rpc_message msg = {
240 .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], 245 .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
241 .rpc_argp = &args, 246 .rpc_argp = &args,
242 .rpc_resp = &fattr,
243 }; 247 };
244 int status; 248 int status = -ENOMEM;
245 249
246 dprintk("NFS call readlink\n"); 250 dprintk("NFS call readlink\n");
247 nfs_fattr_init(&fattr); 251 fattr = nfs_alloc_fattr();
252 if (fattr == NULL)
253 goto out;
254 msg.rpc_resp = fattr;
255
248 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 256 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
249 nfs_refresh_inode(inode, &fattr); 257 nfs_refresh_inode(inode, fattr);
258 nfs_free_fattr(fattr);
259out:
250 dprintk("NFS reply readlink: %d\n", status); 260 dprintk("NFS reply readlink: %d\n", status);
251 return status; 261 return status;
252} 262}
@@ -396,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
396 .rpc_argp = &arg, 406 .rpc_argp = &arg,
397 .rpc_resp = &res, 407 .rpc_resp = &res,
398 }; 408 };
399 int status; 409 int status = -ENOMEM;
400 410
401 dprintk("NFS call remove %s\n", name->name); 411 dprintk("NFS call remove %s\n", name->name);
402 nfs_fattr_init(&res.dir_attr); 412 res.dir_attr = nfs_alloc_fattr();
413 if (res.dir_attr == NULL)
414 goto out;
415
403 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 416 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
404 nfs_post_op_update_inode(dir, &res.dir_attr); 417 nfs_post_op_update_inode(dir, res.dir_attr);
418 nfs_free_fattr(res.dir_attr);
419out:
405 dprintk("NFS reply remove: %d\n", status); 420 dprintk("NFS reply remove: %d\n", status);
406 return status; 421 return status;
407} 422}
@@ -419,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
419 if (nfs3_async_handle_jukebox(task, dir)) 434 if (nfs3_async_handle_jukebox(task, dir))
420 return 0; 435 return 0;
421 res = task->tk_msg.rpc_resp; 436 res = task->tk_msg.rpc_resp;
422 nfs_post_op_update_inode(dir, &res->dir_attr); 437 nfs_post_op_update_inode(dir, res->dir_attr);
423 return 1; 438 return 1;
424} 439}
425 440
@@ -427,7 +442,6 @@ static int
427nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
428 struct inode *new_dir, struct qstr *new_name) 443 struct inode *new_dir, struct qstr *new_name)
429{ 444{
430 struct nfs_fattr old_dir_attr, new_dir_attr;
431 struct nfs3_renameargs arg = { 445 struct nfs3_renameargs arg = {
432 .fromfh = NFS_FH(old_dir), 446 .fromfh = NFS_FH(old_dir),
433 .fromname = old_name->name, 447 .fromname = old_name->name,
@@ -436,23 +450,27 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
436 .toname = new_name->name, 450 .toname = new_name->name,
437 .tolen = new_name->len 451 .tolen = new_name->len
438 }; 452 };
439 struct nfs3_renameres res = { 453 struct nfs3_renameres res;
440 .fromattr = &old_dir_attr,
441 .toattr = &new_dir_attr
442 };
443 struct rpc_message msg = { 454 struct rpc_message msg = {
444 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
445 .rpc_argp = &arg, 456 .rpc_argp = &arg,
446 .rpc_resp = &res, 457 .rpc_resp = &res,
447 }; 458 };
448 int status; 459 int status = -ENOMEM;
449 460
450 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
451 nfs_fattr_init(&old_dir_attr); 462
452 nfs_fattr_init(&new_dir_attr); 463 res.fromattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL)
466 goto out;
467
453 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
454 nfs_post_op_update_inode(old_dir, &old_dir_attr); 469 nfs_post_op_update_inode(old_dir, res.fromattr);
455 nfs_post_op_update_inode(new_dir, &new_dir_attr); 470 nfs_post_op_update_inode(new_dir, res.toattr);
471out:
472 nfs_free_fattr(res.toattr);
473 nfs_free_fattr(res.fromattr);
456 dprintk("NFS reply rename: %d\n", status); 474 dprintk("NFS reply rename: %d\n", status);
457 return status; 475 return status;
458} 476}
@@ -460,30 +478,32 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460static int 478static int
461nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 479nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
462{ 480{
463 struct nfs_fattr dir_attr, fattr;
464 struct nfs3_linkargs arg = { 481 struct nfs3_linkargs arg = {
465 .fromfh = NFS_FH(inode), 482 .fromfh = NFS_FH(inode),
466 .tofh = NFS_FH(dir), 483 .tofh = NFS_FH(dir),
467 .toname = name->name, 484 .toname = name->name,
468 .tolen = name->len 485 .tolen = name->len
469 }; 486 };
470 struct nfs3_linkres res = { 487 struct nfs3_linkres res;
471 .dir_attr = &dir_attr,
472 .fattr = &fattr
473 };
474 struct rpc_message msg = { 488 struct rpc_message msg = {
475 .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], 489 .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
476 .rpc_argp = &arg, 490 .rpc_argp = &arg,
477 .rpc_resp = &res, 491 .rpc_resp = &res,
478 }; 492 };
479 int status; 493 int status = -ENOMEM;
480 494
481 dprintk("NFS call link %s\n", name->name); 495 dprintk("NFS call link %s\n", name->name);
482 nfs_fattr_init(&dir_attr); 496 res.fattr = nfs_alloc_fattr();
483 nfs_fattr_init(&fattr); 497 res.dir_attr = nfs_alloc_fattr();
498 if (res.fattr == NULL || res.dir_attr == NULL)
499 goto out;
500
484 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 501 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
485 nfs_post_op_update_inode(dir, &dir_attr); 502 nfs_post_op_update_inode(dir, res.dir_attr);
486 nfs_post_op_update_inode(inode, &fattr); 503 nfs_post_op_update_inode(inode, res.fattr);
504out:
505 nfs_free_fattr(res.dir_attr);
506 nfs_free_fattr(res.fattr);
487 dprintk("NFS reply link: %d\n", status); 507 dprintk("NFS reply link: %d\n", status);
488 return status; 508 return status;
489} 509}
@@ -554,7 +574,7 @@ out:
554static int 574static int
555nfs3_proc_rmdir(struct inode *dir, struct qstr *name) 575nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
556{ 576{
557 struct nfs_fattr dir_attr; 577 struct nfs_fattr *dir_attr;
558 struct nfs3_diropargs arg = { 578 struct nfs3_diropargs arg = {
559 .fh = NFS_FH(dir), 579 .fh = NFS_FH(dir),
560 .name = name->name, 580 .name = name->name,
@@ -563,14 +583,19 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
563 struct rpc_message msg = { 583 struct rpc_message msg = {
564 .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], 584 .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
565 .rpc_argp = &arg, 585 .rpc_argp = &arg,
566 .rpc_resp = &dir_attr,
567 }; 586 };
568 int status; 587 int status = -ENOMEM;
569 588
570 dprintk("NFS call rmdir %s\n", name->name); 589 dprintk("NFS call rmdir %s\n", name->name);
571 nfs_fattr_init(&dir_attr); 590 dir_attr = nfs_alloc_fattr();
591 if (dir_attr == NULL)
592 goto out;
593
594 msg.rpc_resp = dir_attr;
572 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 595 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
573 nfs_post_op_update_inode(dir, &dir_attr); 596 nfs_post_op_update_inode(dir, dir_attr);
597 nfs_free_fattr(dir_attr);
598out:
574 dprintk("NFS reply rmdir: %d\n", status); 599 dprintk("NFS reply rmdir: %d\n", status);
575 return status; 600 return status;
576} 601}
@@ -589,7 +614,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
589 u64 cookie, struct page *page, unsigned int count, int plus) 614 u64 cookie, struct page *page, unsigned int count, int plus)
590{ 615{
591 struct inode *dir = dentry->d_inode; 616 struct inode *dir = dentry->d_inode;
592 struct nfs_fattr dir_attr;
593 __be32 *verf = NFS_COOKIEVERF(dir); 617 __be32 *verf = NFS_COOKIEVERF(dir);
594 struct nfs3_readdirargs arg = { 618 struct nfs3_readdirargs arg = {
595 .fh = NFS_FH(dir), 619 .fh = NFS_FH(dir),
@@ -600,7 +624,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
600 .pages = &page 624 .pages = &page
601 }; 625 };
602 struct nfs3_readdirres res = { 626 struct nfs3_readdirres res = {
603 .dir_attr = &dir_attr,
604 .verf = verf, 627 .verf = verf,
605 .plus = plus 628 .plus = plus
606 }; 629 };
@@ -610,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
610 .rpc_resp = &res, 633 .rpc_resp = &res,
611 .rpc_cred = cred 634 .rpc_cred = cred
612 }; 635 };
613 int status; 636 int status = -ENOMEM;
614 637
615 if (plus) 638 if (plus)
616 msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; 639 msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
@@ -618,12 +641,17 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
618 dprintk("NFS call readdir%s %d\n", 641 dprintk("NFS call readdir%s %d\n",
619 plus? "plus" : "", (unsigned int) cookie); 642 plus? "plus" : "", (unsigned int) cookie);
620 643
621 nfs_fattr_init(&dir_attr); 644 res.dir_attr = nfs_alloc_fattr();
645 if (res.dir_attr == NULL)
646 goto out;
647
622 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 648 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
623 649
624 nfs_invalidate_atime(dir); 650 nfs_invalidate_atime(dir);
651 nfs_refresh_inode(dir, res.dir_attr);
625 652
626 nfs_refresh_inode(dir, &dir_attr); 653 nfs_free_fattr(res.dir_attr);
654out:
627 dprintk("NFS reply readdir: %d\n", status); 655 dprintk("NFS reply readdir: %d\n", status);
628 return status; 656 return status;
629} 657}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 56a86f6ac8b5..75dcfc7da365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
762static int 762static int
763nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) 763nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
764{ 764{
765 return nfs3_xdr_wccstat(req, p, &res->dir_attr); 765 return nfs3_xdr_wccstat(req, p, res->dir_attr);
766} 766}
767 767
768/* 768/*
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a187200a7aac..c538c6106e16 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,14 +206,14 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
206 206
207 207
208/* nfs4proc.c */ 208/* nfs4proc.c */
209extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); 209extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
210extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 210extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
211extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 211extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
212extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 212extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
213extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 213extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
214extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 214extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
215extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 215extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); 216extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
217extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 217extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
218extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 218extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
288 288
289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); 289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
291extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 291extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
292extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 292extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f071d12c613b..3c2a1724fbd2 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -115,6 +115,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
115 char *page, char *page2, 115 char *page, char *page2,
116 const struct nfs4_fs_location *location) 116 const struct nfs4_fs_location *location)
117{ 117{
118 const size_t addr_bufsize = sizeof(struct sockaddr_storage);
118 struct vfsmount *mnt = ERR_PTR(-ENOENT); 119 struct vfsmount *mnt = ERR_PTR(-ENOENT);
119 char *mnt_path; 120 char *mnt_path;
120 unsigned int maxbuflen; 121 unsigned int maxbuflen;
@@ -126,9 +127,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
126 mountdata->mnt_path = mnt_path; 127 mountdata->mnt_path = mnt_path;
127 maxbuflen = mnt_path - 1 - page2; 128 maxbuflen = mnt_path - 1 - page2;
128 129
130 mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
131 if (mountdata->addr == NULL)
132 return ERR_PTR(-ENOMEM);
133
129 for (s = 0; s < location->nservers; s++) { 134 for (s = 0; s < location->nservers; s++) {
130 const struct nfs4_string *buf = &location->servers[s]; 135 const struct nfs4_string *buf = &location->servers[s];
131 struct sockaddr_storage addr;
132 136
133 if (buf->len <= 0 || buf->len >= maxbuflen) 137 if (buf->len <= 0 || buf->len >= maxbuflen)
134 continue; 138 continue;
@@ -137,11 +141,10 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
137 continue; 141 continue;
138 142
139 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, 143 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
140 (struct sockaddr *)&addr, sizeof(addr)); 144 mountdata->addr, addr_bufsize);
141 if (mountdata->addrlen == 0) 145 if (mountdata->addrlen == 0)
142 continue; 146 continue;
143 147
144 mountdata->addr = (struct sockaddr *)&addr;
145 rpc_set_port(mountdata->addr, NFS_PORT); 148 rpc_set_port(mountdata->addr, NFS_PORT);
146 149
147 memcpy(page2, buf->data, buf->len); 150 memcpy(page2, buf->data, buf->len);
@@ -156,6 +159,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
156 if (!IS_ERR(mnt)) 159 if (!IS_ERR(mnt))
157 break; 160 break;
158 } 161 }
162 kfree(mountdata->addr);
159 return mnt; 163 return mnt;
160} 164}
161 165
@@ -221,8 +225,8 @@ out:
221 225
222/* 226/*
223 * nfs_do_refmount - handle crossing a referral on server 227 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
224 * @dentry - dentry of referral 229 * @dentry - dentry of referral
225 * @nd - nameidata info
226 * 230 *
227 */ 231 */
228struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 071fcedd517c..70015dd60a98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -70,6 +70,9 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
70static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 70static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
71static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 71static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
72static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 72static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
73static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
74 struct nfs_fattr *fattr, struct iattr *sattr,
75 struct nfs4_state *state);
73 76
74/* Prevent leaks of NFSv4 errors into userland */ 77/* Prevent leaks of NFSv4 errors into userland */
75static int nfs4_map_errors(int err) 78static int nfs4_map_errors(int err)
@@ -714,17 +717,18 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
714 717
715static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 718static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
716 struct nfs4_state_owner *sp, fmode_t fmode, int flags, 719 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
717 const struct iattr *attrs) 720 const struct iattr *attrs,
721 gfp_t gfp_mask)
718{ 722{
719 struct dentry *parent = dget_parent(path->dentry); 723 struct dentry *parent = dget_parent(path->dentry);
720 struct inode *dir = parent->d_inode; 724 struct inode *dir = parent->d_inode;
721 struct nfs_server *server = NFS_SERVER(dir); 725 struct nfs_server *server = NFS_SERVER(dir);
722 struct nfs4_opendata *p; 726 struct nfs4_opendata *p;
723 727
724 p = kzalloc(sizeof(*p), GFP_KERNEL); 728 p = kzalloc(sizeof(*p), gfp_mask);
725 if (p == NULL) 729 if (p == NULL)
726 goto err; 730 goto err;
727 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 731 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
728 if (p->o_arg.seqid == NULL) 732 if (p->o_arg.seqid == NULL)
729 goto err_free; 733 goto err_free;
730 path_get(path); 734 path_get(path);
@@ -1060,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
1060{ 1064{
1061 struct nfs4_opendata *opendata; 1065 struct nfs4_opendata *opendata;
1062 1066
1063 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL); 1067 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
1064 if (opendata == NULL) 1068 if (opendata == NULL)
1065 return ERR_PTR(-ENOMEM); 1069 return ERR_PTR(-ENOMEM);
1066 opendata->state = state; 1070 opendata->state = state;
@@ -1648,7 +1652,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1648 if (path->dentry->d_inode != NULL) 1652 if (path->dentry->d_inode != NULL)
1649 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); 1653 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
1650 status = -ENOMEM; 1654 status = -ENOMEM;
1651 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr); 1655 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
1652 if (opendata == NULL) 1656 if (opendata == NULL)
1653 goto err_put_state_owner; 1657 goto err_put_state_owner;
1654 1658
@@ -1659,15 +1663,24 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1659 if (status != 0) 1663 if (status != 0)
1660 goto err_opendata_put; 1664 goto err_opendata_put;
1661 1665
1662 if (opendata->o_arg.open_flags & O_EXCL)
1663 nfs4_exclusive_attrset(opendata, sattr);
1664
1665 state = nfs4_opendata_to_nfs4_state(opendata); 1666 state = nfs4_opendata_to_nfs4_state(opendata);
1666 status = PTR_ERR(state); 1667 status = PTR_ERR(state);
1667 if (IS_ERR(state)) 1668 if (IS_ERR(state))
1668 goto err_opendata_put; 1669 goto err_opendata_put;
1669 if (server->caps & NFS_CAP_POSIX_LOCK) 1670 if (server->caps & NFS_CAP_POSIX_LOCK)
1670 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 1671 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1672
1673 if (opendata->o_arg.open_flags & O_EXCL) {
1674 nfs4_exclusive_attrset(opendata, sattr);
1675
1676 nfs_fattr_init(opendata->o_res.f_attr);
1677 status = nfs4_do_setattr(state->inode, cred,
1678 opendata->o_res.f_attr, sattr,
1679 state);
1680 if (status == 0)
1681 nfs_setattr_update_inode(state->inode, sattr);
1682 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1683 }
1671 nfs4_opendata_put(opendata); 1684 nfs4_opendata_put(opendata);
1672 nfs4_put_state_owner(sp); 1685 nfs4_put_state_owner(sp);
1673 *res = state; 1686 *res = state;
@@ -1914,7 +1927,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
1914 * 1927 *
1915 * NOTE: Caller must be holding the sp->so_owner semaphore! 1928 * NOTE: Caller must be holding the sp->so_owner semaphore!
1916 */ 1929 */
1917int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) 1930int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
1918{ 1931{
1919 struct nfs_server *server = NFS_SERVER(state->inode); 1932 struct nfs_server *server = NFS_SERVER(state->inode);
1920 struct nfs4_closedata *calldata; 1933 struct nfs4_closedata *calldata;
@@ -1933,7 +1946,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1933 }; 1946 };
1934 int status = -ENOMEM; 1947 int status = -ENOMEM;
1935 1948
1936 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); 1949 calldata = kzalloc(sizeof(*calldata), gfp_mask);
1937 if (calldata == NULL) 1950 if (calldata == NULL)
1938 goto out; 1951 goto out;
1939 calldata->inode = state->inode; 1952 calldata->inode = state->inode;
@@ -1941,7 +1954,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1941 calldata->arg.fh = NFS_FH(state->inode); 1954 calldata->arg.fh = NFS_FH(state->inode);
1942 calldata->arg.stateid = &state->open_stateid; 1955 calldata->arg.stateid = &state->open_stateid;
1943 /* Serialization for the sequence id */ 1956 /* Serialization for the sequence id */
1944 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1957 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
1945 if (calldata->arg.seqid == NULL) 1958 if (calldata->arg.seqid == NULL)
1946 goto out_free_calldata; 1959 goto out_free_calldata;
1947 calldata->arg.fmode = 0; 1960 calldata->arg.fmode = 0;
@@ -2404,14 +2417,12 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh
2404static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 2417static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
2405{ 2418{
2406 struct nfs_server *server = NFS_SERVER(inode); 2419 struct nfs_server *server = NFS_SERVER(inode);
2407 struct nfs_fattr fattr;
2408 struct nfs4_accessargs args = { 2420 struct nfs4_accessargs args = {
2409 .fh = NFS_FH(inode), 2421 .fh = NFS_FH(inode),
2410 .bitmask = server->attr_bitmask, 2422 .bitmask = server->attr_bitmask,
2411 }; 2423 };
2412 struct nfs4_accessres res = { 2424 struct nfs4_accessres res = {
2413 .server = server, 2425 .server = server,
2414 .fattr = &fattr,
2415 }; 2426 };
2416 struct rpc_message msg = { 2427 struct rpc_message msg = {
2417 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], 2428 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
@@ -2438,7 +2449,11 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2438 if (mode & MAY_EXEC) 2449 if (mode & MAY_EXEC)
2439 args.access |= NFS4_ACCESS_EXECUTE; 2450 args.access |= NFS4_ACCESS_EXECUTE;
2440 } 2451 }
2441 nfs_fattr_init(&fattr); 2452
2453 res.fattr = nfs_alloc_fattr();
2454 if (res.fattr == NULL)
2455 return -ENOMEM;
2456
2442 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2457 status = nfs4_call_sync(server, &msg, &args, &res, 0);
2443 if (!status) { 2458 if (!status) {
2444 entry->mask = 0; 2459 entry->mask = 0;
@@ -2448,8 +2463,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2448 entry->mask |= MAY_WRITE; 2463 entry->mask |= MAY_WRITE;
2449 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) 2464 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
2450 entry->mask |= MAY_EXEC; 2465 entry->mask |= MAY_EXEC;
2451 nfs_refresh_inode(inode, &fattr); 2466 nfs_refresh_inode(inode, res.fattr);
2452 } 2467 }
2468 nfs_free_fattr(res.fattr);
2453 return status; 2469 return status;
2454} 2470}
2455 2471
@@ -2562,13 +2578,6 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2562 } 2578 }
2563 d_add(dentry, igrab(state->inode)); 2579 d_add(dentry, igrab(state->inode));
2564 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2580 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2565 if (flags & O_EXCL) {
2566 struct nfs_fattr fattr;
2567 status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
2568 if (status == 0)
2569 nfs_setattr_update_inode(state->inode, sattr);
2570 nfs_post_op_update_inode(state->inode, &fattr);
2571 }
2572 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2581 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
2573 status = nfs4_intent_set_file(nd, &path, state, fmode); 2582 status = nfs4_intent_set_file(nd, &path, state, fmode);
2574 else 2583 else
@@ -2596,14 +2605,19 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2596 .rpc_argp = &args, 2605 .rpc_argp = &args,
2597 .rpc_resp = &res, 2606 .rpc_resp = &res,
2598 }; 2607 };
2599 int status; 2608 int status = -ENOMEM;
2609
2610 res.dir_attr = nfs_alloc_fattr();
2611 if (res.dir_attr == NULL)
2612 goto out;
2600 2613
2601 nfs_fattr_init(&res.dir_attr);
2602 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2614 status = nfs4_call_sync(server, &msg, &args, &res, 1);
2603 if (status == 0) { 2615 if (status == 0) {
2604 update_changeattr(dir, &res.cinfo); 2616 update_changeattr(dir, &res.cinfo);
2605 nfs_post_op_update_inode(dir, &res.dir_attr); 2617 nfs_post_op_update_inode(dir, res.dir_attr);
2606 } 2618 }
2619 nfs_free_fattr(res.dir_attr);
2620out:
2607 return status; 2621 return status;
2608} 2622}
2609 2623
@@ -2638,7 +2652,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2638 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2652 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2639 return 0; 2653 return 0;
2640 update_changeattr(dir, &res->cinfo); 2654 update_changeattr(dir, &res->cinfo);
2641 nfs_post_op_update_inode(dir, &res->dir_attr); 2655 nfs_post_op_update_inode(dir, res->dir_attr);
2642 return 1; 2656 return 1;
2643} 2657}
2644 2658
@@ -2653,29 +2667,31 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2653 .new_name = new_name, 2667 .new_name = new_name,
2654 .bitmask = server->attr_bitmask, 2668 .bitmask = server->attr_bitmask,
2655 }; 2669 };
2656 struct nfs_fattr old_fattr, new_fattr;
2657 struct nfs4_rename_res res = { 2670 struct nfs4_rename_res res = {
2658 .server = server, 2671 .server = server,
2659 .old_fattr = &old_fattr,
2660 .new_fattr = &new_fattr,
2661 }; 2672 };
2662 struct rpc_message msg = { 2673 struct rpc_message msg = {
2663 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], 2674 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
2664 .rpc_argp = &arg, 2675 .rpc_argp = &arg,
2665 .rpc_resp = &res, 2676 .rpc_resp = &res,
2666 }; 2677 };
2667 int status; 2678 int status = -ENOMEM;
2668 2679
2669 nfs_fattr_init(res.old_fattr); 2680 res.old_fattr = nfs_alloc_fattr();
2670 nfs_fattr_init(res.new_fattr); 2681 res.new_fattr = nfs_alloc_fattr();
2671 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2682 if (res.old_fattr == NULL || res.new_fattr == NULL)
2683 goto out;
2672 2684
2685 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2673 if (!status) { 2686 if (!status) {
2674 update_changeattr(old_dir, &res.old_cinfo); 2687 update_changeattr(old_dir, &res.old_cinfo);
2675 nfs_post_op_update_inode(old_dir, res.old_fattr); 2688 nfs_post_op_update_inode(old_dir, res.old_fattr);
2676 update_changeattr(new_dir, &res.new_cinfo); 2689 update_changeattr(new_dir, &res.new_cinfo);
2677 nfs_post_op_update_inode(new_dir, res.new_fattr); 2690 nfs_post_op_update_inode(new_dir, res.new_fattr);
2678 } 2691 }
2692out:
2693 nfs_free_fattr(res.new_fattr);
2694 nfs_free_fattr(res.old_fattr);
2679 return status; 2695 return status;
2680} 2696}
2681 2697
@@ -2702,28 +2718,30 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2702 .name = name, 2718 .name = name,
2703 .bitmask = server->attr_bitmask, 2719 .bitmask = server->attr_bitmask,
2704 }; 2720 };
2705 struct nfs_fattr fattr, dir_attr;
2706 struct nfs4_link_res res = { 2721 struct nfs4_link_res res = {
2707 .server = server, 2722 .server = server,
2708 .fattr = &fattr,
2709 .dir_attr = &dir_attr,
2710 }; 2723 };
2711 struct rpc_message msg = { 2724 struct rpc_message msg = {
2712 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], 2725 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
2713 .rpc_argp = &arg, 2726 .rpc_argp = &arg,
2714 .rpc_resp = &res, 2727 .rpc_resp = &res,
2715 }; 2728 };
2716 int status; 2729 int status = -ENOMEM;
2730
2731 res.fattr = nfs_alloc_fattr();
2732 res.dir_attr = nfs_alloc_fattr();
2733 if (res.fattr == NULL || res.dir_attr == NULL)
2734 goto out;
2717 2735
2718 nfs_fattr_init(res.fattr);
2719 nfs_fattr_init(res.dir_attr);
2720 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2736 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2721 if (!status) { 2737 if (!status) {
2722 update_changeattr(dir, &res.cinfo); 2738 update_changeattr(dir, &res.cinfo);
2723 nfs_post_op_update_inode(dir, res.dir_attr); 2739 nfs_post_op_update_inode(dir, res.dir_attr);
2724 nfs_post_op_update_inode(inode, res.fattr); 2740 nfs_post_op_update_inode(inode, res.fattr);
2725 } 2741 }
2726 2742out:
2743 nfs_free_fattr(res.dir_attr);
2744 nfs_free_fattr(res.fattr);
2727 return status; 2745 return status;
2728} 2746}
2729 2747
@@ -3146,23 +3164,31 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
3146 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3164 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3147} 3165}
3148 3166
3167struct nfs4_renewdata {
3168 struct nfs_client *client;
3169 unsigned long timestamp;
3170};
3171
3149/* 3172/*
3150 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special 3173 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
3151 * standalone procedure for queueing an asynchronous RENEW. 3174 * standalone procedure for queueing an asynchronous RENEW.
3152 */ 3175 */
3153static void nfs4_renew_release(void *data) 3176static void nfs4_renew_release(void *calldata)
3154{ 3177{
3155 struct nfs_client *clp = data; 3178 struct nfs4_renewdata *data = calldata;
3179 struct nfs_client *clp = data->client;
3156 3180
3157 if (atomic_read(&clp->cl_count) > 1) 3181 if (atomic_read(&clp->cl_count) > 1)
3158 nfs4_schedule_state_renewal(clp); 3182 nfs4_schedule_state_renewal(clp);
3159 nfs_put_client(clp); 3183 nfs_put_client(clp);
3184 kfree(data);
3160} 3185}
3161 3186
3162static void nfs4_renew_done(struct rpc_task *task, void *data) 3187static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3163{ 3188{
3164 struct nfs_client *clp = data; 3189 struct nfs4_renewdata *data = calldata;
3165 unsigned long timestamp = task->tk_start; 3190 struct nfs_client *clp = data->client;
3191 unsigned long timestamp = data->timestamp;
3166 3192
3167 if (task->tk_status < 0) { 3193 if (task->tk_status < 0) {
3168 /* Unless we're shutting down, schedule state recovery! */ 3194 /* Unless we're shutting down, schedule state recovery! */
@@ -3188,11 +3214,17 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
3188 .rpc_argp = clp, 3214 .rpc_argp = clp,
3189 .rpc_cred = cred, 3215 .rpc_cred = cred,
3190 }; 3216 };
3217 struct nfs4_renewdata *data;
3191 3218
3192 if (!atomic_inc_not_zero(&clp->cl_count)) 3219 if (!atomic_inc_not_zero(&clp->cl_count))
3193 return -EIO; 3220 return -EIO;
3221 data = kmalloc(sizeof(*data), GFP_KERNEL);
3222 if (data == NULL)
3223 return -ENOMEM;
3224 data->client = clp;
3225 data->timestamp = jiffies;
3194 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 3226 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
3195 &nfs4_renew_ops, clp); 3227 &nfs4_renew_ops, data);
3196} 3228}
3197 3229
3198int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) 3230int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3494,7 +3526,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3494 return _nfs4_async_handle_error(task, server, server->nfs_client, state); 3526 return _nfs4_async_handle_error(task, server, server->nfs_client, state);
3495} 3527}
3496 3528
3497int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 3529int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3530 unsigned short port, struct rpc_cred *cred,
3531 struct nfs4_setclientid_res *res)
3498{ 3532{
3499 nfs4_verifier sc_verifier; 3533 nfs4_verifier sc_verifier;
3500 struct nfs4_setclientid setclientid = { 3534 struct nfs4_setclientid setclientid = {
@@ -3504,7 +3538,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
3504 struct rpc_message msg = { 3538 struct rpc_message msg = {
3505 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], 3539 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
3506 .rpc_argp = &setclientid, 3540 .rpc_argp = &setclientid,
3507 .rpc_resp = clp, 3541 .rpc_resp = res,
3508 .rpc_cred = cred, 3542 .rpc_cred = cred,
3509 }; 3543 };
3510 __be32 *p; 3544 __be32 *p;
@@ -3547,12 +3581,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
3547 return status; 3581 return status;
3548} 3582}
3549 3583
3550static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) 3584static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3585 struct nfs4_setclientid_res *arg,
3586 struct rpc_cred *cred)
3551{ 3587{
3552 struct nfs_fsinfo fsinfo; 3588 struct nfs_fsinfo fsinfo;
3553 struct rpc_message msg = { 3589 struct rpc_message msg = {
3554 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], 3590 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
3555 .rpc_argp = clp, 3591 .rpc_argp = arg,
3556 .rpc_resp = &fsinfo, 3592 .rpc_resp = &fsinfo,
3557 .rpc_cred = cred, 3593 .rpc_cred = cred,
3558 }; 3594 };
@@ -3570,12 +3606,14 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
3570 return status; 3606 return status;
3571} 3607}
3572 3608
3573int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) 3609int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3610 struct nfs4_setclientid_res *arg,
3611 struct rpc_cred *cred)
3574{ 3612{
3575 long timeout = 0; 3613 long timeout = 0;
3576 int err; 3614 int err;
3577 do { 3615 do {
3578 err = _nfs4_proc_setclientid_confirm(clp, cred); 3616 err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3579 switch (err) { 3617 switch (err) {
3580 case 0: 3618 case 0:
3581 return err; 3619 return err;
@@ -3667,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3667 }; 3705 };
3668 int status = 0; 3706 int status = 0;
3669 3707
3670 data = kzalloc(sizeof(*data), GFP_KERNEL); 3708 data = kzalloc(sizeof(*data), GFP_NOFS);
3671 if (data == NULL) 3709 if (data == NULL)
3672 return -ENOMEM; 3710 return -ENOMEM;
3673 data->args.fhandle = &data->fh; 3711 data->args.fhandle = &data->fh;
@@ -3823,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3823 struct nfs4_unlockdata *p; 3861 struct nfs4_unlockdata *p;
3824 struct inode *inode = lsp->ls_state->inode; 3862 struct inode *inode = lsp->ls_state->inode;
3825 3863
3826 p = kzalloc(sizeof(*p), GFP_KERNEL); 3864 p = kzalloc(sizeof(*p), GFP_NOFS);
3827 if (p == NULL) 3865 if (p == NULL)
3828 return NULL; 3866 return NULL;
3829 p->arg.fh = NFS_FH(inode); 3867 p->arg.fh = NFS_FH(inode);
@@ -3961,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3961 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) 3999 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
3962 goto out; 4000 goto out;
3963 lsp = request->fl_u.nfs4_fl.owner; 4001 lsp = request->fl_u.nfs4_fl.owner;
3964 seqid = nfs_alloc_seqid(&lsp->ls_seqid); 4002 seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
3965 status = -ENOMEM; 4003 status = -ENOMEM;
3966 if (seqid == NULL) 4004 if (seqid == NULL)
3967 goto out; 4005 goto out;
@@ -3989,22 +4027,23 @@ struct nfs4_lockdata {
3989}; 4027};
3990 4028
3991static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, 4029static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3992 struct nfs_open_context *ctx, struct nfs4_lock_state *lsp) 4030 struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
4031 gfp_t gfp_mask)
3993{ 4032{
3994 struct nfs4_lockdata *p; 4033 struct nfs4_lockdata *p;
3995 struct inode *inode = lsp->ls_state->inode; 4034 struct inode *inode = lsp->ls_state->inode;
3996 struct nfs_server *server = NFS_SERVER(inode); 4035 struct nfs_server *server = NFS_SERVER(inode);
3997 4036
3998 p = kzalloc(sizeof(*p), GFP_KERNEL); 4037 p = kzalloc(sizeof(*p), gfp_mask);
3999 if (p == NULL) 4038 if (p == NULL)
4000 return NULL; 4039 return NULL;
4001 4040
4002 p->arg.fh = NFS_FH(inode); 4041 p->arg.fh = NFS_FH(inode);
4003 p->arg.fl = &p->fl; 4042 p->arg.fl = &p->fl;
4004 p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); 4043 p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
4005 if (p->arg.open_seqid == NULL) 4044 if (p->arg.open_seqid == NULL)
4006 goto out_free; 4045 goto out_free;
4007 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); 4046 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
4008 if (p->arg.lock_seqid == NULL) 4047 if (p->arg.lock_seqid == NULL)
4009 goto out_free_seqid; 4048 goto out_free_seqid;
4010 p->arg.lock_stateid = &lsp->ls_stateid; 4049 p->arg.lock_stateid = &lsp->ls_stateid;
@@ -4158,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4158 4197
4159 dprintk("%s: begin!\n", __func__); 4198 dprintk("%s: begin!\n", __func__);
4160 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), 4199 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
4161 fl->fl_u.nfs4_fl.owner); 4200 fl->fl_u.nfs4_fl.owner,
4201 recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
4162 if (data == NULL) 4202 if (data == NULL)
4163 return -ENOMEM; 4203 return -ENOMEM;
4164 if (IS_SETLKW(cmd)) 4204 if (IS_SETLKW(cmd))
@@ -4647,7 +4687,7 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
4647 if (max_reqs != tbl->max_slots) { 4687 if (max_reqs != tbl->max_slots) {
4648 ret = -ENOMEM; 4688 ret = -ENOMEM;
4649 new = kmalloc(max_reqs * sizeof(struct nfs4_slot), 4689 new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
4650 GFP_KERNEL); 4690 GFP_NOFS);
4651 if (!new) 4691 if (!new)
4652 goto out; 4692 goto out;
4653 ret = 0; 4693 ret = 0;
@@ -4712,7 +4752,7 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4712 4752
4713 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); 4753 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
4714 4754
4715 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); 4755 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
4716 if (!slot) 4756 if (!slot)
4717 goto out; 4757 goto out;
4718 ret = 0; 4758 ret = 0;
@@ -4761,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4761 struct nfs4_session *session; 4801 struct nfs4_session *session;
4762 struct nfs4_slot_table *tbl; 4802 struct nfs4_slot_table *tbl;
4763 4803
4764 session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); 4804 session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
4765 if (!session) 4805 if (!session)
4766 return NULL; 4806 return NULL;
4767 4807
@@ -5105,8 +5145,8 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp,
5105 5145
5106 if (!atomic_inc_not_zero(&clp->cl_count)) 5146 if (!atomic_inc_not_zero(&clp->cl_count))
5107 return -EIO; 5147 return -EIO;
5108 args = kzalloc(sizeof(*args), GFP_KERNEL); 5148 args = kzalloc(sizeof(*args), GFP_NOFS);
5109 res = kzalloc(sizeof(*res), GFP_KERNEL); 5149 res = kzalloc(sizeof(*res), GFP_NOFS);
5110 if (!args || !res) { 5150 if (!args || !res) {
5111 kfree(args); 5151 kfree(args);
5112 kfree(res); 5152 kfree(res);
@@ -5207,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5207 int status = -ENOMEM; 5247 int status = -ENOMEM;
5208 5248
5209 dprintk("--> %s\n", __func__); 5249 dprintk("--> %s\n", __func__);
5210 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); 5250 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5211 if (calldata == NULL) 5251 if (calldata == NULL)
5212 goto out; 5252 goto out;
5213 calldata->clp = clp; 5253 calldata->clp = clp;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6c5ed51f105e..34acf5926fdc 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
62 62
63int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 63int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
64{ 64{
65 struct nfs4_setclientid_res clid;
65 unsigned short port; 66 unsigned short port;
66 int status; 67 int status;
67 68
@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
69 if (clp->cl_addr.ss_family == AF_INET6) 70 if (clp->cl_addr.ss_family == AF_INET6)
70 port = nfs_callback_tcpport6; 71 port = nfs_callback_tcpport6;
71 72
72 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred); 73 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
73 if (status == 0) 74 if (status != 0)
74 status = nfs4_proc_setclientid_confirm(clp, cred); 75 goto out;
75 if (status == 0) 76 status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
76 nfs4_schedule_state_renewal(clp); 77 if (status != 0)
78 goto out;
79 clp->cl_clientid = clid.clientid;
80 nfs4_schedule_state_renewal(clp);
81out:
77 return status; 82 return status;
78} 83}
79 84
@@ -361,7 +366,7 @@ nfs4_alloc_state_owner(void)
361{ 366{
362 struct nfs4_state_owner *sp; 367 struct nfs4_state_owner *sp;
363 368
364 sp = kzalloc(sizeof(*sp),GFP_KERNEL); 369 sp = kzalloc(sizeof(*sp),GFP_NOFS);
365 if (!sp) 370 if (!sp)
366 return NULL; 371 return NULL;
367 spin_lock_init(&sp->so_lock); 372 spin_lock_init(&sp->so_lock);
@@ -435,7 +440,7 @@ nfs4_alloc_open_state(void)
435{ 440{
436 struct nfs4_state *state; 441 struct nfs4_state *state;
437 442
438 state = kzalloc(sizeof(*state), GFP_KERNEL); 443 state = kzalloc(sizeof(*state), GFP_NOFS);
439 if (!state) 444 if (!state)
440 return NULL; 445 return NULL;
441 atomic_set(&state->count, 1); 446 atomic_set(&state->count, 1);
@@ -537,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_state *state)
537/* 542/*
538 * Close the current file. 543 * Close the current file.
539 */ 544 */
540static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait) 545static void __nfs4_close(struct path *path, struct nfs4_state *state,
546 fmode_t fmode, gfp_t gfp_mask, int wait)
541{ 547{
542 struct nfs4_state_owner *owner = state->owner; 548 struct nfs4_state_owner *owner = state->owner;
543 int call_close = 0; 549 int call_close = 0;
@@ -578,17 +584,17 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm
578 nfs4_put_open_state(state); 584 nfs4_put_open_state(state);
579 nfs4_put_state_owner(owner); 585 nfs4_put_state_owner(owner);
580 } else 586 } else
581 nfs4_do_close(path, state, wait); 587 nfs4_do_close(path, state, gfp_mask, wait);
582} 588}
583 589
584void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 590void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
585{ 591{
586 __nfs4_close(path, state, fmode, 0); 592 __nfs4_close(path, state, fmode, GFP_NOFS, 0);
587} 593}
588 594
589void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) 595void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
590{ 596{
591 __nfs4_close(path, state, fmode, 1); 597 __nfs4_close(path, state, fmode, GFP_KERNEL, 1);
592} 598}
593 599
594/* 600/*
@@ -618,7 +624,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
618 struct nfs4_lock_state *lsp; 624 struct nfs4_lock_state *lsp;
619 struct nfs_client *clp = state->owner->so_client; 625 struct nfs_client *clp = state->owner->so_client;
620 626
621 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); 627 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
622 if (lsp == NULL) 628 if (lsp == NULL)
623 return NULL; 629 return NULL;
624 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); 630 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
@@ -754,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
754 nfs4_put_lock_state(lsp); 760 nfs4_put_lock_state(lsp);
755} 761}
756 762
757struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) 763struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
758{ 764{
759 struct nfs_seqid *new; 765 struct nfs_seqid *new;
760 766
761 new = kmalloc(sizeof(*new), GFP_KERNEL); 767 new = kmalloc(sizeof(*new), gfp_mask);
762 if (new != NULL) { 768 if (new != NULL) {
763 new->sequence = counter; 769 new->sequence = counter;
764 INIT_LIST_HEAD(&new->list); 770 INIT_LIST_HEAD(&new->list);
@@ -1347,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
1347 1353
1348 nfs4_begin_drain_session(clp); 1354 nfs4_begin_drain_session(clp);
1349 new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), 1355 new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
1350 GFP_KERNEL); 1356 GFP_NOFS);
1351 if (!new) 1357 if (!new)
1352 return -ENOMEM; 1358 return -ENOMEM;
1353 1359
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38f3b582e7c2..6bdef28efa33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1504 hdr->replen += decode_setclientid_maxsz; 1504 hdr->replen += decode_setclientid_maxsz;
1505} 1505}
1506 1506
1507static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) 1507static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
1508{ 1508{
1509 __be32 *p; 1509 __be32 *p;
1510 1510
1511 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); 1511 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1512 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); 1512 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1513 p = xdr_encode_hyper(p, client_state->cl_clientid); 1513 p = xdr_encode_hyper(p, arg->clientid);
1514 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1514 xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
1515 hdr->nops++; 1515 hdr->nops++;
1516 hdr->replen += decode_setclientid_confirm_maxsz; 1516 hdr->replen += decode_setclientid_confirm_maxsz;
1517} 1517}
@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
2324/* 2324/*
2325 * a SETCLIENTID_CONFIRM request 2325 * a SETCLIENTID_CONFIRM request
2326 */ 2326 */
2327static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) 2327static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
2328{ 2328{
2329 struct xdr_stream xdr; 2329 struct xdr_stream xdr;
2330 struct compound_hdr hdr = { 2330 struct compound_hdr hdr = {
@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
2334 2334
2335 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2335 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2336 encode_compound_hdr(&xdr, req, &hdr); 2336 encode_compound_hdr(&xdr, req, &hdr);
2337 encode_setclientid_confirm(&xdr, clp, &hdr); 2337 encode_setclientid_confirm(&xdr, arg, &hdr);
2338 encode_putrootfh(&xdr, &hdr); 2338 encode_putrootfh(&xdr, &hdr);
2339 encode_fsinfo(&xdr, lease_bitmap, &hdr); 2339 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2340 encode_nops(&hdr); 2340 encode_nops(&hdr);
@@ -4397,7 +4397,7 @@ out_overflow:
4397 return -EIO; 4397 return -EIO;
4398} 4398}
4399 4399
4400static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4400static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
4401{ 4401{
4402 __be32 *p; 4402 __be32 *p;
4403 uint32_t opnum; 4403 uint32_t opnum;
@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4417 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE); 4417 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4418 if (unlikely(!p)) 4418 if (unlikely(!p))
4419 goto out_overflow; 4419 goto out_overflow;
4420 p = xdr_decode_hyper(p, &clp->cl_clientid); 4420 p = xdr_decode_hyper(p, &res->clientid);
4421 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE); 4421 memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
4422 } else if (nfserr == NFSERR_CLID_INUSE) { 4422 } else if (nfserr == NFSERR_CLID_INUSE) {
4423 uint32_t len; 4423 uint32_t len;
4424 4424
@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
4815 goto out; 4815 goto out;
4816 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 4816 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
4817 goto out; 4817 goto out;
4818 decode_getfattr(&xdr, &res->dir_attr, res->server, 4818 decode_getfattr(&xdr, res->dir_attr, res->server,
4819 !RPC_IS_ASYNC(rqstp->rq_task)); 4819 !RPC_IS_ASYNC(rqstp->rq_task));
4820out: 4820out:
4821 return status; 4821 return status;
@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
5498 * Decode SETCLIENTID response 5498 * Decode SETCLIENTID response
5499 */ 5499 */
5500static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, 5500static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5501 struct nfs_client *clp) 5501 struct nfs4_setclientid_res *res)
5502{ 5502{
5503 struct xdr_stream xdr; 5503 struct xdr_stream xdr;
5504 struct compound_hdr hdr; 5504 struct compound_hdr hdr;
@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5507 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5507 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
5508 status = decode_compound_hdr(&xdr, &hdr); 5508 status = decode_compound_hdr(&xdr, &hdr);
5509 if (!status) 5509 if (!status)
5510 status = decode_setclientid(&xdr, clp); 5510 status = decode_setclientid(&xdr, res);
5511 return status; 5511 return status;
5512} 5512}
5513 5513
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8c55b27c0de4..6bd19d843af7 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -488,7 +488,6 @@ static int __init root_nfs_ports(void)
488 */ 488 */
489static int __init root_nfs_get_handle(void) 489static int __init root_nfs_get_handle(void)
490{ 490{
491 struct nfs_fh fh;
492 struct sockaddr_in sin; 491 struct sockaddr_in sin;
493 unsigned int auth_flav_len = 0; 492 unsigned int auth_flav_len = 0;
494 struct nfs_mount_request request = { 493 struct nfs_mount_request request = {
@@ -499,21 +498,24 @@ static int __init root_nfs_get_handle(void)
499 NFS_MNT3_VERSION : NFS_MNT_VERSION, 498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
500 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? 499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
501 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, 500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
502 .fh = &fh,
503 .auth_flav_len = &auth_flav_len, 501 .auth_flav_len = &auth_flav_len,
504 }; 502 };
505 int status; 503 int status = -ENOMEM;
506 504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
507 set_sockaddr(&sin, servaddr, htons(mount_port)); 508 set_sockaddr(&sin, servaddr, htons(mount_port));
508 status = nfs_mount(&request); 509 status = nfs_mount(&request);
509 if (status < 0) 510 if (status < 0)
510 printk(KERN_ERR "Root-NFS: Server returned error %d " 511 printk(KERN_ERR "Root-NFS: Server returned error %d "
511 "while mounting %s\n", status, nfs_export_path); 512 "while mounting %s\n", status, nfs_export_path);
512 else { 513 else {
513 nfs_data.root.size = fh.size; 514 nfs_data.root.size = request.fh->size;
514 memcpy(nfs_data.root.data, fh.data, fh.size); 515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
515 } 516 }
516 517 nfs_free_fhandle(request.fh);
518out:
517 return status; 519 return status;
518} 520}
519 521
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29d9d36cd5f4..a3654e57b589 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
60{ 60{
61 struct nfs_page *req; 61 struct nfs_page *req;
62 62
63 for (;;) { 63 /* try to allocate the request struct */
64 /* try to allocate the request struct */ 64 req = nfs_page_alloc();
65 req = nfs_page_alloc(); 65 if (req == NULL)
66 if (req != NULL) 66 return ERR_PTR(-ENOMEM);
67 break;
68
69 if (fatal_signal_pending(current))
70 return ERR_PTR(-ERESTARTSYS);
71 yield();
72 }
73 67
74 /* Initialize the request struct. Initially, we assume a 68 /* Initialize the request struct. Initially, we assume a
75 * long write-back delay. This will be adjusted in 69 * long write-back delay. This will be adjusted in
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0288be80444f..611bec22f552 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -224,35 +224,60 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
224 return status; 224 return status;
225} 225}
226 226
227struct nfs_createdata {
228 struct nfs_createargs arg;
229 struct nfs_diropok res;
230 struct nfs_fh fhandle;
231 struct nfs_fattr fattr;
232};
233
234static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
235 struct dentry *dentry, struct iattr *sattr)
236{
237 struct nfs_createdata *data;
238
239 data = kmalloc(sizeof(*data), GFP_KERNEL);
240
241 if (data != NULL) {
242 data->arg.fh = NFS_FH(dir);
243 data->arg.name = dentry->d_name.name;
244 data->arg.len = dentry->d_name.len;
245 data->arg.sattr = sattr;
246 nfs_fattr_init(&data->fattr);
247 data->fhandle.size = 0;
248 data->res.fh = &data->fhandle;
249 data->res.fattr = &data->fattr;
250 }
251 return data;
252};
253
254static void nfs_free_createdata(const struct nfs_createdata *data)
255{
256 kfree(data);
257}
258
227static int 259static int
228nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
229 int flags, struct nameidata *nd) 261 int flags, struct nameidata *nd)
230{ 262{
231 struct nfs_fh fhandle; 263 struct nfs_createdata *data;
232 struct nfs_fattr fattr;
233 struct nfs_createargs arg = {
234 .fh = NFS_FH(dir),
235 .name = dentry->d_name.name,
236 .len = dentry->d_name.len,
237 .sattr = sattr
238 };
239 struct nfs_diropok res = {
240 .fh = &fhandle,
241 .fattr = &fattr
242 };
243 struct rpc_message msg = { 264 struct rpc_message msg = {
244 .rpc_proc = &nfs_procedures[NFSPROC_CREATE], 265 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
245 .rpc_argp = &arg,
246 .rpc_resp = &res,
247 }; 266 };
248 int status; 267 int status = -ENOMEM;
249 268
250 nfs_fattr_init(&fattr);
251 dprintk("NFS call create %s\n", dentry->d_name.name); 269 dprintk("NFS call create %s\n", dentry->d_name.name);
270 data = nfs_alloc_createdata(dir, dentry, sattr);
271 if (data == NULL)
272 goto out;
273 msg.rpc_argp = &data->arg;
274 msg.rpc_resp = &data->res;
252 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 275 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
253 nfs_mark_for_revalidate(dir); 276 nfs_mark_for_revalidate(dir);
254 if (status == 0) 277 if (status == 0)
255 status = nfs_instantiate(dentry, &fhandle, &fattr); 278 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
279 nfs_free_createdata(data);
280out:
256 dprintk("NFS reply create: %d\n", status); 281 dprintk("NFS reply create: %d\n", status);
257 return status; 282 return status;
258} 283}
@@ -264,24 +289,12 @@ static int
264nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 289nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
265 dev_t rdev) 290 dev_t rdev)
266{ 291{
267 struct nfs_fh fhandle; 292 struct nfs_createdata *data;
268 struct nfs_fattr fattr;
269 struct nfs_createargs arg = {
270 .fh = NFS_FH(dir),
271 .name = dentry->d_name.name,
272 .len = dentry->d_name.len,
273 .sattr = sattr
274 };
275 struct nfs_diropok res = {
276 .fh = &fhandle,
277 .fattr = &fattr
278 };
279 struct rpc_message msg = { 293 struct rpc_message msg = {
280 .rpc_proc = &nfs_procedures[NFSPROC_CREATE], 294 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
281 .rpc_argp = &arg,
282 .rpc_resp = &res,
283 }; 295 };
284 int status, mode; 296 umode_t mode;
297 int status = -ENOMEM;
285 298
286 dprintk("NFS call mknod %s\n", dentry->d_name.name); 299 dprintk("NFS call mknod %s\n", dentry->d_name.name);
287 300
@@ -294,17 +307,24 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
294 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ 307 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
295 } 308 }
296 309
297 nfs_fattr_init(&fattr); 310 data = nfs_alloc_createdata(dir, dentry, sattr);
311 if (data == NULL)
312 goto out;
313 msg.rpc_argp = &data->arg;
314 msg.rpc_resp = &data->res;
315
298 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 316 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
299 nfs_mark_for_revalidate(dir); 317 nfs_mark_for_revalidate(dir);
300 318
301 if (status == -EINVAL && S_ISFIFO(mode)) { 319 if (status == -EINVAL && S_ISFIFO(mode)) {
302 sattr->ia_mode = mode; 320 sattr->ia_mode = mode;
303 nfs_fattr_init(&fattr); 321 nfs_fattr_init(data->res.fattr);
304 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 322 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
305 } 323 }
306 if (status == 0) 324 if (status == 0)
307 status = nfs_instantiate(dentry, &fhandle, &fattr); 325 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
326 nfs_free_createdata(data);
327out:
308 dprintk("NFS reply mknod: %d\n", status); 328 dprintk("NFS reply mknod: %d\n", status);
309 return status; 329 return status;
310} 330}
@@ -398,8 +418,8 @@ static int
398nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, 418nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
399 unsigned int len, struct iattr *sattr) 419 unsigned int len, struct iattr *sattr)
400{ 420{
401 struct nfs_fh fhandle; 421 struct nfs_fh *fh;
402 struct nfs_fattr fattr; 422 struct nfs_fattr *fattr;
403 struct nfs_symlinkargs arg = { 423 struct nfs_symlinkargs arg = {
404 .fromfh = NFS_FH(dir), 424 .fromfh = NFS_FH(dir),
405 .fromname = dentry->d_name.name, 425 .fromname = dentry->d_name.name,
@@ -412,12 +432,18 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
412 .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], 432 .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
413 .rpc_argp = &arg, 433 .rpc_argp = &arg,
414 }; 434 };
415 int status; 435 int status = -ENAMETOOLONG;
436
437 dprintk("NFS call symlink %s\n", dentry->d_name.name);
416 438
417 if (len > NFS2_MAXPATHLEN) 439 if (len > NFS2_MAXPATHLEN)
418 return -ENAMETOOLONG; 440 goto out;
419 441
420 dprintk("NFS call symlink %s\n", dentry->d_name.name); 442 fh = nfs_alloc_fhandle();
443 fattr = nfs_alloc_fattr();
444 status = -ENOMEM;
445 if (fh == NULL || fattr == NULL)
446 goto out;
421 447
422 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 448 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
423 nfs_mark_for_revalidate(dir); 449 nfs_mark_for_revalidate(dir);
@@ -427,12 +453,12 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
427 * filehandle size to zero indicates to nfs_instantiate that it 453 * filehandle size to zero indicates to nfs_instantiate that it
428 * should fill in the data with a LOOKUP call on the wire. 454 * should fill in the data with a LOOKUP call on the wire.
429 */ 455 */
430 if (status == 0) { 456 if (status == 0)
431 nfs_fattr_init(&fattr); 457 status = nfs_instantiate(dentry, fh, fattr);
432 fhandle.size = 0;
433 status = nfs_instantiate(dentry, &fhandle, &fattr);
434 }
435 458
459 nfs_free_fattr(fattr);
460 nfs_free_fhandle(fh);
461out:
436 dprintk("NFS reply symlink: %d\n", status); 462 dprintk("NFS reply symlink: %d\n", status);
437 return status; 463 return status;
438} 464}
@@ -440,31 +466,25 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
440static int 466static int
441nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) 467nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
442{ 468{
443 struct nfs_fh fhandle; 469 struct nfs_createdata *data;
444 struct nfs_fattr fattr;
445 struct nfs_createargs arg = {
446 .fh = NFS_FH(dir),
447 .name = dentry->d_name.name,
448 .len = dentry->d_name.len,
449 .sattr = sattr
450 };
451 struct nfs_diropok res = {
452 .fh = &fhandle,
453 .fattr = &fattr
454 };
455 struct rpc_message msg = { 470 struct rpc_message msg = {
456 .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], 471 .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
457 .rpc_argp = &arg,
458 .rpc_resp = &res,
459 }; 472 };
460 int status; 473 int status = -ENOMEM;
461 474
462 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 475 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
463 nfs_fattr_init(&fattr); 476 data = nfs_alloc_createdata(dir, dentry, sattr);
477 if (data == NULL)
478 goto out;
479 msg.rpc_argp = &data->arg;
480 msg.rpc_resp = &data->res;
481
464 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 482 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
465 nfs_mark_for_revalidate(dir); 483 nfs_mark_for_revalidate(dir);
466 if (status == 0) 484 if (status == 0)
467 status = nfs_instantiate(dentry, &fhandle, &fattr); 485 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
486 nfs_free_createdata(data);
487out:
468 dprintk("NFS reply mkdir: %d\n", status); 488 dprintk("NFS reply mkdir: %d\n", status);
469 return status; 489 return status;
470} 490}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index db9b360ae19d..6e2b06e6ca79 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
40 40
41struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 41struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
42{ 42{
43 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); 43 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
44 44
45 if (p) { 45 if (p) {
46 memset(p, 0, sizeof(*p)); 46 memset(p, 0, sizeof(*p));
@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); 53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
54 if (!p->pagevec) { 54 if (!p->pagevec) {
55 mempool_free(p, nfs_rdata_mempool); 55 mempool_free(p, nfs_rdata_mempool);
56 p = NULL; 56 p = NULL;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b4148fc00f9f..2f8b1157daa2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -141,7 +141,6 @@ static const match_table_t nfs_mount_option_tokens = {
141 { Opt_resvport, "resvport" }, 141 { Opt_resvport, "resvport" },
142 { Opt_noresvport, "noresvport" }, 142 { Opt_noresvport, "noresvport" },
143 { Opt_fscache, "fsc" }, 143 { Opt_fscache, "fsc" },
144 { Opt_fscache_uniq, "fsc=%s" },
145 { Opt_nofscache, "nofsc" }, 144 { Opt_nofscache, "nofsc" },
146 145
147 { Opt_port, "port=%s" }, 146 { Opt_port, "port=%s" },
@@ -171,6 +170,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 { Opt_mountaddr, "mountaddr=%s" }, 170 { Opt_mountaddr, "mountaddr=%s" },
172 171
173 { Opt_lookupcache, "lookupcache=%s" }, 172 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" },
174 174
175 { Opt_err, NULL } 175 { Opt_err, NULL }
176}; 176};
@@ -423,15 +423,19 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
423 unsigned char blockbits; 423 unsigned char blockbits;
424 unsigned long blockres; 424 unsigned long blockres;
425 struct nfs_fh *fh = NFS_FH(dentry->d_inode); 425 struct nfs_fh *fh = NFS_FH(dentry->d_inode);
426 struct nfs_fattr fattr; 426 struct nfs_fsstat res;
427 struct nfs_fsstat res = { 427 int error = -ENOMEM;
428 .fattr = &fattr, 428
429 }; 429 res.fattr = nfs_alloc_fattr();
430 int error; 430 if (res.fattr == NULL)
431 goto out_err;
431 432
432 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 433 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
434
435 nfs_free_fattr(res.fattr);
433 if (error < 0) 436 if (error < 0)
434 goto out_err; 437 goto out_err;
438
435 buf->f_type = NFS_SUPER_MAGIC; 439 buf->f_type = NFS_SUPER_MAGIC;
436 440
437 /* 441 /*
@@ -1046,14 +1050,6 @@ static int nfs_parse_mount_options(char *raw,
1046 kfree(mnt->fscache_uniq); 1050 kfree(mnt->fscache_uniq);
1047 mnt->fscache_uniq = NULL; 1051 mnt->fscache_uniq = NULL;
1048 break; 1052 break;
1049 case Opt_fscache_uniq:
1050 string = match_strdup(args);
1051 if (!string)
1052 goto out_nomem;
1053 kfree(mnt->fscache_uniq);
1054 mnt->fscache_uniq = string;
1055 mnt->options |= NFS_OPTION_FSCACHE;
1056 break;
1057 1053
1058 /* 1054 /*
1059 * options that take numeric values 1055 * options that take numeric values
@@ -1384,6 +1380,14 @@ static int nfs_parse_mount_options(char *raw,
1384 return 0; 1380 return 0;
1385 }; 1381 };
1386 break; 1382 break;
1383 case Opt_fscache_uniq:
1384 string = match_strdup(args);
1385 if (string == NULL)
1386 goto out_nomem;
1387 kfree(mnt->fscache_uniq);
1388 mnt->fscache_uniq = string;
1389 mnt->options |= NFS_OPTION_FSCACHE;
1390 break;
1387 1391
1388 /* 1392 /*
1389 * Special options 1393 * Special options
@@ -2172,7 +2176,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2172 int error = -ENOMEM; 2176 int error = -ENOMEM;
2173 2177
2174 data = nfs_alloc_parsed_mount_data(3); 2178 data = nfs_alloc_parsed_mount_data(3);
2175 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); 2179 mntfh = nfs_alloc_fhandle();
2176 if (data == NULL || mntfh == NULL) 2180 if (data == NULL || mntfh == NULL)
2177 goto out_free_fh; 2181 goto out_free_fh;
2178 2182
@@ -2247,7 +2251,7 @@ out:
2247 kfree(data->fscache_uniq); 2251 kfree(data->fscache_uniq);
2248 security_free_mnt_opts(&data->lsm_opts); 2252 security_free_mnt_opts(&data->lsm_opts);
2249out_free_fh: 2253out_free_fh:
2250 kfree(mntfh); 2254 nfs_free_fhandle(mntfh);
2251 kfree(data); 2255 kfree(data);
2252 return error; 2256 return error;
2253 2257
@@ -2556,7 +2560,7 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2556 }; 2560 };
2557 int error = -ENOMEM; 2561 int error = -ENOMEM;
2558 2562
2559 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); 2563 mntfh = nfs_alloc_fhandle();
2560 if (data == NULL || mntfh == NULL) 2564 if (data == NULL || mntfh == NULL)
2561 goto out_free_fh; 2565 goto out_free_fh;
2562 2566
@@ -2614,7 +2618,7 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2614out: 2618out:
2615 security_free_mnt_opts(&data->lsm_opts); 2619 security_free_mnt_opts(&data->lsm_opts);
2616out_free_fh: 2620out_free_fh:
2617 kfree(mntfh); 2621 nfs_free_fhandle(mntfh);
2618 return error; 2622 return error;
2619 2623
2620out_free: 2624out_free:
@@ -2669,41 +2673,120 @@ out_freepage:
2669 free_page((unsigned long)page); 2673 free_page((unsigned long)page);
2670} 2674}
2671 2675
2676struct nfs_referral_count {
2677 struct list_head list;
2678 const struct task_struct *task;
2679 unsigned int referral_count;
2680};
2681
2682static LIST_HEAD(nfs_referral_count_list);
2683static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
2684
2685static struct nfs_referral_count *nfs_find_referral_count(void)
2686{
2687 struct nfs_referral_count *p;
2688
2689 list_for_each_entry(p, &nfs_referral_count_list, list) {
2690 if (p->task == current)
2691 return p;
2692 }
2693 return NULL;
2694}
2695
2696#define NFS_MAX_NESTED_REFERRALS 2
2697
2698static int nfs_referral_loop_protect(void)
2699{
2700 struct nfs_referral_count *p, *new;
2701 int ret = -ENOMEM;
2702
2703 new = kmalloc(sizeof(*new), GFP_KERNEL);
2704 if (!new)
2705 goto out;
2706 new->task = current;
2707 new->referral_count = 1;
2708
2709 ret = 0;
2710 spin_lock(&nfs_referral_count_list_lock);
2711 p = nfs_find_referral_count();
2712 if (p != NULL) {
2713 if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
2714 ret = -ELOOP;
2715 else
2716 p->referral_count++;
2717 } else {
2718 list_add(&new->list, &nfs_referral_count_list);
2719 new = NULL;
2720 }
2721 spin_unlock(&nfs_referral_count_list_lock);
2722 kfree(new);
2723out:
2724 return ret;
2725}
2726
2727static void nfs_referral_loop_unprotect(void)
2728{
2729 struct nfs_referral_count *p;
2730
2731 spin_lock(&nfs_referral_count_list_lock);
2732 p = nfs_find_referral_count();
2733 p->referral_count--;
2734 if (p->referral_count == 0)
2735 list_del(&p->list);
2736 else
2737 p = NULL;
2738 spin_unlock(&nfs_referral_count_list_lock);
2739 kfree(p);
2740}
2741
2672static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2742static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2673 const char *export_path, struct vfsmount *mnt_target) 2743 const char *export_path, struct vfsmount *mnt_target)
2674{ 2744{
2745 struct nameidata *nd = NULL;
2675 struct mnt_namespace *ns_private; 2746 struct mnt_namespace *ns_private;
2676 struct nameidata nd;
2677 struct super_block *s; 2747 struct super_block *s;
2678 int ret; 2748 int ret;
2679 2749
2750 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2751 if (nd == NULL)
2752 return -ENOMEM;
2753
2680 ns_private = create_mnt_ns(root_mnt); 2754 ns_private = create_mnt_ns(root_mnt);
2681 ret = PTR_ERR(ns_private); 2755 ret = PTR_ERR(ns_private);
2682 if (IS_ERR(ns_private)) 2756 if (IS_ERR(ns_private))
2683 goto out_mntput; 2757 goto out_mntput;
2684 2758
2759 ret = nfs_referral_loop_protect();
2760 if (ret != 0)
2761 goto out_put_mnt_ns;
2762
2685 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2763 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2686 export_path, LOOKUP_FOLLOW, &nd); 2764 export_path, LOOKUP_FOLLOW, nd);
2687 2765
2766 nfs_referral_loop_unprotect();
2688 put_mnt_ns(ns_private); 2767 put_mnt_ns(ns_private);
2689 2768
2690 if (ret != 0) 2769 if (ret != 0)
2691 goto out_err; 2770 goto out_err;
2692 2771
2693 s = nd.path.mnt->mnt_sb; 2772 s = nd->path.mnt->mnt_sb;
2694 atomic_inc(&s->s_active); 2773 atomic_inc(&s->s_active);
2695 mnt_target->mnt_sb = s; 2774 mnt_target->mnt_sb = s;
2696 mnt_target->mnt_root = dget(nd.path.dentry); 2775 mnt_target->mnt_root = dget(nd->path.dentry);
2697 2776
2698 /* Correct the device pathname */ 2777 /* Correct the device pathname */
2699 nfs_fix_devname(&nd.path, mnt_target); 2778 nfs_fix_devname(&nd->path, mnt_target);
2700 2779
2701 path_put(&nd.path); 2780 path_put(&nd->path);
2781 kfree(nd);
2702 down_write(&s->s_umount); 2782 down_write(&s->s_umount);
2703 return 0; 2783 return 0;
2784out_put_mnt_ns:
2785 put_mnt_ns(ns_private);
2704out_mntput: 2786out_mntput:
2705 mntput(root_mnt); 2787 mntput(root_mnt);
2706out_err: 2788out_err:
2789 kfree(nd);
2707 return ret; 2790 return ret;
2708} 2791}
2709 2792
@@ -2874,17 +2957,21 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2874 struct super_block *s; 2957 struct super_block *s;
2875 struct nfs_server *server; 2958 struct nfs_server *server;
2876 struct dentry *mntroot; 2959 struct dentry *mntroot;
2877 struct nfs_fh mntfh; 2960 struct nfs_fh *mntfh;
2878 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2961 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2879 struct nfs_sb_mountdata sb_mntdata = { 2962 struct nfs_sb_mountdata sb_mntdata = {
2880 .mntflags = flags, 2963 .mntflags = flags,
2881 }; 2964 };
2882 int error; 2965 int error = -ENOMEM;
2883 2966
2884 dprintk("--> nfs4_referral_get_sb()\n"); 2967 dprintk("--> nfs4_referral_get_sb()\n");
2885 2968
2969 mntfh = nfs_alloc_fhandle();
2970 if (mntfh == NULL)
2971 goto out_err_nofh;
2972
2886 /* create a new volume representation */ 2973 /* create a new volume representation */
2887 server = nfs4_create_referral_server(data, &mntfh); 2974 server = nfs4_create_referral_server(data, mntfh);
2888 if (IS_ERR(server)) { 2975 if (IS_ERR(server)) {
2889 error = PTR_ERR(server); 2976 error = PTR_ERR(server);
2890 goto out_err_noserver; 2977 goto out_err_noserver;
@@ -2916,7 +3003,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2916 nfs_fscache_get_super_cookie(s, NULL, data); 3003 nfs_fscache_get_super_cookie(s, NULL, data);
2917 } 3004 }
2918 3005
2919 mntroot = nfs4_get_root(s, &mntfh); 3006 mntroot = nfs4_get_root(s, mntfh);
2920 if (IS_ERR(mntroot)) { 3007 if (IS_ERR(mntroot)) {
2921 error = PTR_ERR(mntroot); 3008 error = PTR_ERR(mntroot);
2922 goto error_splat_super; 3009 goto error_splat_super;
@@ -2933,12 +3020,15 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2933 3020
2934 security_sb_clone_mnt_opts(data->sb, s); 3021 security_sb_clone_mnt_opts(data->sb, s);
2935 3022
3023 nfs_free_fhandle(mntfh);
2936 dprintk("<-- nfs4_referral_get_sb() = 0\n"); 3024 dprintk("<-- nfs4_referral_get_sb() = 0\n");
2937 return 0; 3025 return 0;
2938 3026
2939out_err_nosb: 3027out_err_nosb:
2940 nfs_free_server(server); 3028 nfs_free_server(server);
2941out_err_noserver: 3029out_err_noserver:
3030 nfs_free_fhandle(mntfh);
3031out_err_nofh:
2942 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); 3032 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
2943 return error; 3033 return error;
2944 3034
@@ -2947,6 +3037,7 @@ error_splat_super:
2947 bdi_unregister(&server->backing_dev_info); 3037 bdi_unregister(&server->backing_dev_info);
2948error_splat_bdi: 3038error_splat_bdi:
2949 deactivate_locked_super(s); 3039 deactivate_locked_super(s);
3040 nfs_free_fhandle(mntfh);
2950 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 3041 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
2951 return error; 3042 return error;
2952} 3043}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 6da3d3ff6edd..a2242af6a17d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
23 struct nfs_removeres res; 23 struct nfs_removeres res;
24 struct inode *dir; 24 struct inode *dir;
25 struct rpc_cred *cred; 25 struct rpc_cred *cred;
26 struct nfs_fattr dir_attr;
26}; 27};
27 28
28/** 29/**
@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
169 } 170 }
170 nfs_sb_active(dir->i_sb); 171 nfs_sb_active(dir->i_sb);
171 data->args.fh = NFS_FH(dir); 172 data->args.fh = NFS_FH(dir);
172 nfs_fattr_init(&data->res.dir_attr); 173 nfs_fattr_init(data->res.dir_attr);
173 174
174 NFS_PROTO(dir)->unlink_setup(&msg, dir); 175 NFS_PROTO(dir)->unlink_setup(&msg, dir);
175 176
@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 goto out_free; 260 goto out_free;
260 } 261 }
261 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; 262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr;
262 264
263 status = -EBUSY; 265 status = -EBUSY;
264 spin_lock(&dentry->d_lock); 266 spin_lock(&dentry->d_lock);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 872a5ef550c7..c2a4f71d87dd 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_cache = {
259 .alloc = expkey_alloc, 259 .alloc = expkey_alloc,
260}; 260};
261 261
262static struct svc_expkey * 262static int
263svc_expkey_lookup(struct svc_expkey *item) 263svc_expkey_hash(struct svc_expkey *item)
264{ 264{
265 struct cache_head *ch;
266 int hash = item->ek_fsidtype; 265 int hash = item->ek_fsidtype;
267 char * cp = (char*)item->ek_fsid; 266 char * cp = (char*)item->ek_fsid;
268 int len = key_len(item->ek_fsidtype); 267 int len = key_len(item->ek_fsidtype);
@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *item)
270 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); 269 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
271 hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); 270 hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
272 hash &= EXPKEY_HASHMASK; 271 hash &= EXPKEY_HASHMASK;
272 return hash;
273}
274
275static struct svc_expkey *
276svc_expkey_lookup(struct svc_expkey *item)
277{
278 struct cache_head *ch;
279 int hash = svc_expkey_hash(item);
273 280
274 ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, 281 ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
275 hash); 282 hash);
@@ -283,13 +290,7 @@ static struct svc_expkey *
283svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) 290svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
284{ 291{
285 struct cache_head *ch; 292 struct cache_head *ch;
286 int hash = new->ek_fsidtype; 293 int hash = svc_expkey_hash(new);
287 char * cp = (char*)new->ek_fsid;
288 int len = key_len(new->ek_fsidtype);
289
290 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
291 hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
292 hash &= EXPKEY_HASHMASK;
293 294
294 ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, 295 ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
295 &old->h, hash); 296 &old->h, hash);
@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
738 .alloc = svc_export_alloc, 739 .alloc = svc_export_alloc,
739}; 740};
740 741
741static struct svc_export * 742static int
742svc_export_lookup(struct svc_export *exp) 743svc_export_hash(struct svc_export *exp)
743{ 744{
744 struct cache_head *ch;
745 int hash; 745 int hash;
746
746 hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); 747 hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
747 hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); 748 hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
748 hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); 749 hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
750 return hash;
751}
752
753static struct svc_export *
754svc_export_lookup(struct svc_export *exp)
755{
756 struct cache_head *ch;
757 int hash = svc_export_hash(exp);
749 758
750 ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, 759 ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
751 hash); 760 hash);
@@ -759,10 +768,7 @@ static struct svc_export *
759svc_export_update(struct svc_export *new, struct svc_export *old) 768svc_export_update(struct svc_export *new, struct svc_export *old)
760{ 769{
761 struct cache_head *ch; 770 struct cache_head *ch;
762 int hash; 771 int hash = svc_export_hash(old);
763 hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
764 hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
765 hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
766 772
767 ch = sunrpc_cache_update(&svc_export_cache, &new->h, 773 ch = sunrpc_cache_update(&svc_export_cache, &new->h,
768 &old->h, 774 &old->h,
@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
1071 err = 0; 1077 err = 0;
1072finish: 1078finish:
1073 kfree(new.ex_pathname); 1079 kfree(new.ex_pathname);
1074 if (exp) 1080 if (!IS_ERR_OR_NULL(exp))
1075 exp_put(exp); 1081 exp_put(exp);
1076 if (fsid_key && !IS_ERR(fsid_key)) 1082 if (!IS_ERR_OR_NULL(fsid_key))
1077 cache_put(&fsid_key->h, &svc_expkey_cache); 1083 cache_put(&fsid_key->h, &svc_expkey_cache);
1078 path_put(&path); 1084 path_put(&path);
1079out_put_clp: 1085out_put_clp:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7e32bd394e86..eb78e7e22077 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
35#include <linux/sunrpc/svc_xprt.h>
35#include <linux/slab.h> 36#include <linux/slab.h>
36#include "nfsd.h" 37#include "nfsd.h"
37#include "state.h" 38#include "state.h"
@@ -79,11 +80,6 @@ enum nfs_cb_opnum4 {
79 cb_sequence_dec_sz + \ 80 cb_sequence_dec_sz + \
80 op_dec_sz) 81 op_dec_sz)
81 82
82struct nfs4_rpc_args {
83 void *args_op;
84 struct nfsd4_cb_sequence args_seq;
85};
86
87/* 83/*
88* Generic encode routines from fs/nfs/nfs4xdr.c 84* Generic encode routines from fs/nfs/nfs4xdr.c
89*/ 85*/
@@ -428,13 +424,19 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
428}; 424};
429 425
430static struct rpc_version nfs_cb_version4 = { 426static struct rpc_version nfs_cb_version4 = {
427/*
428 * Note on the callback rpc program version number: despite language in rfc
429 * 5661 section 18.36.3 requiring servers to use 4 in this field, the
430 * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
431 * in practice that appears to be what implementations use. The section
432 * 18.36.3 language is expected to be fixed in an erratum.
433 */
431 .number = 1, 434 .number = 1,
432 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), 435 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
433 .procs = nfs4_cb_procedures 436 .procs = nfs4_cb_procedures
434}; 437};
435 438
436static struct rpc_version * nfs_cb_version[] = { 439static struct rpc_version * nfs_cb_version[] = {
437 NULL,
438 &nfs_cb_version4, 440 &nfs_cb_version4,
439}; 441};
440 442
@@ -456,15 +458,14 @@ static struct rpc_program cb_program = {
456 458
457static int max_cb_time(void) 459static int max_cb_time(void)
458{ 460{
459 return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ; 461 return max(nfsd4_lease/10, (time_t)1) * HZ;
460} 462}
461 463
462/* Reference counting, callback cleanup, etc., all look racy as heck. 464/* Reference counting, callback cleanup, etc., all look racy as heck.
463 * And why is cb_set an atomic? */ 465 * And why is cl_cb_set an atomic? */
464 466
465int setup_callback_client(struct nfs4_client *clp) 467int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
466{ 468{
467 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
468 struct rpc_timeout timeparms = { 469 struct rpc_timeout timeparms = {
469 .to_initval = max_cb_time(), 470 .to_initval = max_cb_time(),
470 .to_retries = 0, 471 .to_retries = 0,
@@ -476,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp)
476 .timeout = &timeparms, 477 .timeout = &timeparms,
477 .program = &cb_program, 478 .program = &cb_program,
478 .prognumber = cb->cb_prog, 479 .prognumber = cb->cb_prog,
479 .version = nfs_cb_version[1]->number, 480 .version = 0,
480 .authflavor = clp->cl_flavor, 481 .authflavor = clp->cl_flavor,
481 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 482 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
482 .client_name = clp->cl_principal, 483 .client_name = clp->cl_principal,
@@ -486,7 +487,7 @@ int setup_callback_client(struct nfs4_client *clp)
486 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 487 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
487 return -EINVAL; 488 return -EINVAL;
488 if (cb->cb_minorversion) { 489 if (cb->cb_minorversion) {
489 args.bc_xprt = clp->cl_cb_xprt; 490 args.bc_xprt = cb->cb_xprt;
490 args.protocol = XPRT_TRANSPORT_BC_TCP; 491 args.protocol = XPRT_TRANSPORT_BC_TCP;
491 } 492 }
492 /* Create RPC client */ 493 /* Create RPC client */
@@ -496,7 +497,7 @@ int setup_callback_client(struct nfs4_client *clp)
496 PTR_ERR(client)); 497 PTR_ERR(client));
497 return PTR_ERR(client); 498 return PTR_ERR(client);
498 } 499 }
499 cb->cb_client = client; 500 nfsd4_set_callback_client(clp, client);
500 return 0; 501 return 0;
501 502
502} 503}
@@ -514,8 +515,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
514 if (task->tk_status) 515 if (task->tk_status)
515 warn_no_callback_path(clp, task->tk_status); 516 warn_no_callback_path(clp, task->tk_status);
516 else 517 else
517 atomic_set(&clp->cl_cb_conn.cb_set, 1); 518 atomic_set(&clp->cl_cb_set, 1);
518 put_nfs4_client(clp);
519} 519}
520 520
521static const struct rpc_call_ops nfsd4_cb_probe_ops = { 521static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -537,7 +537,6 @@ int set_callback_cred(void)
537 537
538void do_probe_callback(struct nfs4_client *clp) 538void do_probe_callback(struct nfs4_client *clp)
539{ 539{
540 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
541 struct rpc_message msg = { 540 struct rpc_message msg = {
542 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 541 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
543 .rpc_argp = clp, 542 .rpc_argp = clp,
@@ -545,34 +544,27 @@ void do_probe_callback(struct nfs4_client *clp)
545 }; 544 };
546 int status; 545 int status;
547 546
548 status = rpc_call_async(cb->cb_client, &msg, 547 status = rpc_call_async(clp->cl_cb_client, &msg,
549 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 548 RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
550 &nfsd4_cb_probe_ops, (void *)clp); 549 &nfsd4_cb_probe_ops, (void *)clp);
551 if (status) { 550 if (status)
552 warn_no_callback_path(clp, status); 551 warn_no_callback_path(clp, status);
553 put_nfs4_client(clp);
554 }
555} 552}
556 553
557/* 554/*
558 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 555 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
559 */ 556 */
560void 557void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
561nfsd4_probe_callback(struct nfs4_client *clp)
562{ 558{
563 int status; 559 int status;
564 560
565 BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); 561 BUG_ON(atomic_read(&clp->cl_cb_set));
566 562
567 status = setup_callback_client(clp); 563 status = setup_callback_client(clp, cb);
568 if (status) { 564 if (status) {
569 warn_no_callback_path(clp, status); 565 warn_no_callback_path(clp, status);
570 return; 566 return;
571 } 567 }
572
573 /* the task holds a reference to the nfs4_client struct */
574 atomic_inc(&clp->cl_count);
575
576 do_probe_callback(clp); 568 do_probe_callback(clp);
577} 569}
578 570
@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
658 } 650 }
659} 651}
660 652
653
661static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 654static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
662{ 655{
663 struct nfs4_delegation *dp = calldata; 656 struct nfs4_delegation *dp = calldata;
664 struct nfs4_client *clp = dp->dl_client; 657 struct nfs4_client *clp = dp->dl_client;
658 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
665 659
666 nfsd4_cb_done(task, calldata); 660 nfsd4_cb_done(task, calldata);
667 661
662 if (current_rpc_client == NULL) {
663 /* We're shutting down; give up. */
664 /* XXX: err, or is it ok just to fall through
665 * and rpc_restart_call? */
666 return;
667 }
668
668 switch (task->tk_status) { 669 switch (task->tk_status) {
669 case -EIO: 670 case -EIO:
670 /* Network partition? */ 671 /* Network partition? */
671 atomic_set(&clp->cl_cb_conn.cb_set, 0); 672 atomic_set(&clp->cl_cb_set, 0);
672 warn_no_callback_path(clp, task->tk_status); 673 warn_no_callback_path(clp, task->tk_status);
674 if (current_rpc_client != task->tk_client) {
675 /* queue a callback on the new connection: */
676 nfsd4_cb_recall(dp);
677 return;
678 }
673 case -EBADHANDLE: 679 case -EBADHANDLE:
674 case -NFS4ERR_BAD_STATEID: 680 case -NFS4ERR_BAD_STATEID:
675 /* Race: client probably got cb_recall 681 /* Race: client probably got cb_recall
@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
677 break; 683 break;
678 default: 684 default:
679 /* success, or error we can't handle */ 685 /* success, or error we can't handle */
680 goto done; 686 return;
681 } 687 }
682 if (dp->dl_retries--) { 688 if (dp->dl_retries--) {
683 rpc_delay(task, 2*HZ); 689 rpc_delay(task, 2*HZ);
@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
685 rpc_restart_call(task); 691 rpc_restart_call(task);
686 return; 692 return;
687 } else { 693 } else {
688 atomic_set(&clp->cl_cb_conn.cb_set, 0); 694 atomic_set(&clp->cl_cb_set, 0);
689 warn_no_callback_path(clp, task->tk_status); 695 warn_no_callback_path(clp, task->tk_status);
690 } 696 }
691done:
692 kfree(task->tk_msg.rpc_argp);
693} 697}
694 698
695static void nfsd4_cb_recall_release(void *calldata) 699static void nfsd4_cb_recall_release(void *calldata)
696{ 700{
697 struct nfs4_delegation *dp = calldata; 701 struct nfs4_delegation *dp = calldata;
698 struct nfs4_client *clp = dp->dl_client;
699 702
700 nfs4_put_delegation(dp); 703 nfs4_put_delegation(dp);
701 put_nfs4_client(clp);
702} 704}
703 705
704static const struct rpc_call_ops nfsd4_cb_recall_ops = { 706static const struct rpc_call_ops nfsd4_cb_recall_ops = {
@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
707 .rpc_release = nfsd4_cb_recall_release, 709 .rpc_release = nfsd4_cb_recall_release,
708}; 710};
709 711
712static struct workqueue_struct *callback_wq;
713
714int nfsd4_create_callback_queue(void)
715{
716 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
717 if (!callback_wq)
718 return -ENOMEM;
719 return 0;
720}
721
722void nfsd4_destroy_callback_queue(void)
723{
724 destroy_workqueue(callback_wq);
725}
726
727/* must be called under the state lock */
728void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
729{
730 struct rpc_clnt *old = clp->cl_cb_client;
731
732 clp->cl_cb_client = new;
733 /*
734 * After this, any work that saw the old value of cl_cb_client will
735 * be gone:
736 */
737 flush_workqueue(callback_wq);
738 /* So we can safely shut it down: */
739 if (old)
740 rpc_shutdown_client(old);
741}
742
710/* 743/*
711 * called with dp->dl_count inc'ed. 744 * called with dp->dl_count inc'ed.
712 */ 745 */
713void 746static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
714nfsd4_cb_recall(struct nfs4_delegation *dp)
715{ 747{
716 struct nfs4_client *clp = dp->dl_client; 748 struct nfs4_client *clp = dp->dl_client;
717 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 749 struct rpc_clnt *clnt = clp->cl_cb_client;
718 struct nfs4_rpc_args *args; 750 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
719 struct rpc_message msg = { 751 struct rpc_message msg = {
720 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 752 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
721 .rpc_cred = callback_cred 753 .rpc_cred = callback_cred
722 }; 754 };
723 int status = -ENOMEM; 755 int status;
756
757 if (clnt == NULL)
758 return; /* Client is shutting down; give up. */
724 759
725 args = kzalloc(sizeof(*args), GFP_KERNEL);
726 if (!args)
727 goto out;
728 args->args_op = dp; 760 args->args_op = dp;
729 msg.rpc_argp = args; 761 msg.rpc_argp = args;
730 dp->dl_retries = 1; 762 dp->dl_retries = 1;
731 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 763 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
732 &nfsd4_cb_recall_ops, dp); 764 &nfsd4_cb_recall_ops, dp);
733out: 765 if (status)
734 if (status) {
735 kfree(args);
736 put_nfs4_client(clp);
737 nfs4_put_delegation(dp); 766 nfs4_put_delegation(dp);
738 } 767}
768
769void nfsd4_do_callback_rpc(struct work_struct *w)
770{
771 /* XXX: for now, just send off delegation recall. */
772 /* In future, generalize to handle any sort of callback. */
773 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
774 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
775
776 _nfsd4_cb_recall(dp);
777}
778
779
780void nfsd4_cb_recall(struct nfs4_delegation *dp)
781{
782 queue_work(callback_wq, &dp->dl_recall.cb_work);
739} 783}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ab9e8501bfe..59ec449b0c7f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[];
969static const char *nfsd4_op_name(unsigned opnum); 969static const char *nfsd4_op_name(unsigned opnum);
970 970
971/* 971/*
972 * Enforce NFSv4.1 COMPOUND ordering rules. 972 * Enforce NFSv4.1 COMPOUND ordering rules:
973 * 973 *
974 * TODO: 974 * Also note, enforced elsewhere:
975 * - enforce NFS4ERR_NOT_ONLY_OP, 975 * - SEQUENCE other than as first op results in
976 * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. 976 * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
977 * - BIND_CONN_TO_SESSION must be the only op in its compound
978 * (Will be enforced in nfsd4_bind_conn_to_session().)
979 * - DESTROY_SESSION must be the final operation in a compound, if
980 * sessionid's in SEQUENCE and DESTROY_SESSION are the same.
981 * (Enforced in nfsd4_destroy_session().)
977 */ 982 */
978static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) 983static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
979{ 984{
980 if (args->minorversion && args->opcnt > 0) { 985 struct nfsd4_op *op = &args->ops[0];
981 struct nfsd4_op *op = &args->ops[0]; 986
982 return (op->status == nfserr_op_illegal) || 987 /* These ordering requirements don't apply to NFSv4.0: */
983 (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); 988 if (args->minorversion == 0)
984 } 989 return nfs_ok;
985 return true; 990 /* This is weird, but OK, not our problem: */
991 if (args->opcnt == 0)
992 return nfs_ok;
993 if (op->status == nfserr_op_illegal)
994 return nfs_ok;
995 if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
996 return nfserr_op_not_in_session;
997 if (op->opnum == OP_SEQUENCE)
998 return nfs_ok;
999 if (args->opcnt != 1)
1000 return nfserr_not_only_op;
1001 return nfs_ok;
986} 1002}
987 1003
988/* 1004/*
@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1012 resp->rqstp = rqstp; 1028 resp->rqstp = rqstp;
1013 resp->cstate.minorversion = args->minorversion; 1029 resp->cstate.minorversion = args->minorversion;
1014 resp->cstate.replay_owner = NULL; 1030 resp->cstate.replay_owner = NULL;
1031 resp->cstate.session = NULL;
1015 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1016 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1017 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /* Use the deferral mechanism only for NFSv4.0 compounds */
@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1024 if (args->minorversion > nfsd_supported_minorversion) 1041 if (args->minorversion > nfsd_supported_minorversion)
1025 goto out; 1042 goto out;
1026 1043
1027 if (!nfs41_op_ordering_ok(args)) { 1044 status = nfs41_check_op_ordering(args);
1045 if (status) {
1028 op = &args->ops[0]; 1046 op = &args->ops[0];
1029 op->status = nfserr_sequence_pos; 1047 op->status = status;
1030 goto encode_op; 1048 goto encode_op;
1031 } 1049 }
1032 1050
1033 status = nfs_ok;
1034 while (!status && resp->opcnt < args->opcnt) { 1051 while (!status && resp->opcnt < args->opcnt) {
1035 op = &args->ops[resp->opcnt++]; 1052 op = &args->ops[resp->opcnt++];
1036 1053
@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
1295 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1312 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1296 .op_name = "OP_SEQUENCE", 1313 .op_name = "OP_SEQUENCE",
1297 }, 1314 },
1315 [OP_RECLAIM_COMPLETE] = {
1316 .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
1317 .op_flags = ALLOWED_WITHOUT_FH,
1318 .op_name = "OP_RECLAIM_COMPLETE",
1319 },
1298}; 1320};
1299 1321
1300static const char *nfsd4_op_name(unsigned opnum) 1322static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 7a9ae3254a4b..7e26caab2a26 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -44,8 +44,7 @@
44#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
45 45
46/* Globals */ 46/* Globals */
47static struct path rec_dir; 47static struct file *rec_file;
48static int rec_dir_init = 0;
49 48
50static int 49static int
51nfs4_save_creds(const struct cred **original_creds) 50nfs4_save_creds(const struct cred **original_creds)
@@ -117,33 +116,28 @@ out_no_tfm:
117 return status; 116 return status;
118} 117}
119 118
120static void
121nfsd4_sync_rec_dir(void)
122{
123 vfs_fsync(NULL, rec_dir.dentry, 0);
124}
125
126int 119int
127nfsd4_create_clid_dir(struct nfs4_client *clp) 120nfsd4_create_clid_dir(struct nfs4_client *clp)
128{ 121{
129 const struct cred *original_cred; 122 const struct cred *original_cred;
130 char *dname = clp->cl_recdir; 123 char *dname = clp->cl_recdir;
131 struct dentry *dentry; 124 struct dentry *dir, *dentry;
132 int status; 125 int status;
133 126
134 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); 127 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
135 128
136 if (!rec_dir_init || clp->cl_firststate) 129 if (!rec_file || clp->cl_firststate)
137 return 0; 130 return 0;
138 131
139 status = nfs4_save_creds(&original_cred); 132 status = nfs4_save_creds(&original_cred);
140 if (status < 0) 133 if (status < 0)
141 return status; 134 return status;
142 135
136 dir = rec_file->f_path.dentry;
143 /* lock the parent */ 137 /* lock the parent */
144 mutex_lock(&rec_dir.dentry->d_inode->i_mutex); 138 mutex_lock(&dir->d_inode->i_mutex);
145 139
146 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); 140 dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
147 if (IS_ERR(dentry)) { 141 if (IS_ERR(dentry)) {
148 status = PTR_ERR(dentry); 142 status = PTR_ERR(dentry);
149 goto out_unlock; 143 goto out_unlock;
@@ -153,18 +147,18 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
153 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 147 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
154 goto out_put; 148 goto out_put;
155 } 149 }
156 status = mnt_want_write(rec_dir.mnt); 150 status = mnt_want_write(rec_file->f_path.mnt);
157 if (status) 151 if (status)
158 goto out_put; 152 goto out_put;
159 status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); 153 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
160 mnt_drop_write(rec_dir.mnt); 154 mnt_drop_write(rec_file->f_path.mnt);
161out_put: 155out_put:
162 dput(dentry); 156 dput(dentry);
163out_unlock: 157out_unlock:
164 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); 158 mutex_unlock(&dir->d_inode->i_mutex);
165 if (status == 0) { 159 if (status == 0) {
166 clp->cl_firststate = 1; 160 clp->cl_firststate = 1;
167 nfsd4_sync_rec_dir(); 161 vfs_fsync(rec_file, 0);
168 } 162 }
169 nfs4_reset_creds(original_cred); 163 nfs4_reset_creds(original_cred);
170 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); 164 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
@@ -206,14 +200,14 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
206 struct dentry *dentry; 200 struct dentry *dentry;
207 int status; 201 int status;
208 202
209 if (!rec_dir_init) 203 if (!rec_file)
210 return 0; 204 return 0;
211 205
212 status = nfs4_save_creds(&original_cred); 206 status = nfs4_save_creds(&original_cred);
213 if (status < 0) 207 if (status < 0)
214 return status; 208 return status;
215 209
216 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, 210 filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY,
217 current_cred()); 211 current_cred());
218 status = PTR_ERR(filp); 212 status = PTR_ERR(filp);
219 if (IS_ERR(filp)) 213 if (IS_ERR(filp))
@@ -250,13 +244,14 @@ out:
250static int 244static int
251nfsd4_unlink_clid_dir(char *name, int namlen) 245nfsd4_unlink_clid_dir(char *name, int namlen)
252{ 246{
253 struct dentry *dentry; 247 struct dentry *dir, *dentry;
254 int status; 248 int status;
255 249
256 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); 250 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
257 251
258 mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 252 dir = rec_file->f_path.dentry;
259 dentry = lookup_one_len(name, rec_dir.dentry, namlen); 253 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
254 dentry = lookup_one_len(name, dir, namlen);
260 if (IS_ERR(dentry)) { 255 if (IS_ERR(dentry)) {
261 status = PTR_ERR(dentry); 256 status = PTR_ERR(dentry);
262 goto out_unlock; 257 goto out_unlock;
@@ -264,11 +259,11 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
264 status = -ENOENT; 259 status = -ENOENT;
265 if (!dentry->d_inode) 260 if (!dentry->d_inode)
266 goto out; 261 goto out;
267 status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); 262 status = vfs_rmdir(dir->d_inode, dentry);
268out: 263out:
269 dput(dentry); 264 dput(dentry);
270out_unlock: 265out_unlock:
271 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); 266 mutex_unlock(&dir->d_inode->i_mutex);
272 return status; 267 return status;
273} 268}
274 269
@@ -278,10 +273,10 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
278 const struct cred *original_cred; 273 const struct cred *original_cred;
279 int status; 274 int status;
280 275
281 if (!rec_dir_init || !clp->cl_firststate) 276 if (!rec_file || !clp->cl_firststate)
282 return; 277 return;
283 278
284 status = mnt_want_write(rec_dir.mnt); 279 status = mnt_want_write(rec_file->f_path.mnt);
285 if (status) 280 if (status)
286 goto out; 281 goto out;
287 clp->cl_firststate = 0; 282 clp->cl_firststate = 0;
@@ -293,8 +288,8 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
293 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); 288 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
294 nfs4_reset_creds(original_cred); 289 nfs4_reset_creds(original_cred);
295 if (status == 0) 290 if (status == 0)
296 nfsd4_sync_rec_dir(); 291 vfs_fsync(rec_file, 0);
297 mnt_drop_write(rec_dir.mnt); 292 mnt_drop_write(rec_file->f_path.mnt);
298out: 293out:
299 if (status) 294 if (status)
300 printk("NFSD: Failed to remove expired client state directory" 295 printk("NFSD: Failed to remove expired client state directory"
@@ -323,19 +318,19 @@ void
323nfsd4_recdir_purge_old(void) { 318nfsd4_recdir_purge_old(void) {
324 int status; 319 int status;
325 320
326 if (!rec_dir_init) 321 if (!rec_file)
327 return; 322 return;
328 status = mnt_want_write(rec_dir.mnt); 323 status = mnt_want_write(rec_file->f_path.mnt);
329 if (status) 324 if (status)
330 goto out; 325 goto out;
331 status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); 326 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old);
332 if (status == 0) 327 if (status == 0)
333 nfsd4_sync_rec_dir(); 328 vfs_fsync(rec_file, 0);
334 mnt_drop_write(rec_dir.mnt); 329 mnt_drop_write(rec_file->f_path.mnt);
335out: 330out:
336 if (status) 331 if (status)
337 printk("nfsd4: failed to purge old clients from recovery" 332 printk("nfsd4: failed to purge old clients from recovery"
338 " directory %s\n", rec_dir.dentry->d_name.name); 333 " directory %s\n", rec_file->f_path.dentry->d_name.name);
339} 334}
340 335
341static int 336static int
@@ -355,10 +350,13 @@ int
355nfsd4_recdir_load(void) { 350nfsd4_recdir_load(void) {
356 int status; 351 int status;
357 352
358 status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); 353 if (!rec_file)
354 return 0;
355
356 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir);
359 if (status) 357 if (status)
360 printk("nfsd4: failed loading clients from recovery" 358 printk("nfsd4: failed loading clients from recovery"
361 " directory %s\n", rec_dir.dentry->d_name.name); 359 " directory %s\n", rec_file->f_path.dentry->d_name.name);
362 return status; 360 return status;
363} 361}
364 362
@@ -375,7 +373,7 @@ nfsd4_init_recdir(char *rec_dirname)
375 printk("NFSD: Using %s as the NFSv4 state recovery directory\n", 373 printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
376 rec_dirname); 374 rec_dirname);
377 375
378 BUG_ON(rec_dir_init); 376 BUG_ON(rec_file);
379 377
380 status = nfs4_save_creds(&original_cred); 378 status = nfs4_save_creds(&original_cred);
381 if (status < 0) { 379 if (status < 0) {
@@ -385,22 +383,21 @@ nfsd4_init_recdir(char *rec_dirname)
385 return; 383 return;
386 } 384 }
387 385
388 status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 386 rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0);
389 &rec_dir); 387 if (IS_ERR(rec_file)) {
390 if (status)
391 printk("NFSD: unable to find recovery directory %s\n", 388 printk("NFSD: unable to find recovery directory %s\n",
392 rec_dirname); 389 rec_dirname);
390 rec_file = NULL;
391 }
393 392
394 if (!status)
395 rec_dir_init = 1;
396 nfs4_reset_creds(original_cred); 393 nfs4_reset_creds(original_cred);
397} 394}
398 395
399void 396void
400nfsd4_shutdown_recdir(void) 397nfsd4_shutdown_recdir(void)
401{ 398{
402 if (!rec_dir_init) 399 if (!rec_file)
403 return; 400 return;
404 rec_dir_init = 0; 401 fput(rec_file);
405 path_put(&rec_dir); 402 rec_file = NULL;
406} 403}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a8fedaa4f55..12f7109720c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -45,8 +45,8 @@
45#define NFSDDBG_FACILITY NFSDDBG_PROC 45#define NFSDDBG_FACILITY NFSDDBG_PROC
46 46
47/* Globals */ 47/* Globals */
48static time_t lease_time = 90; /* default lease time */ 48time_t nfsd4_lease = 90; /* default lease time */
49static time_t user_lease_time = 90; 49time_t nfsd4_grace = 90;
50static time_t boot_time; 50static time_t boot_time;
51static u32 current_ownerid = 1; 51static u32 current_ownerid = 1;
52static u32 current_fileid = 1; 52static u32 current_fileid = 1;
@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
190 dp->dl_vfs_file = stp->st_vfs_file; 190 dp->dl_vfs_file = stp->st_vfs_file;
191 dp->dl_type = type; 191 dp->dl_type = type;
192 dp->dl_ident = cb->cb_ident; 192 dp->dl_ident = cb->cb_ident;
193 dp->dl_stateid.si_boot = get_seconds(); 193 dp->dl_stateid.si_boot = boot_time;
194 dp->dl_stateid.si_stateownerid = current_delegid++; 194 dp->dl_stateid.si_stateownerid = current_delegid++;
195 dp->dl_stateid.si_fileid = 0; 195 dp->dl_stateid.si_fileid = 0;
196 dp->dl_stateid.si_generation = 0; 196 dp->dl_stateid.si_generation = 0;
@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
199 atomic_set(&dp->dl_count, 1); 199 atomic_set(&dp->dl_count, 1);
200 list_add(&dp->dl_perfile, &fp->fi_delegations); 200 list_add(&dp->dl_perfile, &fp->fi_delegations);
201 list_add(&dp->dl_perclnt, &clp->cl_delegations); 201 list_add(&dp->dl_perclnt, &clp->cl_delegations);
202 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
202 return dp; 203 return dp;
203} 204}
204 205
@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation *dp)
249 * SETCLIENTID state 250 * SETCLIENTID state
250 */ 251 */
251 252
253/* client_lock protects the client lru list and session hash table */
254static DEFINE_SPINLOCK(client_lock);
255
252/* Hash tables for nfs4_clientid state */ 256/* Hash tables for nfs4_clientid state */
253#define CLIENT_HASH_BITS 4 257#define CLIENT_HASH_BITS 4
254#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) 258#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
@@ -367,7 +371,6 @@ static void release_openowner(struct nfs4_stateowner *sop)
367 nfs4_put_stateowner(sop); 371 nfs4_put_stateowner(sop);
368} 372}
369 373
370static DEFINE_SPINLOCK(sessionid_lock);
371#define SESSION_HASH_SIZE 512 374#define SESSION_HASH_SIZE 512
372static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; 375static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
373 376
@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
565 568
566 new->se_flags = cses->flags; 569 new->se_flags = cses->flags;
567 kref_init(&new->se_ref); 570 kref_init(&new->se_ref);
568 spin_lock(&sessionid_lock); 571 spin_lock(&client_lock);
569 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 572 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
570 list_add(&new->se_perclnt, &clp->cl_sessions); 573 list_add(&new->se_perclnt, &clp->cl_sessions);
571 spin_unlock(&sessionid_lock); 574 spin_unlock(&client_lock);
572 575
573 status = nfs_ok; 576 status = nfs_ok;
574out: 577out:
@@ -579,7 +582,7 @@ out_free:
579 goto out; 582 goto out;
580} 583}
581 584
582/* caller must hold sessionid_lock */ 585/* caller must hold client_lock */
583static struct nfsd4_session * 586static struct nfsd4_session *
584find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) 587find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
585{ 588{
@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
602 return NULL; 605 return NULL;
603} 606}
604 607
605/* caller must hold sessionid_lock */ 608/* caller must hold client_lock */
606static void 609static void
607unhash_session(struct nfsd4_session *ses) 610unhash_session(struct nfsd4_session *ses)
608{ 611{
@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses)
610 list_del(&ses->se_perclnt); 613 list_del(&ses->se_perclnt);
611} 614}
612 615
613static void
614release_session(struct nfsd4_session *ses)
615{
616 spin_lock(&sessionid_lock);
617 unhash_session(ses);
618 spin_unlock(&sessionid_lock);
619 nfsd4_put_session(ses);
620}
621
622void 616void
623free_session(struct kref *kref) 617free_session(struct kref *kref)
624{ 618{
@@ -634,9 +628,18 @@ free_session(struct kref *kref)
634 kfree(ses); 628 kfree(ses);
635} 629}
636 630
631/* must be called under the client_lock */
637static inline void 632static inline void
638renew_client(struct nfs4_client *clp) 633renew_client_locked(struct nfs4_client *clp)
639{ 634{
635 if (is_client_expired(clp)) {
636 dprintk("%s: client (clientid %08x/%08x) already expired\n",
637 __func__,
638 clp->cl_clientid.cl_boot,
639 clp->cl_clientid.cl_id);
640 return;
641 }
642
640 /* 643 /*
641 * Move client to the end to the LRU list. 644 * Move client to the end to the LRU list.
642 */ 645 */
@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
647 clp->cl_time = get_seconds(); 650 clp->cl_time = get_seconds();
648} 651}
649 652
653static inline void
654renew_client(struct nfs4_client *clp)
655{
656 spin_lock(&client_lock);
657 renew_client_locked(clp);
658 spin_unlock(&client_lock);
659}
660
650/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ 661/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
651static int 662static int
652STALE_CLIENTID(clientid_t *clid) 663STALE_CLIENTID(clientid_t *clid)
@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
680 return clp; 691 return clp;
681} 692}
682 693
683static void
684shutdown_callback_client(struct nfs4_client *clp)
685{
686 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
687
688 if (clnt) {
689 /*
690 * Callback threads take a reference on the client, so there
691 * should be no outstanding callbacks at this point.
692 */
693 clp->cl_cb_conn.cb_client = NULL;
694 rpc_shutdown_client(clnt);
695 }
696}
697
698static inline void 694static inline void
699free_client(struct nfs4_client *clp) 695free_client(struct nfs4_client *clp)
700{ 696{
701 shutdown_callback_client(clp);
702 if (clp->cl_cb_xprt)
703 svc_xprt_put(clp->cl_cb_xprt);
704 if (clp->cl_cred.cr_group_info) 697 if (clp->cl_cred.cr_group_info)
705 put_group_info(clp->cl_cred.cr_group_info); 698 put_group_info(clp->cl_cred.cr_group_info);
706 kfree(clp->cl_principal); 699 kfree(clp->cl_principal);
@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
709} 702}
710 703
711void 704void
712put_nfs4_client(struct nfs4_client *clp) 705release_session_client(struct nfsd4_session *session)
713{ 706{
714 if (atomic_dec_and_test(&clp->cl_count)) 707 struct nfs4_client *clp = session->se_client;
708
709 if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
710 return;
711 if (is_client_expired(clp)) {
715 free_client(clp); 712 free_client(clp);
713 session->se_client = NULL;
714 } else
715 renew_client_locked(clp);
716 spin_unlock(&client_lock);
717 nfsd4_put_session(session);
718}
719
720/* must be called under the client_lock */
721static inline void
722unhash_client_locked(struct nfs4_client *clp)
723{
724 mark_client_expired(clp);
725 list_del(&clp->cl_lru);
726 while (!list_empty(&clp->cl_sessions)) {
727 struct nfsd4_session *ses;
728 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
729 se_perclnt);
730 unhash_session(ses);
731 nfsd4_put_session(ses);
732 }
716} 733}
717 734
718static void 735static void
@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
722 struct nfs4_delegation *dp; 739 struct nfs4_delegation *dp;
723 struct list_head reaplist; 740 struct list_head reaplist;
724 741
725 dprintk("NFSD: expire_client cl_count %d\n",
726 atomic_read(&clp->cl_count));
727
728 INIT_LIST_HEAD(&reaplist); 742 INIT_LIST_HEAD(&reaplist);
729 spin_lock(&recall_lock); 743 spin_lock(&recall_lock);
730 while (!list_empty(&clp->cl_delegations)) { 744 while (!list_empty(&clp->cl_delegations)) {
@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
740 list_del_init(&dp->dl_recall_lru); 754 list_del_init(&dp->dl_recall_lru);
741 unhash_delegation(dp); 755 unhash_delegation(dp);
742 } 756 }
743 list_del(&clp->cl_idhash);
744 list_del(&clp->cl_strhash);
745 list_del(&clp->cl_lru);
746 while (!list_empty(&clp->cl_openowners)) { 757 while (!list_empty(&clp->cl_openowners)) {
747 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 758 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
748 release_openowner(sop); 759 release_openowner(sop);
749 } 760 }
750 while (!list_empty(&clp->cl_sessions)) { 761 nfsd4_set_callback_client(clp, NULL);
751 struct nfsd4_session *ses; 762 if (clp->cl_cb_conn.cb_xprt)
752 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, 763 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
753 se_perclnt); 764 list_del(&clp->cl_idhash);
754 release_session(ses); 765 list_del(&clp->cl_strhash);
755 } 766 spin_lock(&client_lock);
756 put_nfs4_client(clp); 767 unhash_client_locked(clp);
768 if (atomic_read(&clp->cl_refcount) == 0)
769 free_client(clp);
770 spin_unlock(&client_lock);
757} 771}
758 772
759static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 773static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -839,14 +853,15 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
839 } 853 }
840 854
841 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 855 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
842 atomic_set(&clp->cl_count, 1); 856 atomic_set(&clp->cl_refcount, 0);
843 atomic_set(&clp->cl_cb_conn.cb_set, 0); 857 atomic_set(&clp->cl_cb_set, 0);
844 INIT_LIST_HEAD(&clp->cl_idhash); 858 INIT_LIST_HEAD(&clp->cl_idhash);
845 INIT_LIST_HEAD(&clp->cl_strhash); 859 INIT_LIST_HEAD(&clp->cl_strhash);
846 INIT_LIST_HEAD(&clp->cl_openowners); 860 INIT_LIST_HEAD(&clp->cl_openowners);
847 INIT_LIST_HEAD(&clp->cl_delegations); 861 INIT_LIST_HEAD(&clp->cl_delegations);
848 INIT_LIST_HEAD(&clp->cl_sessions); 862 INIT_LIST_HEAD(&clp->cl_sessions);
849 INIT_LIST_HEAD(&clp->cl_lru); 863 INIT_LIST_HEAD(&clp->cl_lru);
864 clp->cl_time = get_seconds();
850 clear_bit(0, &clp->cl_cb_slot_busy); 865 clear_bit(0, &clp->cl_cb_slot_busy);
851 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 866 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
852 copy_verf(clp, verf); 867 copy_verf(clp, verf);
@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
877 list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); 892 list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
878 idhashval = clientid_hashval(clp->cl_clientid.cl_id); 893 idhashval = clientid_hashval(clp->cl_clientid.cl_id);
879 list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); 894 list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
880 list_add_tail(&clp->cl_lru, &client_lru); 895 renew_client(clp);
881 clp->cl_time = get_seconds();
882} 896}
883 897
884static void 898static void
@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *clp)
888 unsigned int strhashval; 902 unsigned int strhashval;
889 903
890 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 904 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
891 list_del_init(&clp->cl_strhash);
892 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); 905 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
893 strhashval = clientstr_hashval(clp->cl_recdir); 906 strhashval = clientstr_hashval(clp->cl_recdir);
894 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 907 list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
895 renew_client(clp); 908 renew_client(clp);
896} 909}
897 910
@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1327 cs_slot->sl_seqid++; /* from 0 to 1 */ 1340 cs_slot->sl_seqid++; /* from 0 to 1 */
1328 move_to_confirmed(unconf); 1341 move_to_confirmed(unconf);
1329 1342
1330 /*
1331 * We do not support RDMA or persistent sessions
1332 */
1333 cr_ses->flags &= ~SESSION4_PERSIST;
1334 cr_ses->flags &= ~SESSION4_RDMA;
1335
1336 if (cr_ses->flags & SESSION4_BACK_CHAN) { 1343 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1337 unconf->cl_cb_xprt = rqstp->rq_xprt; 1344 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1338 svc_xprt_get(unconf->cl_cb_xprt); 1345 svc_xprt_get(rqstp->rq_xprt);
1339 rpc_copy_addr( 1346 rpc_copy_addr(
1340 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, 1347 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1341 sa); 1348 sa);
@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1344 cstate->minorversion; 1351 cstate->minorversion;
1345 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; 1352 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1346 unconf->cl_cb_seq_nr = 1; 1353 unconf->cl_cb_seq_nr = 1;
1347 nfsd4_probe_callback(unconf); 1354 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1348 } 1355 }
1349 conf = unconf; 1356 conf = unconf;
1350 } else { 1357 } else {
@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1352 goto out; 1359 goto out;
1353 } 1360 }
1354 1361
1362 /*
1363 * We do not support RDMA or persistent sessions
1364 */
1365 cr_ses->flags &= ~SESSION4_PERSIST;
1366 cr_ses->flags &= ~SESSION4_RDMA;
1367
1355 status = alloc_init_session(rqstp, conf, cr_ses); 1368 status = alloc_init_session(rqstp, conf, cr_ses);
1356 if (status) 1369 if (status)
1357 goto out; 1370 goto out;
@@ -1369,6 +1382,21 @@ out:
1369 return status; 1382 return status;
1370} 1383}
1371 1384
1385static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
1386{
1387 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1388 struct nfsd4_compoundargs *argp = rqstp->rq_argp;
1389
1390 return argp->opcnt == resp->opcnt;
1391}
1392
1393static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
1394{
1395 if (!session)
1396 return 0;
1397 return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
1398}
1399
1372__be32 1400__be32
1373nfsd4_destroy_session(struct svc_rqst *r, 1401nfsd4_destroy_session(struct svc_rqst *r,
1374 struct nfsd4_compound_state *cstate, 1402 struct nfsd4_compound_state *cstate,
@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r,
1384 * - Do we need to clear any callback info from previous session? 1412 * - Do we need to clear any callback info from previous session?
1385 */ 1413 */
1386 1414
1415 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
1416 if (!nfsd4_last_compound_op(r))
1417 return nfserr_not_only_op;
1418 }
1387 dump_sessionid(__func__, &sessionid->sessionid); 1419 dump_sessionid(__func__, &sessionid->sessionid);
1388 spin_lock(&sessionid_lock); 1420 spin_lock(&client_lock);
1389 ses = find_in_sessionid_hashtbl(&sessionid->sessionid); 1421 ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
1390 if (!ses) { 1422 if (!ses) {
1391 spin_unlock(&sessionid_lock); 1423 spin_unlock(&client_lock);
1392 goto out; 1424 goto out;
1393 } 1425 }
1394 1426
1395 unhash_session(ses); 1427 unhash_session(ses);
1396 spin_unlock(&sessionid_lock); 1428 spin_unlock(&client_lock);
1397 1429
1430 nfs4_lock_state();
1398 /* wait for callbacks */ 1431 /* wait for callbacks */
1399 shutdown_callback_client(ses->se_client); 1432 nfsd4_set_callback_client(ses->se_client, NULL);
1433 nfs4_unlock_state();
1400 nfsd4_put_session(ses); 1434 nfsd4_put_session(ses);
1401 status = nfs_ok; 1435 status = nfs_ok;
1402out: 1436out:
@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1417 if (resp->opcnt != 1) 1451 if (resp->opcnt != 1)
1418 return nfserr_sequence_pos; 1452 return nfserr_sequence_pos;
1419 1453
1420 spin_lock(&sessionid_lock); 1454 spin_lock(&client_lock);
1421 status = nfserr_badsession; 1455 status = nfserr_badsession;
1422 session = find_in_sessionid_hashtbl(&seq->sessionid); 1456 session = find_in_sessionid_hashtbl(&seq->sessionid);
1423 if (!session) 1457 if (!session)
@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1456 cstate->slot = slot; 1490 cstate->slot = slot;
1457 cstate->session = session; 1491 cstate->session = session;
1458 1492
1459 /* Hold a session reference until done processing the compound:
1460 * nfsd4_put_session called only if the cstate slot is set.
1461 */
1462 nfsd4_get_session(session);
1463out: 1493out:
1464 spin_unlock(&sessionid_lock); 1494 /* Hold a session reference until done processing the compound. */
1465 /* Renew the clientid on success and on replay */
1466 if (cstate->session) { 1495 if (cstate->session) {
1467 nfs4_lock_state(); 1496 nfsd4_get_session(cstate->session);
1468 renew_client(session->se_client); 1497 atomic_inc(&session->se_client->cl_refcount);
1469 nfs4_unlock_state();
1470 } 1498 }
1499 spin_unlock(&client_lock);
1471 dprintk("%s: return %d\n", __func__, ntohl(status)); 1500 dprintk("%s: return %d\n", __func__, ntohl(status));
1472 return status; 1501 return status;
1473} 1502}
1474 1503
1475__be32 1504__be32
1505nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
1506{
1507 if (rc->rca_one_fs) {
1508 if (!cstate->current_fh.fh_dentry)
1509 return nfserr_nofilehandle;
1510 /*
1511 * We don't take advantage of the rca_one_fs case.
1512 * That's OK, it's optional, we can safely ignore it.
1513 */
1514 return nfs_ok;
1515 }
1516 nfs4_lock_state();
1517 if (is_client_expired(cstate->session->se_client)) {
1518 nfs4_unlock_state();
1519 /*
1520 * The following error isn't really legal.
1521 * But we only get here if the client just explicitly
1522 * destroyed the client. Surely it no longer cares what
1523 * error it gets back on an operation for the dead
1524 * client.
1525 */
1526 return nfserr_stale_clientid;
1527 }
1528 nfsd4_create_clid_dir(cstate->session->se_client);
1529 nfs4_unlock_state();
1530 return nfs_ok;
1531}
1532
1533__be32
1476nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1534nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1477 struct nfsd4_setclientid *setclid) 1535 struct nfsd4_setclientid *setclid)
1478{ 1536{
@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1631 if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) 1689 if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
1632 status = nfserr_clid_inuse; 1690 status = nfserr_clid_inuse;
1633 else { 1691 else {
1634 /* XXX: We just turn off callbacks until we can handle 1692 atomic_set(&conf->cl_cb_set, 0);
1635 * change request correctly. */ 1693 nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
1636 atomic_set(&conf->cl_cb_conn.cb_set, 0);
1637 expire_client(unconf); 1694 expire_client(unconf);
1638 status = nfs_ok; 1695 status = nfs_ok;
1639 1696
@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1667 } 1724 }
1668 move_to_confirmed(unconf); 1725 move_to_confirmed(unconf);
1669 conf = unconf; 1726 conf = unconf;
1670 nfsd4_probe_callback(conf); 1727 nfsd4_probe_callback(conf, &conf->cl_cb_conn);
1671 status = nfs_ok; 1728 status = nfs_ok;
1672 } 1729 }
1673 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1730 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
1700 INIT_LIST_HEAD(&fp->fi_hash); 1757 INIT_LIST_HEAD(&fp->fi_hash);
1701 INIT_LIST_HEAD(&fp->fi_stateids); 1758 INIT_LIST_HEAD(&fp->fi_stateids);
1702 INIT_LIST_HEAD(&fp->fi_delegations); 1759 INIT_LIST_HEAD(&fp->fi_delegations);
1703 spin_lock(&recall_lock);
1704 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1705 spin_unlock(&recall_lock);
1706 fp->fi_inode = igrab(ino); 1760 fp->fi_inode = igrab(ino);
1707 fp->fi_id = current_fileid++; 1761 fp->fi_id = current_fileid++;
1708 fp->fi_had_conflict = false; 1762 fp->fi_had_conflict = false;
1763 spin_lock(&recall_lock);
1764 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1765 spin_unlock(&recall_lock);
1709 return fp; 1766 return fp;
1710 } 1767 }
1711 return NULL; 1768 return NULL;
@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1827 stp->st_stateowner = sop; 1884 stp->st_stateowner = sop;
1828 get_nfs4_file(fp); 1885 get_nfs4_file(fp);
1829 stp->st_file = fp; 1886 stp->st_file = fp;
1830 stp->st_stateid.si_boot = get_seconds(); 1887 stp->st_stateid.si_boot = boot_time;
1831 stp->st_stateid.si_stateownerid = sop->so_id; 1888 stp->st_stateid.si_stateownerid = sop->so_id;
1832 stp->st_stateid.si_fileid = fp->fi_id; 1889 stp->st_stateid.si_fileid = fp->fi_id;
1833 stp->st_stateid.si_generation = 0; 1890 stp->st_stateid.si_generation = 0;
@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2028 * lock) we know the server hasn't removed the lease yet, we know 2085 * lock) we know the server hasn't removed the lease yet, we know
2029 * it's safe to take a reference: */ 2086 * it's safe to take a reference: */
2030 atomic_inc(&dp->dl_count); 2087 atomic_inc(&dp->dl_count);
2031 atomic_inc(&dp->dl_client->cl_count);
2032 2088
2033 spin_lock(&recall_lock); 2089 spin_lock(&recall_lock);
2034 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2090 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2347{ 2403{
2348 struct nfs4_delegation *dp; 2404 struct nfs4_delegation *dp;
2349 struct nfs4_stateowner *sop = stp->st_stateowner; 2405 struct nfs4_stateowner *sop = stp->st_stateowner;
2350 struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; 2406 int cb_up = atomic_read(&sop->so_client->cl_cb_set);
2351 struct file_lock fl, *flp = &fl; 2407 struct file_lock fl, *flp = &fl;
2352 int status, flag = 0; 2408 int status, flag = 0;
2353 2409
@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2355 open->op_recall = 0; 2411 open->op_recall = 0;
2356 switch (open->op_claim_type) { 2412 switch (open->op_claim_type) {
2357 case NFS4_OPEN_CLAIM_PREVIOUS: 2413 case NFS4_OPEN_CLAIM_PREVIOUS:
2358 if (!atomic_read(&cb->cb_set)) 2414 if (!cb_up)
2359 open->op_recall = 1; 2415 open->op_recall = 1;
2360 flag = open->op_delegate_type; 2416 flag = open->op_delegate_type;
2361 if (flag == NFS4_OPEN_DELEGATE_NONE) 2417 if (flag == NFS4_OPEN_DELEGATE_NONE)
@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2366 * had the chance to reclaim theirs.... */ 2422 * had the chance to reclaim theirs.... */
2367 if (locks_in_grace()) 2423 if (locks_in_grace())
2368 goto out; 2424 goto out;
2369 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) 2425 if (!cb_up || !sop->so_confirmed)
2370 goto out; 2426 goto out;
2371 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 2427 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2372 flag = NFS4_OPEN_DELEGATE_WRITE; 2428 flag = NFS4_OPEN_DELEGATE_WRITE;
@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2483 } 2539 }
2484 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); 2540 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
2485 2541
2486 if (nfsd4_has_session(&resp->cstate)) { 2542 if (nfsd4_has_session(&resp->cstate))
2487 open->op_stateowner->so_confirmed = 1; 2543 open->op_stateowner->so_confirmed = 1;
2488 nfsd4_create_clid_dir(open->op_stateowner->so_client);
2489 }
2490 2544
2491 /* 2545 /*
2492 * Attempt to hand out a delegation. No error return, because the 2546 * Attempt to hand out a delegation. No error return, because the
@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2537 renew_client(clp); 2591 renew_client(clp);
2538 status = nfserr_cb_path_down; 2592 status = nfserr_cb_path_down;
2539 if (!list_empty(&clp->cl_delegations) 2593 if (!list_empty(&clp->cl_delegations)
2540 && !atomic_read(&clp->cl_cb_conn.cb_set)) 2594 && !atomic_read(&clp->cl_cb_set))
2541 goto out; 2595 goto out;
2542 status = nfs_ok; 2596 status = nfs_ok;
2543out: 2597out:
@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
2554 dprintk("NFSD: end of grace period\n"); 2608 dprintk("NFSD: end of grace period\n");
2555 nfsd4_recdir_purge_old(); 2609 nfsd4_recdir_purge_old();
2556 locks_end_grace(&nfsd4_manager); 2610 locks_end_grace(&nfsd4_manager);
2611 /*
2612 * Now that every NFSv4 client has had the chance to recover and
2613 * to see the (possibly new, possibly shorter) lease time, we
2614 * can safely set the next grace time to the current lease time:
2615 */
2616 nfsd4_grace = nfsd4_lease;
2557} 2617}
2558 2618
2559static time_t 2619static time_t
@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
2563 struct nfs4_stateowner *sop; 2623 struct nfs4_stateowner *sop;
2564 struct nfs4_delegation *dp; 2624 struct nfs4_delegation *dp;
2565 struct list_head *pos, *next, reaplist; 2625 struct list_head *pos, *next, reaplist;
2566 time_t cutoff = get_seconds() - NFSD_LEASE_TIME; 2626 time_t cutoff = get_seconds() - nfsd4_lease;
2567 time_t t, clientid_val = NFSD_LEASE_TIME; 2627 time_t t, clientid_val = nfsd4_lease;
2568 time_t u, test_val = NFSD_LEASE_TIME; 2628 time_t u, test_val = nfsd4_lease;
2569 2629
2570 nfs4_lock_state(); 2630 nfs4_lock_state();
2571 2631
2572 dprintk("NFSD: laundromat service - starting\n"); 2632 dprintk("NFSD: laundromat service - starting\n");
2573 if (locks_in_grace()) 2633 if (locks_in_grace())
2574 nfsd4_end_grace(); 2634 nfsd4_end_grace();
2635 INIT_LIST_HEAD(&reaplist);
2636 spin_lock(&client_lock);
2575 list_for_each_safe(pos, next, &client_lru) { 2637 list_for_each_safe(pos, next, &client_lru) {
2576 clp = list_entry(pos, struct nfs4_client, cl_lru); 2638 clp = list_entry(pos, struct nfs4_client, cl_lru);
2577 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { 2639 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
2580 clientid_val = t; 2642 clientid_val = t;
2581 break; 2643 break;
2582 } 2644 }
2645 if (atomic_read(&clp->cl_refcount)) {
2646 dprintk("NFSD: client in use (clientid %08x)\n",
2647 clp->cl_clientid.cl_id);
2648 continue;
2649 }
2650 unhash_client_locked(clp);
2651 list_add(&clp->cl_lru, &reaplist);
2652 }
2653 spin_unlock(&client_lock);
2654 list_for_each_safe(pos, next, &reaplist) {
2655 clp = list_entry(pos, struct nfs4_client, cl_lru);
2583 dprintk("NFSD: purging unused client (clientid %08x)\n", 2656 dprintk("NFSD: purging unused client (clientid %08x)\n",
2584 clp->cl_clientid.cl_id); 2657 clp->cl_clientid.cl_id);
2585 nfsd4_remove_clid_dir(clp); 2658 nfsd4_remove_clid_dir(clp);
2586 expire_client(clp); 2659 expire_client(clp);
2587 } 2660 }
2588 INIT_LIST_HEAD(&reaplist);
2589 spin_lock(&recall_lock); 2661 spin_lock(&recall_lock);
2590 list_for_each_safe(pos, next, &del_recall_lru) { 2662 list_for_each_safe(pos, next, &del_recall_lru) {
2591 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 2663 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
2605 list_del_init(&dp->dl_recall_lru); 2677 list_del_init(&dp->dl_recall_lru);
2606 unhash_delegation(dp); 2678 unhash_delegation(dp);
2607 } 2679 }
2608 test_val = NFSD_LEASE_TIME; 2680 test_val = nfsd4_lease;
2609 list_for_each_safe(pos, next, &close_lru) { 2681 list_for_each_safe(pos, next, &close_lru) {
2610 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); 2682 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
2611 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { 2683 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
2661static int 2733static int
2662STALE_STATEID(stateid_t *stateid) 2734STALE_STATEID(stateid_t *stateid)
2663{ 2735{
2664 if (time_after((unsigned long)boot_time, 2736 if (stateid->si_boot == boot_time)
2665 (unsigned long)stateid->si_boot)) { 2737 return 0;
2666 dprintk("NFSD: stale stateid " STATEID_FMT "!\n", 2738 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2667 STATEID_VAL(stateid));
2668 return 1;
2669 }
2670 return 0;
2671}
2672
2673static int
2674EXPIRED_STATEID(stateid_t *stateid)
2675{
2676 if (time_before((unsigned long)boot_time,
2677 ((unsigned long)stateid->si_boot)) &&
2678 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2679 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2680 STATEID_VAL(stateid));
2681 return 1;
2682 }
2683 return 0;
2684}
2685
2686static __be32
2687stateid_error_map(stateid_t *stateid)
2688{
2689 if (STALE_STATEID(stateid))
2690 return nfserr_stale_stateid;
2691 if (EXPIRED_STATEID(stateid))
2692 return nfserr_expired;
2693
2694 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2695 STATEID_VAL(stateid)); 2739 STATEID_VAL(stateid));
2696 return nfserr_bad_stateid; 2740 return 1;
2697} 2741}
2698 2742
2699static inline int 2743static inline int
@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2817 status = nfserr_bad_stateid; 2861 status = nfserr_bad_stateid;
2818 if (is_delegation_stateid(stateid)) { 2862 if (is_delegation_stateid(stateid)) {
2819 dp = find_delegation_stateid(ino, stateid); 2863 dp = find_delegation_stateid(ino, stateid);
2820 if (!dp) { 2864 if (!dp)
2821 status = stateid_error_map(stateid);
2822 goto out; 2865 goto out;
2823 }
2824 status = check_stateid_generation(stateid, &dp->dl_stateid, 2866 status = check_stateid_generation(stateid, &dp->dl_stateid,
2825 flags); 2867 flags);
2826 if (status) 2868 if (status)
@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2833 *filpp = dp->dl_vfs_file; 2875 *filpp = dp->dl_vfs_file;
2834 } else { /* open or lock stateid */ 2876 } else { /* open or lock stateid */
2835 stp = find_stateid(stateid, flags); 2877 stp = find_stateid(stateid, flags);
2836 if (!stp) { 2878 if (!stp)
2837 status = stateid_error_map(stateid);
2838 goto out; 2879 goto out;
2839 }
2840 if (nfs4_check_fh(current_fh, stp)) 2880 if (nfs4_check_fh(current_fh, stp))
2841 goto out; 2881 goto out;
2842 if (!stp->st_stateowner->so_confirmed) 2882 if (!stp->st_stateowner->so_confirmed)
@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2908 */ 2948 */
2909 sop = search_close_lru(stateid->si_stateownerid, flags); 2949 sop = search_close_lru(stateid->si_stateownerid, flags);
2910 if (sop == NULL) 2950 if (sop == NULL)
2911 return stateid_error_map(stateid); 2951 return nfserr_bad_stateid;
2912 *sopp = sop; 2952 *sopp = sop;
2913 goto check_replay; 2953 goto check_replay;
2914 } 2954 }
@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3175 if (!is_delegation_stateid(stateid)) 3215 if (!is_delegation_stateid(stateid))
3176 goto out; 3216 goto out;
3177 dp = find_delegation_stateid(inode, stateid); 3217 dp = find_delegation_stateid(inode, stateid);
3178 if (!dp) { 3218 if (!dp)
3179 status = stateid_error_map(stateid);
3180 goto out; 3219 goto out;
3181 }
3182 status = check_stateid_generation(stateid, &dp->dl_stateid, flags); 3220 status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
3183 if (status) 3221 if (status)
3184 goto out; 3222 goto out;
@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3404 stp->st_stateowner = sop; 3442 stp->st_stateowner = sop;
3405 get_nfs4_file(fp); 3443 get_nfs4_file(fp);
3406 stp->st_file = fp; 3444 stp->st_file = fp;
3407 stp->st_stateid.si_boot = get_seconds(); 3445 stp->st_stateid.si_boot = boot_time;
3408 stp->st_stateid.si_stateownerid = sop->so_id; 3446 stp->st_stateid.si_stateownerid = sop->so_id;
3409 stp->st_stateid.si_fileid = fp->fi_id; 3447 stp->st_stateid.si_fileid = fp->fi_id;
3410 stp->st_stateid.si_generation = 0; 3448 stp->st_stateid.si_generation = 0;
@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
3976 printk("NFSD: Failure reading reboot recovery data\n"); 4014 printk("NFSD: Failure reading reboot recovery data\n");
3977} 4015}
3978 4016
3979unsigned long
3980get_nfs4_grace_period(void)
3981{
3982 return max(user_lease_time, lease_time) * HZ;
3983}
3984
3985/* 4017/*
3986 * Since the lifetime of a delegation isn't limited to that of an open, a 4018 * Since the lifetime of a delegation isn't limited to that of an open, a
3987 * client may quite reasonably hang on to a delegation as long as it has 4019 * client may quite reasonably hang on to a delegation as long as it has
@@ -4008,20 +4040,27 @@ set_max_delegations(void)
4008static int 4040static int
4009__nfs4_state_start(void) 4041__nfs4_state_start(void)
4010{ 4042{
4011 unsigned long grace_time; 4043 int ret;
4012 4044
4013 boot_time = get_seconds(); 4045 boot_time = get_seconds();
4014 grace_time = get_nfs4_grace_period();
4015 lease_time = user_lease_time;
4016 locks_start_grace(&nfsd4_manager); 4046 locks_start_grace(&nfsd4_manager);
4017 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4047 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4018 grace_time/HZ); 4048 nfsd4_grace);
4049 ret = set_callback_cred();
4050 if (ret)
4051 return -ENOMEM;
4019 laundry_wq = create_singlethread_workqueue("nfsd4"); 4052 laundry_wq = create_singlethread_workqueue("nfsd4");
4020 if (laundry_wq == NULL) 4053 if (laundry_wq == NULL)
4021 return -ENOMEM; 4054 return -ENOMEM;
4022 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4055 ret = nfsd4_create_callback_queue();
4056 if (ret)
4057 goto out_free_laundry;
4058 queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
4023 set_max_delegations(); 4059 set_max_delegations();
4024 return set_callback_cred(); 4060 return 0;
4061out_free_laundry:
4062 destroy_workqueue(laundry_wq);
4063 return ret;
4025} 4064}
4026 4065
4027int 4066int
@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
4039 return 0; 4078 return 0;
4040} 4079}
4041 4080
4042time_t
4043nfs4_lease_time(void)
4044{
4045 return lease_time;
4046}
4047
4048static void 4081static void
4049__nfs4_state_shutdown(void) 4082__nfs4_state_shutdown(void)
4050{ 4083{
@@ -4089,6 +4122,7 @@ nfs4_state_shutdown(void)
4089 nfs4_lock_state(); 4122 nfs4_lock_state();
4090 nfs4_release_reclaim(); 4123 nfs4_release_reclaim();
4091 __nfs4_state_shutdown(); 4124 __nfs4_state_shutdown();
4125 nfsd4_destroy_callback_queue();
4092 nfs4_unlock_state(); 4126 nfs4_unlock_state();
4093} 4127}
4094 4128
@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
4128{ 4162{
4129 return user_recovery_dirname; 4163 return user_recovery_dirname;
4130} 4164}
4131
4132/*
4133 * Called when leasetime is changed.
4134 *
4135 * The only way the protocol gives us to handle on-the-fly lease changes is to
4136 * simulate a reboot. Instead of doing that, we just wait till the next time
4137 * we start to register any changes in lease time. If the administrator
4138 * really wants to change the lease time *now*, they can go ahead and bring
4139 * nfsd down and then back up again after changing the lease time.
4140 *
4141 * user_lease_time is protected by nfsd_mutex since it's only really accessed
4142 * when nfsd is starting
4143 */
4144void
4145nfs4_reset_lease(time_t leasetime)
4146{
4147 user_lease_time = leasetime;
4148}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 34ccf815ea8a..ac17a7080239 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
1234 DECODE_TAIL; 1234 DECODE_TAIL;
1235} 1235}
1236 1236
1237static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
1238{
1239 DECODE_HEAD;
1240
1241 READ_BUF(4);
1242 READ32(rc->rca_one_fs);
1243
1244 DECODE_TAIL;
1245}
1246
1237static __be32 1247static __be32
1238nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1248nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
1239{ 1249{
@@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
1346 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, 1356 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
1347 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1357 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1348 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, 1358 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
1349 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp, 1359 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
1350}; 1360};
1351 1361
1352struct nfsd4_minorversion_ops { 1362struct nfsd4_minorversion_ops {
@@ -1900,7 +1910,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1900 if (bmval0 & FATTR4_WORD0_LEASE_TIME) { 1910 if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
1901 if ((buflen -= 4) < 0) 1911 if ((buflen -= 4) < 0)
1902 goto out_resource; 1912 goto out_resource;
1903 WRITE32(NFSD_LEASE_TIME); 1913 WRITE32(nfsd4_lease);
1904 } 1914 }
1905 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { 1915 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
1906 if ((buflen -= 4) < 0) 1916 if ((buflen -= 4) < 0)
@@ -3307,11 +3317,14 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3307 iov = &rqstp->rq_res.head[0]; 3317 iov = &rqstp->rq_res.head[0];
3308 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3318 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
3309 BUG_ON(iov->iov_len > PAGE_SIZE); 3319 BUG_ON(iov->iov_len > PAGE_SIZE);
3310 if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { 3320 if (nfsd4_has_session(cs)) {
3311 nfsd4_store_cache_entry(resp); 3321 if (cs->status != nfserr_replay_cache) {
3312 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); 3322 nfsd4_store_cache_entry(resp);
3313 resp->cstate.slot->sl_inuse = false; 3323 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3314 nfsd4_put_session(resp->cstate.session); 3324 cs->slot->sl_inuse = false;
3325 }
3326 /* Renew the clientid on success and on replay */
3327 release_session_client(cs->session);
3315 } 3328 }
3316 return 1; 3329 return 1;
3317} 3330}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e3591073098f..bc3194ea01f5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -46,6 +46,7 @@ enum {
46 */ 46 */
47#ifdef CONFIG_NFSD_V4 47#ifdef CONFIG_NFSD_V4
48 NFSD_Leasetime, 48 NFSD_Leasetime,
49 NFSD_Gracetime,
49 NFSD_RecoveryDir, 50 NFSD_RecoveryDir,
50#endif 51#endif
51}; 52};
@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size);
70static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); 71static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
71#ifdef CONFIG_NFSD_V4 72#ifdef CONFIG_NFSD_V4
72static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 73static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
74static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
73static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 75static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
74#endif 76#endif
75 77
@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
91 [NFSD_MaxBlkSize] = write_maxblksize, 93 [NFSD_MaxBlkSize] = write_maxblksize,
92#ifdef CONFIG_NFSD_V4 94#ifdef CONFIG_NFSD_V4
93 [NFSD_Leasetime] = write_leasetime, 95 [NFSD_Leasetime] = write_leasetime,
96 [NFSD_Gracetime] = write_gracetime,
94 [NFSD_RecoveryDir] = write_recoverydir, 97 [NFSD_RecoveryDir] = write_recoverydir,
95#endif 98#endif
96}; 99};
@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
1204} 1207}
1205 1208
1206#ifdef CONFIG_NFSD_V4 1209#ifdef CONFIG_NFSD_V4
1207extern time_t nfs4_leasetime(void); 1210static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
1208
1209static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
1210{ 1211{
1211 /* if size > 10 seconds, call
1212 * nfs4_reset_lease() then write out the new lease (seconds) as reply
1213 */
1214 char *mesg = buf; 1212 char *mesg = buf;
1215 int rv, lease; 1213 int rv, i;
1216 1214
1217 if (size > 0) { 1215 if (size > 0) {
1218 if (nfsd_serv) 1216 if (nfsd_serv)
1219 return -EBUSY; 1217 return -EBUSY;
1220 rv = get_int(&mesg, &lease); 1218 rv = get_int(&mesg, &i);
1221 if (rv) 1219 if (rv)
1222 return rv; 1220 return rv;
1223 if (lease < 10 || lease > 3600) 1221 /*
1222 * Some sanity checking. We don't have a reason for
1223 * these particular numbers, but problems with the
1224 * extremes are:
1225 * - Too short: the briefest network outage may
1226 * cause clients to lose all their locks. Also,
1227 * the frequent polling may be wasteful.
1228 * - Too long: do you really want reboot recovery
1229 * to take more than an hour? Or to make other
1230 * clients wait an hour before being able to
1231 * revoke a dead client's locks?
1232 */
1233 if (i < 10 || i > 3600)
1224 return -EINVAL; 1234 return -EINVAL;
1225 nfs4_reset_lease(lease); 1235 *time = i;
1226 } 1236 }
1227 1237
1228 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", 1238 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
1229 nfs4_lease_time()); 1239}
1240
1241static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
1242{
1243 ssize_t rv;
1244
1245 mutex_lock(&nfsd_mutex);
1246 rv = __nfsd4_write_time(file, buf, size, time);
1247 mutex_unlock(&nfsd_mutex);
1248 return rv;
1230} 1249}
1231 1250
1232/** 1251/**
@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
1252 */ 1271 */
1253static ssize_t write_leasetime(struct file *file, char *buf, size_t size) 1272static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
1254{ 1273{
1255 ssize_t rv; 1274 return nfsd4_write_time(file, buf, size, &nfsd4_lease);
1275}
1256 1276
1257 mutex_lock(&nfsd_mutex); 1277/**
1258 rv = __write_leasetime(file, buf, size); 1278 * write_gracetime - Set or report current NFSv4 grace period time
1259 mutex_unlock(&nfsd_mutex); 1279 *
1260 return rv; 1280 * As above, but sets the time of the NFSv4 grace period.
1281 *
1282 * Note this should never be set to less than the *previous*
1283 * lease-period time, but we don't try to enforce this. (In the common
1284 * case (a new boot), we don't know what the previous lease time was
1285 * anyway.)
1286 */
1287static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
1288{
1289 return nfsd4_write_time(file, buf, size, &nfsd4_grace);
1261} 1290}
1262 1291
1263extern char *nfs4_recoverydir(void); 1292extern char *nfs4_recoverydir(void);
@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1351 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1380 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1352#ifdef CONFIG_NFSD_V4 1381#ifdef CONFIG_NFSD_V4
1353 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1383 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
1354 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 1384 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
1355#endif 1385#endif
1356 /* last one */ {""} 1386 /* last one */ {""}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index e942a1aaac92..72377761270e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,7 +82,6 @@ int nfs4_state_init(void);
82void nfsd4_free_slabs(void); 82void nfsd4_free_slabs(void);
83int nfs4_state_start(void); 83int nfs4_state_start(void);
84void nfs4_state_shutdown(void); 84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime); 85void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir); 86int nfs4_reset_recoverydir(char *recdir);
88#else 87#else
@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { } 89static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; } 90static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { } 91static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { } 92static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } 93static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif 94#endif
@@ -229,6 +227,9 @@ extern struct timeval nfssvc_boot;
229 227
230#ifdef CONFIG_NFSD_V4 228#ifdef CONFIG_NFSD_V4
231 229
230extern time_t nfsd4_lease;
231extern time_t nfsd4_grace;
232
232/* before processing a COMPOUND operation, we have to check that there 233/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise, 234 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to 235 * we might process an operation with side effects, and be unable to
@@ -247,7 +248,6 @@ extern struct timeval nfssvc_boot;
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 248#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 249#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249 250
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252 252
253/* 253/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 171699eb07c8..06b2a26edfe0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -120,7 +120,7 @@ u32 nfsd_supported_minorversion;
120int nfsd_vers(int vers, enum vers_op change) 120int nfsd_vers(int vers, enum vers_op change)
121{ 121{
122 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) 122 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
123 return -1; 123 return 0;
124 switch(change) { 124 switch(change) {
125 case NFSD_SET: 125 case NFSD_SET:
126 nfsd_versions[vers] = nfsd_version[vers]; 126 nfsd_versions[vers] = nfsd_version[vers];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index fefeae27f25e..006c84230c7c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
70 struct nfs4_client *cbs_clp; 70 struct nfs4_client *cbs_clp;
71}; 71};
72 72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args;
80 struct work_struct cb_work;
81};
82
73struct nfs4_delegation { 83struct nfs4_delegation {
74 struct list_head dl_perfile; 84 struct list_head dl_perfile;
75 struct list_head dl_perclnt; 85 struct list_head dl_perclnt;
@@ -86,6 +96,7 @@ struct nfs4_delegation {
86 stateid_t dl_stateid; 96 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh; 97 struct knfsd_fh dl_fh;
88 int dl_retries; 98 int dl_retries;
99 struct nfsd4_callback dl_recall;
89}; 100};
90 101
91/* client delegation callback info */ 102/* client delegation callback info */
@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
96 u32 cb_prog; 107 u32 cb_prog;
97 u32 cb_minorversion; 108 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */ 109 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */ 110 struct svc_xprt *cb_xprt; /* minorversion 1 only */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102}; 111};
103 112
104/* Maximum number of slots per session. 160 is useful for long haul TCP */ 113/* Maximum number of slots per session. 160 is useful for long haul TCP */
@@ -157,7 +166,7 @@ struct nfsd4_session {
157 struct list_head se_hash; /* hash by sessionid */ 166 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt; 167 struct list_head se_perclnt;
159 u32 se_flags; 168 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */ 169 struct nfs4_client *se_client;
161 struct nfs4_sessionid se_sessionid; 170 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel; 171 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel; 172 struct nfsd4_channel_attrs se_bchannel;
@@ -212,25 +221,41 @@ struct nfs4_client {
212 struct svc_cred cl_cred; /* setclientid principal */ 221 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */ 222 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */ 223 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */ 224 u32 cl_firststate; /* recovery dir creation */
218 225
226 /* for v4.0 and v4.1 callbacks: */
227 struct nfs4_cb_conn cl_cb_conn;
228 struct rpc_clnt *cl_cb_client;
229 atomic_t cl_cb_set;
230
219 /* for nfs41 */ 231 /* for nfs41 */
220 struct list_head cl_sessions; 232 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 233 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags; 234 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid; 235 struct nfs4_sessionid cl_sessionid;
236 /* number of rpc's in progress over an associated session: */
237 atomic_t cl_refcount;
224 238
225 /* for nfs41 callbacks */ 239 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */ 240 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy; 241 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr; 242 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 243 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */ 244 /* wait here for slots */
232}; 245};
233 246
247static inline void
248mark_client_expired(struct nfs4_client *clp)
249{
250 clp->cl_time = 0;
251}
252
253static inline bool
254is_client_expired(struct nfs4_client *clp)
255{
256 return clp->cl_time == 0;
257}
258
234/* struct nfs4_client_reset 259/* struct nfs4_client_reset
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl 260 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state 261 * upon lease reset, or from upcall to state_daemon (to read in state
@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void); 402extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void); 403extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 404extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref); 405extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void); 406extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp); 407extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
408extern void nfsd4_do_callback_rpc(struct work_struct *);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 409extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
410extern int nfsd4_create_callback_queue(void);
411extern void nfsd4_destroy_callback_queue(void);
412extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp); 413extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 414extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name); 415extern void nfsd4_init_recdir(char *recdir_name);
@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void); 420extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp); 421extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); 422extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
423extern void release_session_client(struct nfsd4_session *);
395 424
396static inline void 425static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so) 426nfs4_put_stateowner(struct nfs4_stateowner *so)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6dd5f1970e01..ebbf3b6b2457 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -724,7 +724,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
724 struct inode *inode; 724 struct inode *inode;
725 int flags = O_RDONLY|O_LARGEFILE; 725 int flags = O_RDONLY|O_LARGEFILE;
726 __be32 err; 726 __be32 err;
727 int host_err; 727 int host_err = 0;
728 728
729 validate_process_creds(); 729 validate_process_creds();
730 730
@@ -761,7 +761,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
761 * Check to see if there are any leases on this file. 761 * Check to see if there are any leases on this file.
762 * This may block while leases are broken. 762 * This may block while leases are broken.
763 */ 763 */
764 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); 764 if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
765 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
765 if (host_err == -EWOULDBLOCK) 766 if (host_err == -EWOULDBLOCK)
766 host_err = -ETIMEDOUT; 767 host_err = -ETIMEDOUT;
767 if (host_err) /* NOMEM or WOULDBLOCK */ 768 if (host_err) /* NOMEM or WOULDBLOCK */
@@ -998,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file)
998 999
999 if (inode->i_state & I_DIRTY) { 1000 if (inode->i_state & I_DIRTY) {
1000 dprintk("nfsd: write sync %d\n", task_pid_nr(current)); 1001 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
1001 err = vfs_fsync(file, file->f_path.dentry, 0); 1002 err = vfs_fsync(file, 0);
1002 } 1003 }
1003 last_ino = inode->i_ino; 1004 last_ino = inode->i_ino;
1004 last_dev = inode->i_sb->s_dev; 1005 last_dev = inode->i_sb->s_dev;
@@ -1169,12 +1170,12 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1169 goto out; 1170 goto out;
1170 } 1171 }
1171 1172
1172 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); 1173 err = nfsd_open(rqstp, fhp, S_IFREG,
1174 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
1173 if (err) 1175 if (err)
1174 goto out; 1176 goto out;
1175 if (EX_ISSYNC(fhp->fh_export)) { 1177 if (EX_ISSYNC(fhp->fh_export)) {
1176 int err2 = vfs_fsync_range(file, file->f_path.dentry, 1178 int err2 = vfs_fsync_range(file, offset, end, 0);
1177 offset, end, 0);
1178 1179
1179 if (err2 != -EINVAL) 1180 if (err2 != -EINVAL)
1180 err = nfserrno(err2); 1181 err = nfserrno(err2);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 4b1de0a9ea75..217a62c2a357 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,6 +20,7 @@
20#define NFSD_MAY_OWNER_OVERRIDE 64 20#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23#define NFSD_MAY_NOT_BREAK_LEASE 512
23 24
24#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 25#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
25#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 26#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index efa337739534..4d476ff08ae6 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid; 381 struct nfs4_sessionid sessionid;
382}; 382};
383 383
384struct nfsd4_reclaim_complete {
385 u32 rca_one_fs;
386};
387
384struct nfsd4_op { 388struct nfsd4_op {
385 int opnum; 389 int opnum;
386 __be32 status; 390 __be32 status;
@@ -421,6 +425,7 @@ struct nfsd4_op {
421 struct nfsd4_create_session create_session; 425 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session; 426 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence; 427 struct nfsd4_sequence sequence;
428 struct nfsd4_reclaim_complete reclaim_complete;
424 } u; 429 } u;
425 struct nfs4_replay * replay; 430 struct nfs4_replay * replay;
426}; 431};
@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 518extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq); 519 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 520extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *, 521 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
517struct nfsd4_exchange_id *); 522extern __be32 nfsd4_create_session(struct svc_rqst *,
518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *, 523 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *); 524 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *, 525extern __be32 nfsd4_sequence(struct svc_rqst *,
@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_rqst *,
524extern __be32 nfsd4_destroy_session(struct svc_rqst *, 528extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *, 529 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *); 530 struct nfsd4_destroy_session *);
531__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, 532extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open); 533 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 534extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 7cfb87e692da..d7fd696e595c 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -31,6 +31,11 @@
31#include "alloc.h" 31#include "alloc.h"
32 32
33 33
34/**
35 * nilfs_palloc_groups_per_desc_block - get the number of groups that a group
36 * descriptor block can maintain
37 * @inode: inode of metadata file using this allocator
38 */
34static inline unsigned long 39static inline unsigned long
35nilfs_palloc_groups_per_desc_block(const struct inode *inode) 40nilfs_palloc_groups_per_desc_block(const struct inode *inode)
36{ 41{
@@ -38,12 +43,21 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode)
38 sizeof(struct nilfs_palloc_group_desc); 43 sizeof(struct nilfs_palloc_group_desc);
39} 44}
40 45
46/**
47 * nilfs_palloc_groups_count - get maximum number of groups
48 * @inode: inode of metadata file using this allocator
49 */
41static inline unsigned long 50static inline unsigned long
42nilfs_palloc_groups_count(const struct inode *inode) 51nilfs_palloc_groups_count(const struct inode *inode)
43{ 52{
44 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); 53 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
45} 54}
46 55
56/**
57 * nilfs_palloc_init_blockgroup - initialize private variables for allocator
58 * @inode: inode of metadata file using this allocator
59 * @entry_size: size of the persistent object
60 */
47int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) 61int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
48{ 62{
49 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 63 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
@@ -69,6 +83,12 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
69 return 0; 83 return 0;
70} 84}
71 85
86/**
87 * nilfs_palloc_group - get group number and offset from an entry number
88 * @inode: inode of metadata file using this allocator
89 * @nr: serial number of the entry (e.g. inode number)
90 * @offset: pointer to store offset number in the group
91 */
72static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, 92static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
73 unsigned long *offset) 93 unsigned long *offset)
74{ 94{
@@ -78,6 +98,14 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
78 return group; 98 return group;
79} 99}
80 100
101/**
102 * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block
103 * @inode: inode of metadata file using this allocator
104 * @group: group number
105 *
106 * nilfs_palloc_desc_blkoff() returns block offset of the descriptor
107 * block which contains a descriptor of the specified group.
108 */
81static unsigned long 109static unsigned long
82nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) 110nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
83{ 111{
@@ -86,6 +114,14 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
86 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; 114 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
87} 115}
88 116
117/**
118 * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block
119 * @inode: inode of metadata file using this allocator
120 * @group: group number
121 *
122 * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
123 * block used to allocate/deallocate entries in the specified group.
124 */
89static unsigned long 125static unsigned long
90nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) 126nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
91{ 127{
@@ -95,6 +131,12 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
95 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; 131 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
96} 132}
97 133
134/**
135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
136 * @inode: inode of metadata file using this allocator
137 * @group: group number
138 * @desc: pointer to descriptor structure for the group
139 */
98static unsigned long 140static unsigned long
99nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, 141nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
100 const struct nilfs_palloc_group_desc *desc) 142 const struct nilfs_palloc_group_desc *desc)
@@ -107,6 +149,13 @@ nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
107 return nfree; 149 return nfree;
108} 150}
109 151
152/**
153 * nilfs_palloc_group_desc_add_entries - adjust count of free entries
154 * @inode: inode of metadata file using this allocator
155 * @group: group number
156 * @desc: pointer to descriptor structure for the group
157 * @n: delta to be added
158 */
110static void 159static void
111nilfs_palloc_group_desc_add_entries(struct inode *inode, 160nilfs_palloc_group_desc_add_entries(struct inode *inode,
112 unsigned long group, 161 unsigned long group,
@@ -118,6 +167,11 @@ nilfs_palloc_group_desc_add_entries(struct inode *inode,
118 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 167 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
119} 168}
120 169
170/**
171 * nilfs_palloc_entry_blkoff - get block offset of an entry block
172 * @inode: inode of metadata file using this allocator
173 * @nr: serial number of the entry (e.g. inode number)
174 */
121static unsigned long 175static unsigned long
122nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) 176nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
123{ 177{
@@ -129,6 +183,12 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
129 group_offset / NILFS_MDT(inode)->mi_entries_per_block; 183 group_offset / NILFS_MDT(inode)->mi_entries_per_block;
130} 184}
131 185
186/**
187 * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
188 * @inode: inode of metadata file
189 * @bh: buffer head of the buffer to be initialized
190 * @kaddr: kernel address mapped for the page including the buffer
191 */
132static void nilfs_palloc_desc_block_init(struct inode *inode, 192static void nilfs_palloc_desc_block_init(struct inode *inode,
133 struct buffer_head *bh, void *kaddr) 193 struct buffer_head *bh, void *kaddr)
134{ 194{
@@ -179,6 +239,13 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
179 return ret; 239 return ret;
180} 240}
181 241
242/**
243 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
244 * @inode: inode of metadata file using this allocator
245 * @group: group number
246 * @create: create flag
247 * @bhp: pointer to store the resultant buffer head
248 */
182static int nilfs_palloc_get_desc_block(struct inode *inode, 249static int nilfs_palloc_get_desc_block(struct inode *inode,
183 unsigned long group, 250 unsigned long group,
184 int create, struct buffer_head **bhp) 251 int create, struct buffer_head **bhp)
@@ -191,6 +258,13 @@ static int nilfs_palloc_get_desc_block(struct inode *inode,
191 bhp, &cache->prev_desc, &cache->lock); 258 bhp, &cache->prev_desc, &cache->lock);
192} 259}
193 260
261/**
262 * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block
263 * @inode: inode of metadata file using this allocator
264 * @group: group number
265 * @create: create flag
266 * @bhp: pointer to store the resultant buffer head
267 */
194static int nilfs_palloc_get_bitmap_block(struct inode *inode, 268static int nilfs_palloc_get_bitmap_block(struct inode *inode,
195 unsigned long group, 269 unsigned long group,
196 int create, struct buffer_head **bhp) 270 int create, struct buffer_head **bhp)
@@ -203,6 +277,13 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
203 &cache->prev_bitmap, &cache->lock); 277 &cache->prev_bitmap, &cache->lock);
204} 278}
205 279
280/**
281 * nilfs_palloc_get_entry_block - get buffer head of an entry block
282 * @inode: inode of metadata file using this allocator
283 * @nr: serial number of the entry (e.g. inode number)
284 * @create: create flag
285 * @bhp: pointer to store the resultant buffer head
286 */
206int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, 287int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
207 int create, struct buffer_head **bhp) 288 int create, struct buffer_head **bhp)
208{ 289{
@@ -214,6 +295,13 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
214 &cache->prev_entry, &cache->lock); 295 &cache->prev_entry, &cache->lock);
215} 296}
216 297
298/**
299 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
300 * @inode: inode of metadata file using this allocator
301 * @group: group number
302 * @bh: buffer head of the buffer storing the group descriptor block
303 * @kaddr: kernel address mapped for the page including the buffer
304 */
217static struct nilfs_palloc_group_desc * 305static struct nilfs_palloc_group_desc *
218nilfs_palloc_block_get_group_desc(const struct inode *inode, 306nilfs_palloc_block_get_group_desc(const struct inode *inode,
219 unsigned long group, 307 unsigned long group,
@@ -223,6 +311,13 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode,
223 group % nilfs_palloc_groups_per_desc_block(inode); 311 group % nilfs_palloc_groups_per_desc_block(inode);
224} 312}
225 313
314/**
315 * nilfs_palloc_block_get_entry - get kernel address of an entry
316 * @inode: inode of metadata file using this allocator
317 * @nr: serial number of the entry (e.g. inode number)
318 * @bh: buffer head of the buffer storing the entry block
319 * @kaddr: kernel address mapped for the page including the buffer
320 */
226void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, 321void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
227 const struct buffer_head *bh, void *kaddr) 322 const struct buffer_head *bh, void *kaddr)
228{ 323{
@@ -235,11 +330,19 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
235 entry_offset * NILFS_MDT(inode)->mi_entry_size; 330 entry_offset * NILFS_MDT(inode)->mi_entry_size;
236} 331}
237 332
333/**
334 * nilfs_palloc_find_available_slot - find available slot in a group
335 * @inode: inode of metadata file using this allocator
336 * @group: group number
337 * @target: offset number of an entry in the group (start point)
338 * @bitmap: bitmap of the group
339 * @bsize: size in bits
340 */
238static int nilfs_palloc_find_available_slot(struct inode *inode, 341static int nilfs_palloc_find_available_slot(struct inode *inode,
239 unsigned long group, 342 unsigned long group,
240 unsigned long target, 343 unsigned long target,
241 unsigned char *bitmap, 344 unsigned char *bitmap,
242 int bsize) /* size in bits */ 345 int bsize)
243{ 346{
244 int curr, pos, end, i; 347 int curr, pos, end, i;
245 348
@@ -277,6 +380,13 @@ static int nilfs_palloc_find_available_slot(struct inode *inode,
277 return -ENOSPC; 380 return -ENOSPC;
278} 381}
279 382
383/**
384 * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups
385 * in a group descriptor block
386 * @inode: inode of metadata file using this allocator
387 * @curr: current group number
388 * @max: maximum number of groups
389 */
280static unsigned long 390static unsigned long
281nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, 391nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
282 unsigned long curr, unsigned long max) 392 unsigned long curr, unsigned long max)
@@ -287,6 +397,11 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
287 max - curr + 1); 397 max - curr + 1);
288} 398}
289 399
400/**
401 * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
402 * @inode: inode of metadata file using this allocator
403 * @req: nilfs_palloc_req structure exchanged for the allocation
404 */
290int nilfs_palloc_prepare_alloc_entry(struct inode *inode, 405int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
291 struct nilfs_palloc_req *req) 406 struct nilfs_palloc_req *req)
292{ 407{
@@ -366,6 +481,11 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
366 return ret; 481 return ret;
367} 482}
368 483
484/**
485 * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object
486 * @inode: inode of metadata file using this allocator
487 * @req: nilfs_palloc_req structure exchanged for the allocation
488 */
369void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
370 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
371{ 491{
@@ -377,6 +497,11 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode,
377 brelse(req->pr_desc_bh); 497 brelse(req->pr_desc_bh);
378} 498}
379 499
500/**
501 * nilfs_palloc_commit_free_entry - finish deallocating a persistent object
502 * @inode: inode of metadata file using this allocator
503 * @req: nilfs_palloc_req structure exchanged for the removal
504 */
380void nilfs_palloc_commit_free_entry(struct inode *inode, 505void nilfs_palloc_commit_free_entry(struct inode *inode,
381 struct nilfs_palloc_req *req) 506 struct nilfs_palloc_req *req)
382{ 507{
@@ -410,6 +535,11 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
410 brelse(req->pr_desc_bh); 535 brelse(req->pr_desc_bh);
411} 536}
412 537
538/**
539 * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object
540 * @inode: inode of metadata file using this allocator
541 * @req: nilfs_palloc_req structure exchanged for the allocation
542 */
413void nilfs_palloc_abort_alloc_entry(struct inode *inode, 543void nilfs_palloc_abort_alloc_entry(struct inode *inode,
414 struct nilfs_palloc_req *req) 544 struct nilfs_palloc_req *req)
415{ 545{
@@ -442,6 +572,11 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
442 req->pr_desc_bh = NULL; 572 req->pr_desc_bh = NULL;
443} 573}
444 574
575/**
576 * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
577 * @inode: inode of metadata file using this allocator
578 * @req: nilfs_palloc_req structure exchanged for the removal
579 */
445int nilfs_palloc_prepare_free_entry(struct inode *inode, 580int nilfs_palloc_prepare_free_entry(struct inode *inode,
446 struct nilfs_palloc_req *req) 581 struct nilfs_palloc_req *req)
447{ 582{
@@ -464,6 +599,11 @@ int nilfs_palloc_prepare_free_entry(struct inode *inode,
464 return 0; 599 return 0;
465} 600}
466 601
602/**
603 * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object
604 * @inode: inode of metadata file using this allocator
605 * @req: nilfs_palloc_req structure exchanged for the removal
606 */
467void nilfs_palloc_abort_free_entry(struct inode *inode, 607void nilfs_palloc_abort_free_entry(struct inode *inode,
468 struct nilfs_palloc_req *req) 608 struct nilfs_palloc_req *req)
469{ 609{
@@ -475,6 +615,12 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
475 req->pr_desc_bh = NULL; 615 req->pr_desc_bh = NULL;
476} 616}
477 617
618/**
619 * nilfs_palloc_group_is_in - judge if an entry is in a group
620 * @inode: inode of metadata file using this allocator
621 * @group: group number
622 * @nr: serial number of the entry (e.g. inode number)
623 */
478static int 624static int
479nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) 625nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
480{ 626{
@@ -485,6 +631,12 @@ nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
485 return (nr >= first) && (nr <= last); 631 return (nr >= first) && (nr <= last);
486} 632}
487 633
634/**
635 * nilfs_palloc_freev - deallocate a set of persistent objects
636 * @inode: inode of metadata file using this allocator
637 * @entry_nrs: array of entry numbers to be deallocated
638 * @nitems: number of entries stored in @entry_nrs
639 */
488int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) 640int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
489{ 641{
490 struct buffer_head *desc_bh, *bitmap_bh; 642 struct buffer_head *desc_bh, *bitmap_bh;
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 5cccf874d692..9af34a7e6e13 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -29,6 +29,13 @@
29#include <linux/buffer_head.h> 29#include <linux/buffer_head.h>
30#include <linux/fs.h> 30#include <linux/fs.h>
31 31
32/**
33 * nilfs_palloc_entries_per_group - get the number of entries per group
34 * @inode: inode of metadata file using this allocator
35 *
36 * The number of entries per group is defined by the number of bits
37 * that a bitmap block can maintain.
38 */
32static inline unsigned long 39static inline unsigned long
33nilfs_palloc_entries_per_group(const struct inode *inode) 40nilfs_palloc_entries_per_group(const struct inode *inode)
34{ 41{
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 76c38e3e19d2..b27a342c5af6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,63 +31,16 @@
31#include "alloc.h" 31#include "alloc.h"
32#include "dat.h" 32#include "dat.h"
33 33
34/** 34static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 * struct nilfs_btree_path - A path on which B-tree operations are executed
36 * @bp_bh: buffer head of node block
37 * @bp_sib_bh: buffer head of sibling node block
38 * @bp_index: index of child node
39 * @bp_oldreq: ptr end request for old ptr
40 * @bp_newreq: ptr alloc request for new ptr
41 * @bp_op: rebalance operation
42 */
43struct nilfs_btree_path {
44 struct buffer_head *bp_bh;
45 struct buffer_head *bp_sib_bh;
46 int bp_index;
47 union nilfs_bmap_ptr_req bp_oldreq;
48 union nilfs_bmap_ptr_req bp_newreq;
49 struct nilfs_btnode_chkey_ctxt bp_ctxt;
50 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51 int, __u64 *, __u64 *);
52};
53
54/*
55 * B-tree path operations
56 */
57
58static struct kmem_cache *nilfs_btree_path_cache;
59
60int __init nilfs_btree_path_cache_init(void)
61{
62 nilfs_btree_path_cache =
63 kmem_cache_create("nilfs2_btree_path_cache",
64 sizeof(struct nilfs_btree_path) *
65 NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66 return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67}
68
69void nilfs_btree_path_cache_destroy(void)
70{
71 kmem_cache_destroy(nilfs_btree_path_cache);
72}
73
74static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75{
76 return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77}
78
79static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80{ 35{
81 kmem_cache_free(nilfs_btree_path_cache, path); 36 struct nilfs_btree_path *path;
82} 37 int level = NILFS_BTREE_LEVEL_DATA;
83 38
84static void nilfs_btree_init_path(struct nilfs_btree_path *path) 39 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
85{ 40 if (path == NULL)
86 int level; 41 goto out;
87 42
88 for (level = NILFS_BTREE_LEVEL_DATA; 43 for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
89 level < NILFS_BTREE_LEVEL_MAX;
90 level++) {
91 path[level].bp_bh = NULL; 44 path[level].bp_bh = NULL;
92 path[level].bp_sib_bh = NULL; 45 path[level].bp_sib_bh = NULL;
93 path[level].bp_index = 0; 46 path[level].bp_index = 0;
@@ -95,15 +48,19 @@ static void nilfs_btree_init_path(struct nilfs_btree_path *path)
95 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 48 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96 path[level].bp_op = NULL; 49 path[level].bp_op = NULL;
97 } 50 }
51
52out:
53 return path;
98} 54}
99 55
100static void nilfs_btree_release_path(struct nilfs_btree_path *path) 56static void nilfs_btree_free_path(struct nilfs_btree_path *path)
101{ 57{
102 int level; 58 int level = NILFS_BTREE_LEVEL_DATA;
103 59
104 for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX; 60 for (; level < NILFS_BTREE_LEVEL_MAX; level++)
105 level++)
106 brelse(path[level].bp_bh); 61 brelse(path[level].bp_bh);
62
63 kmem_cache_free(nilfs_btree_path_cache, path);
107} 64}
108 65
109/* 66/*
@@ -566,14 +523,12 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
566 path = nilfs_btree_alloc_path(); 523 path = nilfs_btree_alloc_path();
567 if (path == NULL) 524 if (path == NULL)
568 return -ENOMEM; 525 return -ENOMEM;
569 nilfs_btree_init_path(path);
570 526
571 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 527 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572 528
573 if (ptrp != NULL) 529 if (ptrp != NULL)
574 *ptrp = ptr; 530 *ptrp = ptr;
575 531
576 nilfs_btree_release_path(path);
577 nilfs_btree_free_path(path); 532 nilfs_btree_free_path(path);
578 533
579 return ret; 534 return ret;
@@ -594,7 +549,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
594 path = nilfs_btree_alloc_path(); 549 path = nilfs_btree_alloc_path();
595 if (path == NULL) 550 if (path == NULL)
596 return -ENOMEM; 551 return -ENOMEM;
597 nilfs_btree_init_path(path); 552
598 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 553 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
599 if (ret < 0) 554 if (ret < 0)
600 goto out; 555 goto out;
@@ -655,7 +610,6 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
655 *ptrp = ptr; 610 *ptrp = ptr;
656 ret = cnt; 611 ret = cnt;
657 out: 612 out:
658 nilfs_btree_release_path(path);
659 nilfs_btree_free_path(path); 613 nilfs_btree_free_path(path);
660 return ret; 614 return ret;
661} 615}
@@ -1123,7 +1077,6 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1123 path = nilfs_btree_alloc_path(); 1077 path = nilfs_btree_alloc_path();
1124 if (path == NULL) 1078 if (path == NULL)
1125 return -ENOMEM; 1079 return -ENOMEM;
1126 nilfs_btree_init_path(path);
1127 1080
1128 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1081 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1129 NILFS_BTREE_LEVEL_NODE_MIN); 1082 NILFS_BTREE_LEVEL_NODE_MIN);
@@ -1140,7 +1093,6 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1140 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1093 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1141 1094
1142 out: 1095 out:
1143 nilfs_btree_release_path(path);
1144 nilfs_btree_free_path(path); 1096 nilfs_btree_free_path(path);
1145 return ret; 1097 return ret;
1146} 1098}
@@ -1456,7 +1408,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1456 path = nilfs_btree_alloc_path(); 1408 path = nilfs_btree_alloc_path();
1457 if (path == NULL) 1409 if (path == NULL)
1458 return -ENOMEM; 1410 return -ENOMEM;
1459 nilfs_btree_init_path(path); 1411
1460 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1412 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1461 NILFS_BTREE_LEVEL_NODE_MIN); 1413 NILFS_BTREE_LEVEL_NODE_MIN);
1462 if (ret < 0) 1414 if (ret < 0)
@@ -1473,7 +1425,6 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1473 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); 1425 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1474 1426
1475out: 1427out:
1476 nilfs_btree_release_path(path);
1477 nilfs_btree_free_path(path); 1428 nilfs_btree_free_path(path);
1478 return ret; 1429 return ret;
1479} 1430}
@@ -1488,11 +1439,9 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1488 path = nilfs_btree_alloc_path(); 1439 path = nilfs_btree_alloc_path();
1489 if (path == NULL) 1440 if (path == NULL)
1490 return -ENOMEM; 1441 return -ENOMEM;
1491 nilfs_btree_init_path(path);
1492 1442
1493 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1443 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1494 1444
1495 nilfs_btree_release_path(path);
1496 nilfs_btree_free_path(path); 1445 nilfs_btree_free_path(path);
1497 1446
1498 return ret; 1447 return ret;
@@ -1923,7 +1872,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1923 path = nilfs_btree_alloc_path(); 1872 path = nilfs_btree_alloc_path();
1924 if (path == NULL) 1873 if (path == NULL)
1925 return -ENOMEM; 1874 return -ENOMEM;
1926 nilfs_btree_init_path(path);
1927 1875
1928 if (buffer_nilfs_node(bh)) { 1876 if (buffer_nilfs_node(bh)) {
1929 node = (struct nilfs_btree_node *)bh->b_data; 1877 node = (struct nilfs_btree_node *)bh->b_data;
@@ -1947,7 +1895,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1947 nilfs_btree_propagate_p(btree, path, level, bh); 1895 nilfs_btree_propagate_p(btree, path, level, bh);
1948 1896
1949 out: 1897 out:
1950 nilfs_btree_release_path(path);
1951 nilfs_btree_free_path(path); 1898 nilfs_btree_free_path(path);
1952 1899
1953 return ret; 1900 return ret;
@@ -2108,7 +2055,6 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2108 path = nilfs_btree_alloc_path(); 2055 path = nilfs_btree_alloc_path();
2109 if (path == NULL) 2056 if (path == NULL)
2110 return -ENOMEM; 2057 return -ENOMEM;
2111 nilfs_btree_init_path(path);
2112 2058
2113 if (buffer_nilfs_node(*bh)) { 2059 if (buffer_nilfs_node(*bh)) {
2114 node = (struct nilfs_btree_node *)(*bh)->b_data; 2060 node = (struct nilfs_btree_node *)(*bh)->b_data;
@@ -2130,7 +2076,6 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2130 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2076 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2131 2077
2132 out: 2078 out:
2133 nilfs_btree_release_path(path);
2134 nilfs_btree_free_path(path); 2079 nilfs_btree_free_path(path);
2135 2080
2136 return ret; 2081 return ret;
@@ -2175,7 +2120,6 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2175 path = nilfs_btree_alloc_path(); 2120 path = nilfs_btree_alloc_path();
2176 if (path == NULL) 2121 if (path == NULL)
2177 return -ENOMEM; 2122 return -ENOMEM;
2178 nilfs_btree_init_path(path);
2179 2123
2180 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); 2124 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2181 if (ret < 0) { 2125 if (ret < 0) {
@@ -2195,7 +2139,6 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2195 nilfs_bmap_set_dirty(&btree->bt_bmap); 2139 nilfs_bmap_set_dirty(&btree->bt_bmap);
2196 2140
2197 out: 2141 out:
2198 nilfs_btree_release_path(path);
2199 nilfs_btree_free_path(path); 2142 nilfs_btree_free_path(path);
2200 return ret; 2143 return ret;
2201} 2144}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 4b82d84ade75..af638d59e3bf 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -30,9 +30,6 @@
30#include "btnode.h" 30#include "btnode.h"
31#include "bmap.h" 31#include "bmap.h"
32 32
33struct nilfs_btree;
34struct nilfs_btree_path;
35
36/** 33/**
37 * struct nilfs_btree - B-tree structure 34 * struct nilfs_btree - B-tree structure
38 * @bt_bmap: bmap base structure 35 * @bt_bmap: bmap base structure
@@ -41,6 +38,25 @@ struct nilfs_btree {
41 struct nilfs_bmap bt_bmap; 38 struct nilfs_bmap bt_bmap;
42}; 39};
43 40
41/**
42 * struct nilfs_btree_path - A path on which B-tree operations are executed
43 * @bp_bh: buffer head of node block
44 * @bp_sib_bh: buffer head of sibling node block
45 * @bp_index: index of child node
46 * @bp_oldreq: ptr end request for old ptr
47 * @bp_newreq: ptr alloc request for new ptr
48 * @bp_op: rebalance operation
49 */
50struct nilfs_btree_path {
51 struct buffer_head *bp_bh;
52 struct buffer_head *bp_sib_bh;
53 int bp_index;
54 union nilfs_bmap_ptr_req bp_oldreq;
55 union nilfs_bmap_ptr_req bp_newreq;
56 struct nilfs_btnode_chkey_ctxt bp_ctxt;
57 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
58 int, __u64 *, __u64 *);
59};
44 60
45#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE 61#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE
46#define NILFS_BTREE_ROOT_NCHILDREN_MAX \ 62#define NILFS_BTREE_ROOT_NCHILDREN_MAX \
@@ -57,6 +73,7 @@ struct nilfs_btree {
57#define NILFS_BTREE_KEY_MIN ((__u64)0) 73#define NILFS_BTREE_KEY_MIN ((__u64)0)
58#define NILFS_BTREE_KEY_MAX (~(__u64)0) 74#define NILFS_BTREE_KEY_MAX (~(__u64)0)
59 75
76extern struct kmem_cache *nilfs_btree_path_cache;
60 77
61int nilfs_btree_path_cache_init(void); 78int nilfs_btree_path_cache_init(void);
62void nilfs_btree_path_cache_destroy(void); 79void nilfs_btree_path_cache_destroy(void);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 0957b58f909d..39e038ac8fcb 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -280,16 +280,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
280 /* reference count of i_bh inherits from nilfs_mdt_read_block() */ 280 /* reference count of i_bh inherits from nilfs_mdt_read_block() */
281 281
282 atomic_inc(&sbi->s_inodes_count); 282 atomic_inc(&sbi->s_inodes_count);
283 283 inode_init_owner(inode, dir, mode);
284 inode->i_uid = current_fsuid();
285 if (dir->i_mode & S_ISGID) {
286 inode->i_gid = dir->i_gid;
287 if (S_ISDIR(mode))
288 mode |= S_ISGID;
289 } else
290 inode->i_gid = current_fsgid();
291
292 inode->i_mode = mode;
293 inode->i_ino = ino; 284 inode->i_ino = ino;
294 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 285 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
295 286
@@ -451,7 +442,7 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
451 inode->i_op = &nilfs_special_inode_operations; 442 inode->i_op = &nilfs_special_inode_operations;
452 init_special_inode( 443 init_special_inode(
453 inode, inode->i_mode, 444 inode, inode->i_mode,
454 new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); 445 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
455 } 446 }
456 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); 447 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh);
457 brelse(bh); 448 brelse(bh);
@@ -511,7 +502,7 @@ void nilfs_write_inode_common(struct inode *inode,
511 nilfs_bmap_write(ii->i_bmap, raw_inode); 502 nilfs_bmap_write(ii->i_bmap, raw_inode);
512 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 503 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
513 raw_inode->i_device_code = 504 raw_inode->i_device_code =
514 cpu_to_le64(new_encode_dev(inode->i_rdev)); 505 cpu_to_le64(huge_encode_dev(inode->i_rdev));
515 /* When extending inode, nilfs->ns_inode_size should be checked 506 /* When extending inode, nilfs->ns_inode_size should be checked
516 for substitutions of appended fields */ 507 for substitutions of appended fields */
517} 508}
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba43146f3c30..bae2a516b4ee 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -105,6 +105,8 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi,
105 105
106 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); 106 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
107 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); 107 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
108
109 /* need to verify ->ss_bytes field if read ->ss_cno */
108} 110}
109 111
110/** 112/**
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 17851f77f739..2e6a2723b8fa 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -40,35 +40,10 @@ struct nilfs_write_info {
40 sector_t blocknr; 40 sector_t blocknr;
41}; 41};
42 42
43
44static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, 43static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
45 struct the_nilfs *nilfs); 44 struct the_nilfs *nilfs);
46static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); 45static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
47 46
48
49static struct kmem_cache *nilfs_segbuf_cachep;
50
51static void nilfs_segbuf_init_once(void *obj)
52{
53 memset(obj, 0, sizeof(struct nilfs_segment_buffer));
54}
55
56int __init nilfs_init_segbuf_cache(void)
57{
58 nilfs_segbuf_cachep =
59 kmem_cache_create("nilfs2_segbuf_cache",
60 sizeof(struct nilfs_segment_buffer),
61 0, SLAB_RECLAIM_ACCOUNT,
62 nilfs_segbuf_init_once);
63
64 return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0;
65}
66
67void nilfs_destroy_segbuf_cache(void)
68{
69 kmem_cache_destroy(nilfs_segbuf_cachep);
70}
71
72struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) 47struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
73{ 48{
74 struct nilfs_segment_buffer *segbuf; 49 struct nilfs_segment_buffer *segbuf;
@@ -81,6 +56,7 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
81 INIT_LIST_HEAD(&segbuf->sb_list); 56 INIT_LIST_HEAD(&segbuf->sb_list);
82 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); 57 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
83 INIT_LIST_HEAD(&segbuf->sb_payload_buffers); 58 INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
59 segbuf->sb_super_root = NULL;
84 60
85 init_completion(&segbuf->sb_bio_event); 61 init_completion(&segbuf->sb_bio_event);
86 atomic_set(&segbuf->sb_err, 0); 62 atomic_set(&segbuf->sb_err, 0);
@@ -158,7 +134,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
158} 134}
159 135
160int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, 136int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
161 time_t ctime) 137 time_t ctime, __u64 cno)
162{ 138{
163 int err; 139 int err;
164 140
@@ -171,6 +147,7 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
171 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); 147 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
172 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; 148 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
173 segbuf->sb_sum.ctime = ctime; 149 segbuf->sb_sum.ctime = ctime;
150 segbuf->sb_sum.cno = cno;
174 return 0; 151 return 0;
175} 152}
176 153
@@ -196,13 +173,14 @@ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
196 raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); 173 raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo);
197 raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); 174 raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
198 raw_sum->ss_pad = 0; 175 raw_sum->ss_pad = 0;
176 raw_sum->ss_cno = cpu_to_le64(segbuf->sb_sum.cno);
199} 177}
200 178
201/* 179/*
202 * CRC calculation routines 180 * CRC calculation routines
203 */ 181 */
204void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, 182static void
205 u32 seed) 183nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, u32 seed)
206{ 184{
207 struct buffer_head *bh; 185 struct buffer_head *bh;
208 struct nilfs_segment_summary *raw_sum; 186 struct nilfs_segment_summary *raw_sum;
@@ -229,8 +207,8 @@ void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf,
229 raw_sum->ss_sumsum = cpu_to_le32(crc); 207 raw_sum->ss_sumsum = cpu_to_le32(crc);
230} 208}
231 209
232void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, 210static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
233 u32 seed) 211 u32 seed)
234{ 212{
235 struct buffer_head *bh; 213 struct buffer_head *bh;
236 struct nilfs_segment_summary *raw_sum; 214 struct nilfs_segment_summary *raw_sum;
@@ -256,6 +234,20 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
256 raw_sum->ss_datasum = cpu_to_le32(crc); 234 raw_sum->ss_datasum = cpu_to_le32(crc);
257} 235}
258 236
237static void
238nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed)
240{
241 struct nilfs_super_root *raw_sr;
242 u32 crc;
243
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
245 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc);
249}
250
259static void nilfs_release_buffers(struct list_head *list) 251static void nilfs_release_buffers(struct list_head *list)
260{ 252{
261 struct buffer_head *bh, *n; 253 struct buffer_head *bh, *n;
@@ -282,6 +274,7 @@ static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
282{ 274{
283 nilfs_release_buffers(&segbuf->sb_segsum_buffers); 275 nilfs_release_buffers(&segbuf->sb_segsum_buffers);
284 nilfs_release_buffers(&segbuf->sb_payload_buffers); 276 nilfs_release_buffers(&segbuf->sb_payload_buffers);
277 segbuf->sb_super_root = NULL;
285} 278}
286 279
287/* 280/*
@@ -334,6 +327,23 @@ int nilfs_wait_on_logs(struct list_head *logs)
334 return ret; 327 return ret;
335} 328}
336 329
330/**
331 * nilfs_add_checksums_on_logs - add checksums on the logs
332 * @logs: list of segment buffers storing target logs
333 * @seed: checksum seed value
334 */
335void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed)
336{
337 struct nilfs_segment_buffer *segbuf;
338
339 list_for_each_entry(segbuf, logs, sb_list) {
340 if (segbuf->sb_super_root)
341 nilfs_segbuf_fill_in_super_root_crc(segbuf, seed);
342 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
343 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
344 }
345}
346
337/* 347/*
338 * BIO operations 348 * BIO operations
339 */ 349 */
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 94dfd3517bc0..fdf1c3b6d673 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -37,6 +37,7 @@
37 * @sumbytes: Byte count of segment summary 37 * @sumbytes: Byte count of segment summary
38 * @nfileblk: Total number of file blocks 38 * @nfileblk: Total number of file blocks
39 * @seg_seq: Segment sequence number 39 * @seg_seq: Segment sequence number
40 * @cno: Checkpoint number
40 * @ctime: Creation time 41 * @ctime: Creation time
41 * @next: Block number of the next full segment 42 * @next: Block number of the next full segment
42 */ 43 */
@@ -48,6 +49,7 @@ struct nilfs_segsum_info {
48 unsigned long sumbytes; 49 unsigned long sumbytes;
49 unsigned long nfileblk; 50 unsigned long nfileblk;
50 u64 seg_seq; 51 u64 seg_seq;
52 __u64 cno;
51 time_t ctime; 53 time_t ctime;
52 sector_t next; 54 sector_t next;
53}; 55};
@@ -76,6 +78,7 @@ struct nilfs_segsum_info {
76 * @sb_rest_blocks: Number of residual blocks in the current segment 78 * @sb_rest_blocks: Number of residual blocks in the current segment
77 * @sb_segsum_buffers: List of buffers for segment summaries 79 * @sb_segsum_buffers: List of buffers for segment summaries
78 * @sb_payload_buffers: List of buffers for segment payload 80 * @sb_payload_buffers: List of buffers for segment payload
81 * @sb_super_root: Pointer to buffer storing a super root block (if exists)
79 * @sb_nbio: Number of flying bio requests 82 * @sb_nbio: Number of flying bio requests
80 * @sb_err: I/O error status 83 * @sb_err: I/O error status
81 * @sb_bio_event: Completion event of log writing 84 * @sb_bio_event: Completion event of log writing
@@ -95,6 +98,7 @@ struct nilfs_segment_buffer {
95 /* Buffers */ 98 /* Buffers */
96 struct list_head sb_segsum_buffers; 99 struct list_head sb_segsum_buffers;
97 struct list_head sb_payload_buffers; /* including super root */ 100 struct list_head sb_payload_buffers; /* including super root */
101 struct buffer_head *sb_super_root;
98 102
99 /* io status */ 103 /* io status */
100 int sb_nbio; 104 int sb_nbio;
@@ -121,6 +125,7 @@ struct nilfs_segment_buffer {
121 b_assoc_buffers)) 125 b_assoc_buffers))
122#define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) 126#define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head)
123 127
128extern struct kmem_cache *nilfs_segbuf_cachep;
124 129
125int __init nilfs_init_segbuf_cache(void); 130int __init nilfs_init_segbuf_cache(void);
126void nilfs_destroy_segbuf_cache(void); 131void nilfs_destroy_segbuf_cache(void);
@@ -132,13 +137,11 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
132 struct nilfs_segment_buffer *prev); 137 struct nilfs_segment_buffer *prev);
133void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, 138void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
134 struct the_nilfs *); 139 struct the_nilfs *);
135int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); 140int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64);
136int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); 141int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
137int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, 142int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
138 struct buffer_head **); 143 struct buffer_head **);
139void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); 144void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
140void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32);
141void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32);
142 145
143static inline void 146static inline void
144nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, 147nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
@@ -171,6 +174,7 @@ void nilfs_truncate_logs(struct list_head *logs,
171 struct nilfs_segment_buffer *last); 174 struct nilfs_segment_buffer *last);
172int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs); 175int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs);
173int nilfs_wait_on_logs(struct list_head *logs); 176int nilfs_wait_on_logs(struct list_head *logs);
177void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed);
174 178
175static inline void nilfs_destroy_logs(struct list_head *logs) 179static inline void nilfs_destroy_logs(struct list_head *logs)
176{ 180{
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6a7dbd8451db..c9201649cc49 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -116,42 +116,6 @@ static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) 116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a)
117#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) 117#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a)
118 118
119/*
120 * Transaction
121 */
122static struct kmem_cache *nilfs_transaction_cachep;
123
124/**
125 * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info
126 *
127 * nilfs_init_transaction_cache() creates a slab cache for the struct
128 * nilfs_transaction_info.
129 *
130 * Return Value: On success, it returns 0. On error, one of the following
131 * negative error code is returned.
132 *
133 * %-ENOMEM - Insufficient memory available.
134 */
135int nilfs_init_transaction_cache(void)
136{
137 nilfs_transaction_cachep =
138 kmem_cache_create("nilfs2_transaction_cache",
139 sizeof(struct nilfs_transaction_info),
140 0, SLAB_RECLAIM_ACCOUNT, NULL);
141 return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0;
142}
143
144/**
145 * nilfs_destroy_transaction_cache - destroy the cache for transaction info
146 *
147 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
148 * nilfs_transaction_info.
149 */
150void nilfs_destroy_transaction_cache(void)
151{
152 kmem_cache_destroy(nilfs_transaction_cachep);
153}
154
155static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) 119static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
156{ 120{
157 struct nilfs_transaction_info *cur_ti = current->journal_info; 121 struct nilfs_transaction_info *cur_ti = current->journal_info;
@@ -402,7 +366,8 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
402 366
403 if (nilfs_doing_gc()) 367 if (nilfs_doing_gc())
404 flags = NILFS_SS_GC; 368 flags = NILFS_SS_GC;
405 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime); 369 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime,
370 sci->sc_sbi->s_nilfs->ns_cno);
406 if (unlikely(err)) 371 if (unlikely(err))
407 return err; 372 return err;
408 373
@@ -435,7 +400,7 @@ static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
435 return err; 400 return err;
436 segbuf = sci->sc_curseg; 401 segbuf = sci->sc_curseg;
437 } 402 }
438 err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root); 403 err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
439 if (likely(!err)) 404 if (likely(!err))
440 segbuf->sb_sum.flags |= NILFS_SS_SR; 405 segbuf->sb_sum.flags |= NILFS_SS_SR;
441 return err; 406 return err;
@@ -599,7 +564,7 @@ static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
599 *vblocknr = binfo->bi_v.bi_vblocknr; 564 *vblocknr = binfo->bi_v.bi_vblocknr;
600} 565}
601 566
602struct nilfs_sc_operations nilfs_sc_file_ops = { 567static struct nilfs_sc_operations nilfs_sc_file_ops = {
603 .collect_data = nilfs_collect_file_data, 568 .collect_data = nilfs_collect_file_data,
604 .collect_node = nilfs_collect_file_node, 569 .collect_node = nilfs_collect_file_node,
605 .collect_bmap = nilfs_collect_file_bmap, 570 .collect_bmap = nilfs_collect_file_bmap,
@@ -649,7 +614,7 @@ static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
649 *binfo_dat = binfo->bi_dat; 614 *binfo_dat = binfo->bi_dat;
650} 615}
651 616
652struct nilfs_sc_operations nilfs_sc_dat_ops = { 617static struct nilfs_sc_operations nilfs_sc_dat_ops = {
653 .collect_data = nilfs_collect_dat_data, 618 .collect_data = nilfs_collect_dat_data,
654 .collect_node = nilfs_collect_file_node, 619 .collect_node = nilfs_collect_file_node,
655 .collect_bmap = nilfs_collect_dat_bmap, 620 .collect_bmap = nilfs_collect_dat_bmap,
@@ -657,7 +622,7 @@ struct nilfs_sc_operations nilfs_sc_dat_ops = {
657 .write_node_binfo = nilfs_write_dat_node_binfo, 622 .write_node_binfo = nilfs_write_dat_node_binfo,
658}; 623};
659 624
660struct nilfs_sc_operations nilfs_sc_dsync_ops = { 625static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
661 .collect_data = nilfs_collect_file_data, 626 .collect_data = nilfs_collect_file_data,
662 .collect_node = NULL, 627 .collect_node = NULL,
663 .collect_bmap = NULL, 628 .collect_bmap = NULL,
@@ -932,43 +897,16 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci,
932 } 897 }
933} 898}
934 899
935/*
936 * CRC calculation routines
937 */
938static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed)
939{
940 struct nilfs_super_root *raw_sr =
941 (struct nilfs_super_root *)bh_sr->b_data;
942 u32 crc;
943
944 crc = crc32_le(seed,
945 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
946 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
947 raw_sr->sr_sum = cpu_to_le32(crc);
948}
949
950static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci,
951 u32 seed)
952{
953 struct nilfs_segment_buffer *segbuf;
954
955 if (sci->sc_super_root)
956 nilfs_fill_in_super_root_crc(sci->sc_super_root, seed);
957
958 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
959 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
960 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
961 }
962}
963
964static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, 900static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
965 struct the_nilfs *nilfs) 901 struct the_nilfs *nilfs)
966{ 902{
967 struct buffer_head *bh_sr = sci->sc_super_root; 903 struct buffer_head *bh_sr;
968 struct nilfs_super_root *raw_sr = 904 struct nilfs_super_root *raw_sr;
969 (struct nilfs_super_root *)bh_sr->b_data;
970 unsigned isz = nilfs->ns_inode_size; 905 unsigned isz = nilfs->ns_inode_size;
971 906
907 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
908 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
909
972 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 910 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
973 raw_sr->sr_nongc_ctime 911 raw_sr->sr_nongc_ctime
974 = cpu_to_le64(nilfs_doing_gc() ? 912 = cpu_to_le64(nilfs_doing_gc() ?
@@ -1491,7 +1429,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1491 1429
1492 /* Collection retry loop */ 1430 /* Collection retry loop */
1493 for (;;) { 1431 for (;;) {
1494 sci->sc_super_root = NULL;
1495 sci->sc_nblk_this_inc = 0; 1432 sci->sc_nblk_this_inc = 0;
1496 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1433 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1497 1434
@@ -1568,7 +1505,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1568 ssp.offset = sizeof(struct nilfs_segment_summary); 1505 ssp.offset = sizeof(struct nilfs_segment_summary);
1569 1506
1570 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 1507 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1571 if (bh == sci->sc_super_root) 1508 if (bh == segbuf->sb_super_root)
1572 break; 1509 break;
1573 if (!finfo) { 1510 if (!finfo) {
1574 finfo = nilfs_segctor_map_segsum_entry( 1511 finfo = nilfs_segctor_map_segsum_entry(
@@ -1729,7 +1666,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1729 1666
1730 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1667 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1731 b_assoc_buffers) { 1668 b_assoc_buffers) {
1732 if (bh == sci->sc_super_root) { 1669 if (bh == segbuf->sb_super_root) {
1733 if (bh->b_page != bd_page) { 1670 if (bh->b_page != bd_page) {
1734 lock_page(bd_page); 1671 lock_page(bd_page);
1735 clear_page_dirty_for_io(bd_page); 1672 clear_page_dirty_for_io(bd_page);
@@ -1848,7 +1785,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1848} 1785}
1849 1786
1850static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1787static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1851 struct buffer_head *bh_sr, int err) 1788 int err)
1852{ 1789{
1853 struct nilfs_segment_buffer *segbuf; 1790 struct nilfs_segment_buffer *segbuf;
1854 struct page *bd_page = NULL, *fs_page = NULL; 1791 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1869,7 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1869 1806
1870 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1807 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1871 b_assoc_buffers) { 1808 b_assoc_buffers) {
1872 if (bh == bh_sr) { 1809 if (bh == segbuf->sb_super_root) {
1873 if (bh->b_page != bd_page) { 1810 if (bh->b_page != bd_page) {
1874 end_page_writeback(bd_page); 1811 end_page_writeback(bd_page);
1875 bd_page = bh->b_page; 1812 bd_page = bh->b_page;
@@ -1898,7 +1835,7 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1898 1835
1899 list_splice_tail_init(&sci->sc_write_logs, &logs); 1836 list_splice_tail_init(&sci->sc_write_logs, &logs);
1900 ret = nilfs_wait_on_logs(&logs); 1837 ret = nilfs_wait_on_logs(&logs);
1901 nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret ? : err); 1838 nilfs_abort_logs(&logs, NULL, ret ? : err);
1902 1839
1903 list_splice_tail_init(&sci->sc_segbufs, &logs); 1840 list_splice_tail_init(&sci->sc_segbufs, &logs);
1904 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1841 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
@@ -1914,7 +1851,6 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1914 } 1851 }
1915 1852
1916 nilfs_destroy_logs(&logs); 1853 nilfs_destroy_logs(&logs);
1917 sci->sc_super_root = NULL;
1918} 1854}
1919 1855
1920static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1856static void nilfs_set_next_segment(struct the_nilfs *nilfs,
@@ -1933,7 +1869,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1933 struct nilfs_segment_buffer *segbuf; 1869 struct nilfs_segment_buffer *segbuf;
1934 struct page *bd_page = NULL, *fs_page = NULL; 1870 struct page *bd_page = NULL, *fs_page = NULL;
1935 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 1871 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
1936 int update_sr = (sci->sc_super_root != NULL); 1872 int update_sr = false;
1937 1873
1938 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1874 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1939 struct buffer_head *bh; 1875 struct buffer_head *bh;
@@ -1964,11 +1900,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1964 set_buffer_uptodate(bh); 1900 set_buffer_uptodate(bh);
1965 clear_buffer_dirty(bh); 1901 clear_buffer_dirty(bh);
1966 clear_buffer_nilfs_volatile(bh); 1902 clear_buffer_nilfs_volatile(bh);
1967 if (bh == sci->sc_super_root) { 1903 if (bh == segbuf->sb_super_root) {
1968 if (bh->b_page != bd_page) { 1904 if (bh->b_page != bd_page) {
1969 end_page_writeback(bd_page); 1905 end_page_writeback(bd_page);
1970 bd_page = bh->b_page; 1906 bd_page = bh->b_page;
1971 } 1907 }
1908 update_sr = true;
1972 break; 1909 break;
1973 } 1910 }
1974 if (bh->b_page != fs_page) { 1911 if (bh->b_page != fs_page) {
@@ -2115,7 +2052,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2115 struct nilfs_sb_info *sbi = sci->sc_sbi; 2052 struct nilfs_sb_info *sbi = sci->sc_sbi;
2116 struct the_nilfs *nilfs = sbi->s_nilfs; 2053 struct the_nilfs *nilfs = sbi->s_nilfs;
2117 struct page *failed_page; 2054 struct page *failed_page;
2118 int err, has_sr = 0; 2055 int err;
2119 2056
2120 sci->sc_stage.scnt = NILFS_ST_INIT; 2057 sci->sc_stage.scnt = NILFS_ST_INIT;
2121 2058
@@ -2143,8 +2080,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2143 if (unlikely(err)) 2080 if (unlikely(err))
2144 goto failed; 2081 goto failed;
2145 2082
2146 has_sr = (sci->sc_super_root != NULL);
2147
2148 /* Avoid empty segment */ 2083 /* Avoid empty segment */
2149 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2084 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2150 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { 2085 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
@@ -2159,7 +2094,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2159 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2094 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2160 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); 2095 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile);
2161 2096
2162 if (has_sr) { 2097 if (mode == SC_LSEG_SR &&
2098 sci->sc_stage.scnt >= NILFS_ST_CPFILE) {
2163 err = nilfs_segctor_fill_in_checkpoint(sci); 2099 err = nilfs_segctor_fill_in_checkpoint(sci);
2164 if (unlikely(err)) 2100 if (unlikely(err))
2165 goto failed_to_write; 2101 goto failed_to_write;
@@ -2171,11 +2107,12 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2171 /* Write partial segments */ 2107 /* Write partial segments */
2172 err = nilfs_segctor_prepare_write(sci, &failed_page); 2108 err = nilfs_segctor_prepare_write(sci, &failed_page);
2173 if (err) { 2109 if (err) {
2174 nilfs_abort_logs(&sci->sc_segbufs, failed_page, 2110 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2175 sci->sc_super_root, err);
2176 goto failed_to_write; 2111 goto failed_to_write;
2177 } 2112 }
2178 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); 2113
2114 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2115 nilfs->ns_crc_seed);
2179 2116
2180 err = nilfs_segctor_write(sci, nilfs); 2117 err = nilfs_segctor_write(sci, nilfs);
2181 if (unlikely(err)) 2118 if (unlikely(err))
@@ -2196,8 +2133,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2196 } 2133 }
2197 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2134 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2198 2135
2199 sci->sc_super_root = NULL;
2200
2201 out: 2136 out:
2202 nilfs_segctor_check_out_files(sci, sbi); 2137 nilfs_segctor_check_out_files(sci, sbi);
2203 return err; 2138 return err;
@@ -2224,9 +2159,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2224static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) 2159static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2225{ 2160{
2226 spin_lock(&sci->sc_state_lock); 2161 spin_lock(&sci->sc_state_lock);
2227 if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { 2162 if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2228 sci->sc_timer->expires = jiffies + sci->sc_interval; 2163 sci->sc_timer.expires = jiffies + sci->sc_interval;
2229 add_timer(sci->sc_timer); 2164 add_timer(&sci->sc_timer);
2230 sci->sc_state |= NILFS_SEGCTOR_COMMIT; 2165 sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2231 } 2166 }
2232 spin_unlock(&sci->sc_state_lock); 2167 spin_unlock(&sci->sc_state_lock);
@@ -2431,9 +2366,7 @@ static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2431 spin_lock(&sci->sc_state_lock); 2366 spin_lock(&sci->sc_state_lock);
2432 sci->sc_seq_accepted = sci->sc_seq_request; 2367 sci->sc_seq_accepted = sci->sc_seq_request;
2433 spin_unlock(&sci->sc_state_lock); 2368 spin_unlock(&sci->sc_state_lock);
2434 2369 del_timer_sync(&sci->sc_timer);
2435 if (sci->sc_timer)
2436 del_timer_sync(sci->sc_timer);
2437} 2370}
2438 2371
2439/** 2372/**
@@ -2459,9 +2392,9 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2459 sci->sc_flush_request &= ~FLUSH_DAT_BIT; 2392 sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2460 2393
2461 /* re-enable timer if checkpoint creation was not done */ 2394 /* re-enable timer if checkpoint creation was not done */
2462 if (sci->sc_timer && (sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2395 if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2463 time_before(jiffies, sci->sc_timer->expires)) 2396 time_before(jiffies, sci->sc_timer.expires))
2464 add_timer(sci->sc_timer); 2397 add_timer(&sci->sc_timer);
2465 } 2398 }
2466 spin_unlock(&sci->sc_state_lock); 2399 spin_unlock(&sci->sc_state_lock);
2467} 2400}
@@ -2640,13 +2573,10 @@ static int nilfs_segctor_thread(void *arg)
2640{ 2573{
2641 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2574 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2642 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2575 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
2643 struct timer_list timer;
2644 int timeout = 0; 2576 int timeout = 0;
2645 2577
2646 init_timer(&timer); 2578 sci->sc_timer.data = (unsigned long)current;
2647 timer.data = (unsigned long)current; 2579 sci->sc_timer.function = nilfs_construction_timeout;
2648 timer.function = nilfs_construction_timeout;
2649 sci->sc_timer = &timer;
2650 2580
2651 /* start sync. */ 2581 /* start sync. */
2652 sci->sc_task = current; 2582 sci->sc_task = current;
@@ -2695,7 +2625,7 @@ static int nilfs_segctor_thread(void *arg)
2695 should_sleep = 0; 2625 should_sleep = 0;
2696 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) 2626 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2697 should_sleep = time_before(jiffies, 2627 should_sleep = time_before(jiffies,
2698 sci->sc_timer->expires); 2628 sci->sc_timer.expires);
2699 2629
2700 if (should_sleep) { 2630 if (should_sleep) {
2701 spin_unlock(&sci->sc_state_lock); 2631 spin_unlock(&sci->sc_state_lock);
@@ -2704,7 +2634,7 @@ static int nilfs_segctor_thread(void *arg)
2704 } 2634 }
2705 finish_wait(&sci->sc_wait_daemon, &wait); 2635 finish_wait(&sci->sc_wait_daemon, &wait);
2706 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2636 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2707 time_after_eq(jiffies, sci->sc_timer->expires)); 2637 time_after_eq(jiffies, sci->sc_timer.expires));
2708 2638
2709 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) 2639 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2710 set_nilfs_discontinued(nilfs); 2640 set_nilfs_discontinued(nilfs);
@@ -2713,8 +2643,6 @@ static int nilfs_segctor_thread(void *arg)
2713 2643
2714 end_thread: 2644 end_thread:
2715 spin_unlock(&sci->sc_state_lock); 2645 spin_unlock(&sci->sc_state_lock);
2716 del_timer_sync(sci->sc_timer);
2717 sci->sc_timer = NULL;
2718 2646
2719 /* end sync. */ 2647 /* end sync. */
2720 sci->sc_task = NULL; 2648 sci->sc_task = NULL;
@@ -2750,13 +2678,6 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2750 } 2678 }
2751} 2679}
2752 2680
2753static int nilfs_segctor_init(struct nilfs_sc_info *sci)
2754{
2755 sci->sc_seq_done = sci->sc_seq_request;
2756
2757 return nilfs_segctor_start_thread(sci);
2758}
2759
2760/* 2681/*
2761 * Setup & clean-up functions 2682 * Setup & clean-up functions
2762 */ 2683 */
@@ -2780,6 +2701,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2780 INIT_LIST_HEAD(&sci->sc_write_logs); 2701 INIT_LIST_HEAD(&sci->sc_write_logs);
2781 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2702 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2782 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2703 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2704 init_timer(&sci->sc_timer);
2783 2705
2784 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2706 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2785 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2707 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2846,6 +2768,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2846 2768
2847 down_write(&sbi->s_nilfs->ns_segctor_sem); 2769 down_write(&sbi->s_nilfs->ns_segctor_sem);
2848 2770
2771 del_timer_sync(&sci->sc_timer);
2849 kfree(sci); 2772 kfree(sci);
2850} 2773}
2851 2774
@@ -2880,7 +2803,7 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi)
2880 return -ENOMEM; 2803 return -ENOMEM;
2881 2804
2882 nilfs_attach_writer(nilfs, sbi); 2805 nilfs_attach_writer(nilfs, sbi);
2883 err = nilfs_segctor_init(NILFS_SC(sbi)); 2806 err = nilfs_segctor_start_thread(NILFS_SC(sbi));
2884 if (err) { 2807 if (err) {
2885 nilfs_detach_writer(nilfs, sbi); 2808 nilfs_detach_writer(nilfs, sbi);
2886 kfree(sbi->s_sc_info); 2809 kfree(sbi->s_sc_info);
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 82dfd6a686b9..dca142361ccf 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -100,7 +100,6 @@ struct nilfs_segsum_pointer {
100 * @sc_write_logs: List of segment buffers to hold logs under writing 100 * @sc_write_logs: List of segment buffers to hold logs under writing
101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers. 101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
102 * @sc_curseg: Current segment buffer 102 * @sc_curseg: Current segment buffer
103 * @sc_super_root: Pointer to the super root buffer
104 * @sc_stage: Collection stage 103 * @sc_stage: Collection stage
105 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary 104 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
106 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary 105 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
@@ -148,7 +147,6 @@ struct nilfs_sc_info {
148 struct list_head sc_write_logs; 147 struct list_head sc_write_logs;
149 unsigned long sc_segbuf_nblocks; 148 unsigned long sc_segbuf_nblocks;
150 struct nilfs_segment_buffer *sc_curseg; 149 struct nilfs_segment_buffer *sc_curseg;
151 struct buffer_head *sc_super_root;
152 150
153 struct nilfs_cstage sc_stage; 151 struct nilfs_cstage sc_stage;
154 152
@@ -179,7 +177,7 @@ struct nilfs_sc_info {
179 unsigned long sc_lseg_stime; /* in 1/HZ seconds */ 177 unsigned long sc_lseg_stime; /* in 1/HZ seconds */
180 unsigned long sc_watermark; 178 unsigned long sc_watermark;
181 179
182 struct timer_list *sc_timer; 180 struct timer_list sc_timer;
183 struct task_struct *sc_task; 181 struct task_struct *sc_task;
184}; 182};
185 183
@@ -219,6 +217,8 @@ enum {
219 */ 217 */
220#define NILFS_SC_DEFAULT_WATERMARK 3600 218#define NILFS_SC_DEFAULT_WATERMARK 3600
221 219
220/* super.c */
221extern struct kmem_cache *nilfs_transaction_cachep;
222 222
223/* segment.c */ 223/* segment.c */
224extern int nilfs_init_transaction_cache(void); 224extern int nilfs_init_transaction_cache(void);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 48145f505a6a..03b34b738993 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -67,6 +67,11 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
67 "(NILFS)"); 67 "(NILFS)");
68MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
69 69
70struct kmem_cache *nilfs_inode_cachep;
71struct kmem_cache *nilfs_transaction_cachep;
72struct kmem_cache *nilfs_segbuf_cachep;
73struct kmem_cache *nilfs_btree_path_cache;
74
70static int nilfs_remount(struct super_block *sb, int *flags, char *data); 75static int nilfs_remount(struct super_block *sb, int *flags, char *data);
71 76
72/** 77/**
@@ -129,7 +134,6 @@ void nilfs_warning(struct super_block *sb, const char *function,
129 va_end(args); 134 va_end(args);
130} 135}
131 136
132static struct kmem_cache *nilfs_inode_cachep;
133 137
134struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) 138struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs)
135{ 139{
@@ -155,34 +159,6 @@ void nilfs_destroy_inode(struct inode *inode)
155 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 159 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
156} 160}
157 161
158static void init_once(void *obj)
159{
160 struct nilfs_inode_info *ii = obj;
161
162 INIT_LIST_HEAD(&ii->i_dirty);
163#ifdef CONFIG_NILFS_XATTR
164 init_rwsem(&ii->xattr_sem);
165#endif
166 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
167 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
168 inode_init_once(&ii->vfs_inode);
169}
170
171static int nilfs_init_inode_cache(void)
172{
173 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
174 sizeof(struct nilfs_inode_info),
175 0, SLAB_RECLAIM_ACCOUNT,
176 init_once);
177
178 return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0;
179}
180
181static inline void nilfs_destroy_inode_cache(void)
182{
183 kmem_cache_destroy(nilfs_inode_cachep);
184}
185
186static void nilfs_clear_inode(struct inode *inode) 162static void nilfs_clear_inode(struct inode *inode)
187{ 163{
188 struct nilfs_inode_info *ii = NILFS_I(inode); 164 struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -266,8 +242,8 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
266 int err; 242 int err;
267 243
268 /* nilfs->sem must be locked by the caller. */ 244 /* nilfs->sem must be locked by the caller. */
269 if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { 245 if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
270 if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) 246 if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC))
271 nilfs_swap_super_block(nilfs); 247 nilfs_swap_super_block(nilfs);
272 else { 248 else {
273 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 249 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
@@ -470,10 +446,10 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
470 if (nilfs_test_opt(sbi, SNAPSHOT)) 446 if (nilfs_test_opt(sbi, SNAPSHOT))
471 seq_printf(seq, ",cp=%llu", 447 seq_printf(seq, ",cp=%llu",
472 (unsigned long long int)sbi->s_snapshot_cno); 448 (unsigned long long int)sbi->s_snapshot_cno);
473 if (nilfs_test_opt(sbi, ERRORS_RO))
474 seq_printf(seq, ",errors=remount-ro");
475 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 449 if (nilfs_test_opt(sbi, ERRORS_PANIC))
476 seq_printf(seq, ",errors=panic"); 450 seq_printf(seq, ",errors=panic");
451 if (nilfs_test_opt(sbi, ERRORS_CONT))
452 seq_printf(seq, ",errors=continue");
477 if (nilfs_test_opt(sbi, STRICT_ORDER)) 453 if (nilfs_test_opt(sbi, STRICT_ORDER))
478 seq_printf(seq, ",order=strict"); 454 seq_printf(seq, ",order=strict");
479 if (nilfs_test_opt(sbi, NORECOVERY)) 455 if (nilfs_test_opt(sbi, NORECOVERY))
@@ -631,7 +607,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi,
631 struct nilfs_super_block *sbp) 607 struct nilfs_super_block *sbp)
632{ 608{
633 sbi->s_mount_opt = 609 sbi->s_mount_opt =
634 NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; 610 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
635} 611}
636 612
637static int nilfs_setup_super(struct nilfs_sb_info *sbi) 613static int nilfs_setup_super(struct nilfs_sb_info *sbi)
@@ -778,9 +754,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
778 goto failed_sbi; 754 goto failed_sbi;
779 } 755 }
780 cno = sbi->s_snapshot_cno; 756 cno = sbi->s_snapshot_cno;
781 } else 757 }
782 /* Read-only mount */
783 sbi->s_snapshot_cno = cno;
784 } 758 }
785 759
786 err = nilfs_attach_checkpoint(sbi, cno); 760 err = nilfs_attach_checkpoint(sbi, cno);
@@ -849,7 +823,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
849 struct the_nilfs *nilfs = sbi->s_nilfs; 823 struct the_nilfs *nilfs = sbi->s_nilfs;
850 unsigned long old_sb_flags; 824 unsigned long old_sb_flags;
851 struct nilfs_mount_options old_opts; 825 struct nilfs_mount_options old_opts;
852 int err; 826 int was_snapshot, err;
853 827
854 lock_kernel(); 828 lock_kernel();
855 829
@@ -857,6 +831,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
857 old_sb_flags = sb->s_flags; 831 old_sb_flags = sb->s_flags;
858 old_opts.mount_opt = sbi->s_mount_opt; 832 old_opts.mount_opt = sbi->s_mount_opt;
859 old_opts.snapshot_cno = sbi->s_snapshot_cno; 833 old_opts.snapshot_cno = sbi->s_snapshot_cno;
834 was_snapshot = nilfs_test_opt(sbi, SNAPSHOT);
860 835
861 if (!parse_options(data, sb)) { 836 if (!parse_options(data, sb)) {
862 err = -EINVAL; 837 err = -EINVAL;
@@ -864,20 +839,32 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
864 } 839 }
865 sb->s_flags = (sb->s_flags & ~MS_POSIXACL); 840 sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
866 841
867 if ((*flags & MS_RDONLY) && 842 err = -EINVAL;
868 sbi->s_snapshot_cno != old_opts.snapshot_cno) { 843 if (was_snapshot) {
869 printk(KERN_WARNING "NILFS (device %s): couldn't " 844 if (!(*flags & MS_RDONLY)) {
870 "remount to a different snapshot.\n", 845 printk(KERN_ERR "NILFS (device %s): cannot remount "
871 sb->s_id); 846 "snapshot read/write.\n",
872 err = -EINVAL; 847 sb->s_id);
873 goto restore_opts; 848 goto restore_opts;
849 } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) {
850 printk(KERN_ERR "NILFS (device %s): cannot "
851 "remount to a different snapshot.\n",
852 sb->s_id);
853 goto restore_opts;
854 }
855 } else {
856 if (nilfs_test_opt(sbi, SNAPSHOT)) {
857 printk(KERN_ERR "NILFS (device %s): cannot change "
858 "a regular mount to a snapshot.\n",
859 sb->s_id);
860 goto restore_opts;
861 }
874 } 862 }
875 863
876 if (!nilfs_valid_fs(nilfs)) { 864 if (!nilfs_valid_fs(nilfs)) {
877 printk(KERN_WARNING "NILFS (device %s): couldn't " 865 printk(KERN_WARNING "NILFS (device %s): couldn't "
878 "remount because the filesystem is in an " 866 "remount because the filesystem is in an "
879 "incomplete recovery state.\n", sb->s_id); 867 "incomplete recovery state.\n", sb->s_id);
880 err = -EINVAL;
881 goto restore_opts; 868 goto restore_opts;
882 } 869 }
883 870
@@ -888,9 +875,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
888 nilfs_detach_segment_constructor(sbi); 875 nilfs_detach_segment_constructor(sbi);
889 sb->s_flags |= MS_RDONLY; 876 sb->s_flags |= MS_RDONLY;
890 877
891 sbi->s_snapshot_cno = nilfs_last_cno(nilfs);
892 /* nilfs_set_opt(sbi, SNAPSHOT); */
893
894 /* 878 /*
895 * Remounting a valid RW partition RDONLY, so set 879 * Remounting a valid RW partition RDONLY, so set
896 * the RDONLY flag and then mark the partition as valid again. 880 * the RDONLY flag and then mark the partition as valid again.
@@ -909,24 +893,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
909 * store the current valid flag. (It may have been changed 893 * store the current valid flag. (It may have been changed
910 * by fsck since we originally mounted the partition.) 894 * by fsck since we originally mounted the partition.)
911 */ 895 */
912 if (nilfs->ns_current && nilfs->ns_current != sbi) {
913 printk(KERN_WARNING "NILFS (device %s): couldn't "
914 "remount because an RW-mount exists.\n",
915 sb->s_id);
916 err = -EBUSY;
917 goto restore_opts;
918 }
919 if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
920 printk(KERN_WARNING "NILFS (device %s): couldn't "
921 "remount because the current RO-mount is not "
922 "the latest one.\n",
923 sb->s_id);
924 err = -EINVAL;
925 goto restore_opts;
926 }
927 sb->s_flags &= ~MS_RDONLY; 896 sb->s_flags &= ~MS_RDONLY;
928 nilfs_clear_opt(sbi, SNAPSHOT);
929 sbi->s_snapshot_cno = 0;
930 897
931 err = nilfs_attach_segment_constructor(sbi); 898 err = nilfs_attach_segment_constructor(sbi);
932 if (err) 899 if (err)
@@ -935,8 +902,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
935 down_write(&nilfs->ns_sem); 902 down_write(&nilfs->ns_sem);
936 nilfs_setup_super(sbi); 903 nilfs_setup_super(sbi);
937 up_write(&nilfs->ns_sem); 904 up_write(&nilfs->ns_sem);
938
939 nilfs->ns_current = sbi;
940 } 905 }
941 out: 906 out:
942 up_write(&nilfs->ns_super_sem); 907 up_write(&nilfs->ns_super_sem);
@@ -1022,10 +987,14 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1022{ 987{
1023 struct nilfs_super_data sd; 988 struct nilfs_super_data sd;
1024 struct super_block *s; 989 struct super_block *s;
990 fmode_t mode = FMODE_READ;
1025 struct the_nilfs *nilfs; 991 struct the_nilfs *nilfs;
1026 int err, need_to_close = 1; 992 int err, need_to_close = 1;
1027 993
1028 sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); 994 if (!(flags & MS_RDONLY))
995 mode |= FMODE_WRITE;
996
997 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1029 if (IS_ERR(sd.bdev)) 998 if (IS_ERR(sd.bdev))
1030 return PTR_ERR(sd.bdev); 999 return PTR_ERR(sd.bdev);
1031 1000
@@ -1092,10 +1061,12 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1092 1061
1093 /* New superblock instance created */ 1062 /* New superblock instance created */
1094 s->s_flags = flags; 1063 s->s_flags = flags;
1064 s->s_mode = mode;
1095 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); 1065 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
1096 sb_set_blocksize(s, block_size(sd.bdev)); 1066 sb_set_blocksize(s, block_size(sd.bdev));
1097 1067
1098 err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs); 1068 err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0,
1069 nilfs);
1099 if (err) 1070 if (err)
1100 goto cancel_new; 1071 goto cancel_new;
1101 1072
@@ -1106,7 +1077,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1106 mutex_unlock(&nilfs->ns_mount_mutex); 1077 mutex_unlock(&nilfs->ns_mount_mutex);
1107 put_nilfs(nilfs); 1078 put_nilfs(nilfs);
1108 if (need_to_close) 1079 if (need_to_close)
1109 close_bdev_exclusive(sd.bdev, flags); 1080 close_bdev_exclusive(sd.bdev, mode);
1110 simple_set_mnt(mnt, s); 1081 simple_set_mnt(mnt, s);
1111 return 0; 1082 return 0;
1112 1083
@@ -1114,7 +1085,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1114 mutex_unlock(&nilfs->ns_mount_mutex); 1085 mutex_unlock(&nilfs->ns_mount_mutex);
1115 put_nilfs(nilfs); 1086 put_nilfs(nilfs);
1116 failed: 1087 failed:
1117 close_bdev_exclusive(sd.bdev, flags); 1088 close_bdev_exclusive(sd.bdev, mode);
1118 1089
1119 return err; 1090 return err;
1120 1091
@@ -1124,7 +1095,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1124 put_nilfs(nilfs); 1095 put_nilfs(nilfs);
1125 deactivate_locked_super(s); 1096 deactivate_locked_super(s);
1126 /* 1097 /*
1127 * deactivate_super() invokes close_bdev_exclusive(). 1098 * deactivate_locked_super() invokes close_bdev_exclusive().
1128 * We must finish all post-cleaning before this call; 1099 * We must finish all post-cleaning before this call;
1129 * put_nilfs() needs the block device. 1100 * put_nilfs() needs the block device.
1130 */ 1101 */
@@ -1139,54 +1110,93 @@ struct file_system_type nilfs_fs_type = {
1139 .fs_flags = FS_REQUIRES_DEV, 1110 .fs_flags = FS_REQUIRES_DEV,
1140}; 1111};
1141 1112
1142static int __init init_nilfs_fs(void) 1113static void nilfs_inode_init_once(void *obj)
1143{ 1114{
1144 int err; 1115 struct nilfs_inode_info *ii = obj;
1145
1146 err = nilfs_init_inode_cache();
1147 if (err)
1148 goto failed;
1149 1116
1150 err = nilfs_init_transaction_cache(); 1117 INIT_LIST_HEAD(&ii->i_dirty);
1151 if (err) 1118#ifdef CONFIG_NILFS_XATTR
1152 goto failed_inode_cache; 1119 init_rwsem(&ii->xattr_sem);
1120#endif
1121 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
1122 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
1123 inode_init_once(&ii->vfs_inode);
1124}
1153 1125
1154 err = nilfs_init_segbuf_cache(); 1126static void nilfs_segbuf_init_once(void *obj)
1155 if (err) 1127{
1156 goto failed_transaction_cache; 1128 memset(obj, 0, sizeof(struct nilfs_segment_buffer));
1129}
1157 1130
1158 err = nilfs_btree_path_cache_init(); 1131static void nilfs_destroy_cachep(void)
1159 if (err) 1132{
1160 goto failed_segbuf_cache; 1133 if (nilfs_inode_cachep)
1134 kmem_cache_destroy(nilfs_inode_cachep);
1135 if (nilfs_transaction_cachep)
1136 kmem_cache_destroy(nilfs_transaction_cachep);
1137 if (nilfs_segbuf_cachep)
1138 kmem_cache_destroy(nilfs_segbuf_cachep);
1139 if (nilfs_btree_path_cache)
1140 kmem_cache_destroy(nilfs_btree_path_cache);
1141}
1161 1142
1162 err = register_filesystem(&nilfs_fs_type); 1143static int __init nilfs_init_cachep(void)
1163 if (err) 1144{
1164 goto failed_btree_path_cache; 1145 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
1146 sizeof(struct nilfs_inode_info), 0,
1147 SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once);
1148 if (!nilfs_inode_cachep)
1149 goto fail;
1150
1151 nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache",
1152 sizeof(struct nilfs_transaction_info), 0,
1153 SLAB_RECLAIM_ACCOUNT, NULL);
1154 if (!nilfs_transaction_cachep)
1155 goto fail;
1156
1157 nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache",
1158 sizeof(struct nilfs_segment_buffer), 0,
1159 SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once);
1160 if (!nilfs_segbuf_cachep)
1161 goto fail;
1162
1163 nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache",
1164 sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX,
1165 0, 0, NULL);
1166 if (!nilfs_btree_path_cache)
1167 goto fail;
1165 1168
1166 return 0; 1169 return 0;
1167 1170
1168 failed_btree_path_cache: 1171fail:
1169 nilfs_btree_path_cache_destroy(); 1172 nilfs_destroy_cachep();
1173 return -ENOMEM;
1174}
1175
1176static int __init init_nilfs_fs(void)
1177{
1178 int err;
1170 1179
1171 failed_segbuf_cache: 1180 err = nilfs_init_cachep();
1172 nilfs_destroy_segbuf_cache(); 1181 if (err)
1182 goto fail;
1173 1183
1174 failed_transaction_cache: 1184 err = register_filesystem(&nilfs_fs_type);
1175 nilfs_destroy_transaction_cache(); 1185 if (err)
1186 goto free_cachep;
1176 1187
1177 failed_inode_cache: 1188 printk(KERN_INFO "NILFS version 2 loaded\n");
1178 nilfs_destroy_inode_cache(); 1189 return 0;
1179 1190
1180 failed: 1191free_cachep:
1192 nilfs_destroy_cachep();
1193fail:
1181 return err; 1194 return err;
1182} 1195}
1183 1196
1184static void __exit exit_nilfs_fs(void) 1197static void __exit exit_nilfs_fs(void)
1185{ 1198{
1186 nilfs_destroy_segbuf_cache(); 1199 nilfs_destroy_cachep();
1187 nilfs_destroy_transaction_cache();
1188 nilfs_destroy_inode_cache();
1189 nilfs_btree_path_cache_destroy();
1190 unregister_filesystem(&nilfs_fs_type); 1200 unregister_filesystem(&nilfs_fs_type);
1191} 1201}
1192 1202
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 33871f7e4f01..8c1097327abc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -486,11 +486,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
486 printk(KERN_WARNING 486 printk(KERN_WARNING
487 "NILFS warning: unable to read secondary superblock\n"); 487 "NILFS warning: unable to read secondary superblock\n");
488 488
489 /*
490 * Compare two super blocks and set 1 in swp if the secondary
491 * super block is valid and newer. Otherwise, set 0 in swp.
492 */
489 valid[0] = nilfs_valid_sb(sbp[0]); 493 valid[0] = nilfs_valid_sb(sbp[0]);
490 valid[1] = nilfs_valid_sb(sbp[1]); 494 valid[1] = nilfs_valid_sb(sbp[1]);
491 swp = valid[1] && 495 swp = valid[1] && (!valid[0] ||
492 (!valid[0] || 496 le64_to_cpu(sbp[1]->s_last_cno) >
493 le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime)); 497 le64_to_cpu(sbp[0]->s_last_cno));
494 498
495 if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) { 499 if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
496 brelse(sbh[1]); 500 brelse(sbh[1]);
@@ -670,7 +674,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
670 start * sects_per_block, 674 start * sects_per_block,
671 nblocks * sects_per_block, 675 nblocks * sects_per_block,
672 GFP_NOFS, 676 GFP_NOFS,
673 DISCARD_FL_BARRIER); 677 BLKDEV_IFL_BARRIER);
674 if (ret < 0) 678 if (ret < 0)
675 return ret; 679 return ret;
676 nblocks = 0; 680 nblocks = 0;
@@ -680,7 +684,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
680 ret = blkdev_issue_discard(nilfs->ns_bdev, 684 ret = blkdev_issue_discard(nilfs->ns_bdev,
681 start * sects_per_block, 685 start * sects_per_block,
682 nblocks * sects_per_block, 686 nblocks * sects_per_block,
683 GFP_NOFS, DISCARD_FL_BARRIER); 687 GFP_NOFS, BLKDEV_IFL_BARRIER);
684 return ret; 688 return ret;
685} 689}
686 690
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 40b1cf914ccb..27b75ebc7460 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -110,14 +110,10 @@ EXPORT_SYMBOL_GPL(get_inotify_watch);
110int pin_inotify_watch(struct inotify_watch *watch) 110int pin_inotify_watch(struct inotify_watch *watch)
111{ 111{
112 struct super_block *sb = watch->inode->i_sb; 112 struct super_block *sb = watch->inode->i_sb;
113 spin_lock(&sb_lock); 113 if (atomic_inc_not_zero(&sb->s_active)) {
114 if (sb->s_count >= S_BIAS) {
115 atomic_inc(&sb->s_active);
116 spin_unlock(&sb_lock);
117 atomic_inc(&watch->count); 114 atomic_inc(&watch->count);
118 return 1; 115 return 1;
119 } 116 }
120 spin_unlock(&sb_lock);
121 return 0; 117 return 0;
122} 118}
123 119
@@ -515,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
515 * done. Cleanup is just deactivate_super(). However, that leaves a messy 511 * done. Cleanup is just deactivate_super(). However, that leaves a messy
516 * case - what if we *are* racing with umount() and active references to 512 * case - what if we *are* racing with umount() and active references to
517 * superblock can't be acquired anymore? We can bump ->s_count, grab 513 * superblock can't be acquired anymore? We can bump ->s_count, grab
518 * ->s_umount, which will almost certainly wait until the superblock is shut 514 * ->s_umount, which will wait until the superblock is shut down and the
519 * down and the watch in question is pining for fjords. That's fine, but 515 * watch in question is pining for fjords.
520 * there is a problem - we might have hit the window between ->s_active
521 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
522 * is past the point of no return and is heading for shutdown) and the
523 * moment when deactivate_super() acquires ->s_umount. We could just do
524 * drop_super() yield() and retry, but that's rather antisocial and this
525 * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
526 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
527 * that we won't race with inotify_umount_inodes(). So we could grab a
528 * reference to watch and do the rest as above, just with drop_super() instead
529 * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
530 * could grab ->s_umount. So the watch could've been gone already.
531 *
532 * That still can be dealt with - we need to save watch->wd, do idr_find()
533 * and compare its result with our pointer. If they match, we either have
534 * the damn thing still alive or we'd lost not one but two races at once,
535 * the watch had been killed and a new one got created with the same ->wd
536 * at the same address. That couldn't have happened in inotify_destroy(),
537 * but inotify_rm_wd() could run into that. Still, "new one got created"
538 * is not a problem - we have every right to kill it or leave it alone,
539 * whatever's more convenient.
540 *
541 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
542 * "grab it and kill it" check. If it's been our original watch, we are
543 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
544 * race and kill the fscker anyway; we are safe since we know that its
545 * superblock won't be going away.
546 * 516 *
547 * And yes, this is far beyond mere "not very pretty"; so's the entire 517 * And yes, this is far beyond mere "not very pretty"; so's the entire
548 * concept of inotify to start with. 518 * concept of inotify to start with.
@@ -556,57 +526,31 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
556 * Called with ih->mutex held, drops it. Possible return values: 526 * Called with ih->mutex held, drops it. Possible return values:
557 * 0 - nothing to do, it has died 527 * 0 - nothing to do, it has died
558 * 1 - remove it, drop the reference and deactivate_super() 528 * 1 - remove it, drop the reference and deactivate_super()
559 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
560 * that variant, since it involved a lot of PITA, but that's the best that
561 * could've been done.
562 */ 529 */
563static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) 530static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
564{ 531{
565 struct super_block *sb = watch->inode->i_sb; 532 struct super_block *sb = watch->inode->i_sb;
566 s32 wd = watch->wd;
567 533
568 spin_lock(&sb_lock); 534 if (atomic_inc_not_zero(&sb->s_active)) {
569 if (sb->s_count >= S_BIAS) {
570 atomic_inc(&sb->s_active);
571 spin_unlock(&sb_lock);
572 get_inotify_watch(watch); 535 get_inotify_watch(watch);
573 mutex_unlock(&ih->mutex); 536 mutex_unlock(&ih->mutex);
574 return 1; /* the best outcome */ 537 return 1; /* the best outcome */
575 } 538 }
539 spin_lock(&sb_lock);
576 sb->s_count++; 540 sb->s_count++;
577 spin_unlock(&sb_lock); 541 spin_unlock(&sb_lock);
578 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ 542 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
579 down_read(&sb->s_umount); 543 down_read(&sb->s_umount);
580 if (likely(!sb->s_root)) { 544 /* fs is already shut down; the watch is dead */
581 /* fs is already shut down; the watch is dead */ 545 drop_super(sb);
582 drop_super(sb); 546 return 0;
583 return 0;
584 }
585 /* raced with the final deactivate_super() */
586 mutex_lock(&ih->mutex);
587 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
588 /* the watch is dead */
589 mutex_unlock(&ih->mutex);
590 drop_super(sb);
591 return 0;
592 }
593 /* still alive or freed and reused with the same sb and wd; kill */
594 get_inotify_watch(watch);
595 mutex_unlock(&ih->mutex);
596 return 2;
597} 547}
598 548
599static void unpin_and_kill(struct inotify_watch *watch, int how) 549static void unpin_and_kill(struct inotify_watch *watch)
600{ 550{
601 struct super_block *sb = watch->inode->i_sb; 551 struct super_block *sb = watch->inode->i_sb;
602 put_inotify_watch(watch); 552 put_inotify_watch(watch);
603 switch (how) { 553 deactivate_super(sb);
604 case 1:
605 deactivate_super(sb);
606 break;
607 case 2:
608 drop_super(sb);
609 }
610} 554}
611 555
612/** 556/**
@@ -628,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih)
628 struct list_head *watches; 572 struct list_head *watches;
629 struct super_block *sb; 573 struct super_block *sb;
630 struct inode *inode; 574 struct inode *inode;
631 int how;
632 575
633 mutex_lock(&ih->mutex); 576 mutex_lock(&ih->mutex);
634 watches = &ih->watches; 577 watches = &ih->watches;
@@ -638,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih)
638 } 581 }
639 watch = list_first_entry(watches, struct inotify_watch, h_list); 582 watch = list_first_entry(watches, struct inotify_watch, h_list);
640 sb = watch->inode->i_sb; 583 sb = watch->inode->i_sb;
641 how = pin_to_kill(ih, watch); 584 if (!pin_to_kill(ih, watch))
642 if (!how)
643 continue; 585 continue;
644 586
645 inode = watch->inode; 587 inode = watch->inode;
@@ -654,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih)
654 596
655 mutex_unlock(&ih->mutex); 597 mutex_unlock(&ih->mutex);
656 mutex_unlock(&inode->inotify_mutex); 598 mutex_unlock(&inode->inotify_mutex);
657 unpin_and_kill(watch, how); 599 unpin_and_kill(watch);
658 } 600 }
659 601
660 /* free this handle: the put matching the get in inotify_init() */ 602 /* free this handle: the put matching the get in inotify_init() */
@@ -857,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
857 struct inotify_watch *watch; 799 struct inotify_watch *watch;
858 struct super_block *sb; 800 struct super_block *sb;
859 struct inode *inode; 801 struct inode *inode;
860 int how;
861 802
862 mutex_lock(&ih->mutex); 803 mutex_lock(&ih->mutex);
863 watch = idr_find(&ih->idr, wd); 804 watch = idr_find(&ih->idr, wd);
@@ -866,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
866 return -EINVAL; 807 return -EINVAL;
867 } 808 }
868 sb = watch->inode->i_sb; 809 sb = watch->inode->i_sb;
869 how = pin_to_kill(ih, watch); 810 if (!pin_to_kill(ih, watch))
870 if (!how)
871 return 0; 811 return 0;
872 812
873 inode = watch->inode; 813 inode = watch->inode;
@@ -881,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
881 821
882 mutex_unlock(&ih->mutex); 822 mutex_unlock(&ih->mutex);
883 mutex_unlock(&inode->inotify_mutex); 823 mutex_unlock(&inode->inotify_mutex);
884 unpin_and_kill(watch, how); 824 unpin_and_kill(watch);
885 825
886 return 0; 826 return 0;
887} 827}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 791c0886c060..07d9fd854350 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \ 31 refcounttree.o \
32 reservations.o \
32 resize.o \ 33 resize.o \
33 slot_map.o \ 34 slot_map.o \
34 suballoc.o \ 35 suballoc.o \
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e13fc9e8fcdc..da702294d7e7 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -489,7 +489,7 @@ cleanup:
489 return ret; 489 return ret;
490} 490}
491 491
492struct xattr_handler ocfs2_xattr_acl_access_handler = { 492const struct xattr_handler ocfs2_xattr_acl_access_handler = {
493 .prefix = POSIX_ACL_XATTR_ACCESS, 493 .prefix = POSIX_ACL_XATTR_ACCESS,
494 .flags = ACL_TYPE_ACCESS, 494 .flags = ACL_TYPE_ACCESS,
495 .list = ocfs2_xattr_list_acl_access, 495 .list = ocfs2_xattr_list_acl_access,
@@ -497,7 +497,7 @@ struct xattr_handler ocfs2_xattr_acl_access_handler = {
497 .set = ocfs2_xattr_set_acl, 497 .set = ocfs2_xattr_set_acl,
498}; 498};
499 499
500struct xattr_handler ocfs2_xattr_acl_default_handler = { 500const struct xattr_handler ocfs2_xattr_acl_default_handler = {
501 .prefix = POSIX_ACL_XATTR_DEFAULT, 501 .prefix = POSIX_ACL_XATTR_DEFAULT,
502 .flags = ACL_TYPE_DEFAULT, 502 .flags = ACL_TYPE_DEFAULT,
503 .list = ocfs2_xattr_list_acl_default, 503 .list = ocfs2_xattr_list_acl_default,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9f8bd913c51e..215e12ce1d85 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1006,7 +1006,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1006 int count, status, i; 1006 int count, status, i;
1007 u16 suballoc_bit_start; 1007 u16 suballoc_bit_start;
1008 u32 num_got; 1008 u32 num_got;
1009 u64 first_blkno; 1009 u64 suballoc_loc, first_blkno;
1010 struct ocfs2_super *osb = 1010 struct ocfs2_super *osb =
1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); 1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
1012 struct ocfs2_extent_block *eb; 1012 struct ocfs2_extent_block *eb;
@@ -1015,10 +1015,10 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1015 1015
1016 count = 0; 1016 count = 0;
1017 while (count < wanted) { 1017 while (count < wanted) {
1018 status = ocfs2_claim_metadata(osb, 1018 status = ocfs2_claim_metadata(handle,
1019 handle,
1020 meta_ac, 1019 meta_ac,
1021 wanted - count, 1020 wanted - count,
1021 &suballoc_loc,
1022 &suballoc_bit_start, 1022 &suballoc_bit_start,
1023 &num_got, 1023 &num_got,
1024 &first_blkno); 1024 &first_blkno);
@@ -1052,6 +1052,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
1053 eb->h_suballoc_slot = 1053 eb->h_suballoc_slot =
1054 cpu_to_le16(meta_ac->ac_alloc_slot); 1054 cpu_to_le16(meta_ac->ac_alloc_slot);
1055 eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
1055 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1056 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1056 eb->h_list.l_count = 1057 eb->h_list.l_count =
1057 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 1058 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
@@ -1061,11 +1062,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1061 1062
1062 /* We'll also be dirtied by the caller, so 1063 /* We'll also be dirtied by the caller, so
1063 * this isn't absolutely necessary. */ 1064 * this isn't absolutely necessary. */
1064 status = ocfs2_journal_dirty(handle, bhs[i]); 1065 ocfs2_journal_dirty(handle, bhs[i]);
1065 if (status < 0) {
1066 mlog_errno(status);
1067 goto bail;
1068 }
1069 } 1066 }
1070 1067
1071 count += num_got; 1068 count += num_got;
@@ -1129,8 +1126,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1129 goto out; 1126 goto out;
1130 } 1127 }
1131 1128
1132 status = ocfs2_extend_trans(handle, path_num_items(path) + 1129 status = ocfs2_extend_trans(handle, path_num_items(path));
1133 handle->h_buffer_credits);
1134 if (status < 0) { 1130 if (status < 0) {
1135 mlog_errno(status); 1131 mlog_errno(status);
1136 goto out; 1132 goto out;
@@ -1270,12 +1266,7 @@ static int ocfs2_add_branch(handle_t *handle,
1270 if (!eb_el->l_tree_depth) 1266 if (!eb_el->l_tree_depth)
1271 new_last_eb_blk = le64_to_cpu(eb->h_blkno); 1267 new_last_eb_blk = le64_to_cpu(eb->h_blkno);
1272 1268
1273 status = ocfs2_journal_dirty(handle, bh); 1269 ocfs2_journal_dirty(handle, bh);
1274 if (status < 0) {
1275 mlog_errno(status);
1276 goto bail;
1277 }
1278
1279 next_blkno = le64_to_cpu(eb->h_blkno); 1270 next_blkno = le64_to_cpu(eb->h_blkno);
1280 } 1271 }
1281 1272
@@ -1321,17 +1312,10 @@ static int ocfs2_add_branch(handle_t *handle,
1321 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 1312 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
1322 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 1313 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
1323 1314
1324 status = ocfs2_journal_dirty(handle, *last_eb_bh); 1315 ocfs2_journal_dirty(handle, *last_eb_bh);
1325 if (status < 0) 1316 ocfs2_journal_dirty(handle, et->et_root_bh);
1326 mlog_errno(status); 1317 if (eb_bh)
1327 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1318 ocfs2_journal_dirty(handle, eb_bh);
1328 if (status < 0)
1329 mlog_errno(status);
1330 if (eb_bh) {
1331 status = ocfs2_journal_dirty(handle, eb_bh);
1332 if (status < 0)
1333 mlog_errno(status);
1334 }
1335 1319
1336 /* 1320 /*
1337 * Some callers want to track the rightmost leaf so pass it 1321 * Some callers want to track the rightmost leaf so pass it
@@ -1399,11 +1383,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1399 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) 1383 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
1400 eb_el->l_recs[i] = root_el->l_recs[i]; 1384 eb_el->l_recs[i] = root_el->l_recs[i];
1401 1385
1402 status = ocfs2_journal_dirty(handle, new_eb_bh); 1386 ocfs2_journal_dirty(handle, new_eb_bh);
1403 if (status < 0) {
1404 mlog_errno(status);
1405 goto bail;
1406 }
1407 1387
1408 status = ocfs2_et_root_journal_access(handle, et, 1388 status = ocfs2_et_root_journal_access(handle, et,
1409 OCFS2_JOURNAL_ACCESS_WRITE); 1389 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1428,11 +1408,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1428 if (root_el->l_tree_depth == cpu_to_le16(1)) 1408 if (root_el->l_tree_depth == cpu_to_le16(1))
1429 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 1409 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
1430 1410
1431 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1411 ocfs2_journal_dirty(handle, et->et_root_bh);
1432 if (status < 0) {
1433 mlog_errno(status);
1434 goto bail;
1435 }
1436 1412
1437 *ret_new_eb_bh = new_eb_bh; 1413 *ret_new_eb_bh = new_eb_bh;
1438 new_eb_bh = NULL; 1414 new_eb_bh = NULL;
@@ -2064,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2064 struct ocfs2_path *right_path, 2040 struct ocfs2_path *right_path,
2065 int subtree_index) 2041 int subtree_index)
2066{ 2042{
2067 int ret, i, idx; 2043 int i, idx;
2068 struct ocfs2_extent_list *el, *left_el, *right_el; 2044 struct ocfs2_extent_list *el, *left_el, *right_el;
2069 struct ocfs2_extent_rec *left_rec, *right_rec; 2045 struct ocfs2_extent_rec *left_rec, *right_rec;
2070 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 2046 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
@@ -2102,13 +2078,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2102 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 2078 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
2103 right_el); 2079 right_el);
2104 2080
2105 ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 2081 ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
2106 if (ret) 2082 ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2107 mlog_errno(ret);
2108
2109 ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2110 if (ret)
2111 mlog_errno(ret);
2112 2083
2113 /* 2084 /*
2114 * Setup our list pointers now so that the current 2085 * Setup our list pointers now so that the current
@@ -2132,9 +2103,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2132 2103
2133 root_bh = left_path->p_node[subtree_index].bh; 2104 root_bh = left_path->p_node[subtree_index].bh;
2134 2105
2135 ret = ocfs2_journal_dirty(handle, root_bh); 2106 ocfs2_journal_dirty(handle, root_bh);
2136 if (ret)
2137 mlog_errno(ret);
2138} 2107}
2139 2108
2140static int ocfs2_rotate_subtree_right(handle_t *handle, 2109static int ocfs2_rotate_subtree_right(handle_t *handle,
@@ -2207,11 +2176,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2207 2176
2208 ocfs2_create_empty_extent(right_el); 2177 ocfs2_create_empty_extent(right_el);
2209 2178
2210 ret = ocfs2_journal_dirty(handle, right_leaf_bh); 2179 ocfs2_journal_dirty(handle, right_leaf_bh);
2211 if (ret) {
2212 mlog_errno(ret);
2213 goto out;
2214 }
2215 2180
2216 /* Do the copy now. */ 2181 /* Do the copy now. */
2217 i = le16_to_cpu(left_el->l_next_free_rec) - 1; 2182 i = le16_to_cpu(left_el->l_next_free_rec) - 1;
@@ -2230,11 +2195,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2230 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2195 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2231 le16_add_cpu(&left_el->l_next_free_rec, 1); 2196 le16_add_cpu(&left_el->l_next_free_rec, 1);
2232 2197
2233 ret = ocfs2_journal_dirty(handle, left_leaf_bh); 2198 ocfs2_journal_dirty(handle, left_leaf_bh);
2234 if (ret) {
2235 mlog_errno(ret);
2236 goto out;
2237 }
2238 2199
2239 ocfs2_complete_edge_insert(handle, left_path, right_path, 2200 ocfs2_complete_edge_insert(handle, left_path, right_path,
2240 subtree_index); 2201 subtree_index);
@@ -2249,8 +2210,8 @@ out:
2249 * 2210 *
2250 * Will return zero if the path passed in is already the leftmost path. 2211 * Will return zero if the path passed in is already the leftmost path.
2251 */ 2212 */
2252static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 2213int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
2253 struct ocfs2_path *path, u32 *cpos) 2214 struct ocfs2_path *path, u32 *cpos)
2254{ 2215{
2255 int i, j, ret = 0; 2216 int i, j, ret = 0;
2256 u64 blkno; 2217 u64 blkno;
@@ -2327,20 +2288,14 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
2327 int op_credits, 2288 int op_credits,
2328 struct ocfs2_path *path) 2289 struct ocfs2_path *path)
2329{ 2290{
2330 int ret; 2291 int ret = 0;
2331 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 2292 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
2332 2293
2333 if (handle->h_buffer_credits < credits) { 2294 if (handle->h_buffer_credits < credits)
2334 ret = ocfs2_extend_trans(handle, 2295 ret = ocfs2_extend_trans(handle,
2335 credits - handle->h_buffer_credits); 2296 credits - handle->h_buffer_credits);
2336 if (ret)
2337 return ret;
2338 2297
2339 if (unlikely(handle->h_buffer_credits < credits)) 2298 return ret;
2340 return ocfs2_extend_trans(handle, credits);
2341 }
2342
2343 return 0;
2344} 2299}
2345 2300
2346/* 2301/*
@@ -2584,8 +2539,7 @@ static int ocfs2_update_edge_lengths(handle_t *handle,
2584 * records for all the bh in the path. 2539 * records for all the bh in the path.
2585 * So we have to allocate extra credits and access them. 2540 * So we have to allocate extra credits and access them.
2586 */ 2541 */
2587 ret = ocfs2_extend_trans(handle, 2542 ret = ocfs2_extend_trans(handle, subtree_index);
2588 handle->h_buffer_credits + subtree_index);
2589 if (ret) { 2543 if (ret) {
2590 mlog_errno(ret); 2544 mlog_errno(ret);
2591 goto out; 2545 goto out;
@@ -2823,12 +2777,8 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2823 ocfs2_remove_empty_extent(right_leaf_el); 2777 ocfs2_remove_empty_extent(right_leaf_el);
2824 } 2778 }
2825 2779
2826 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2780 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2827 if (ret) 2781 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2828 mlog_errno(ret);
2829 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2830 if (ret)
2831 mlog_errno(ret);
2832 2782
2833 if (del_right_subtree) { 2783 if (del_right_subtree) {
2834 ocfs2_unlink_subtree(handle, et, left_path, right_path, 2784 ocfs2_unlink_subtree(handle, et, left_path, right_path,
@@ -2851,9 +2801,7 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2851 if (right_has_empty) 2801 if (right_has_empty)
2852 ocfs2_remove_empty_extent(left_leaf_el); 2802 ocfs2_remove_empty_extent(left_leaf_el);
2853 2803
2854 ret = ocfs2_journal_dirty(handle, et_root_bh); 2804 ocfs2_journal_dirty(handle, et_root_bh);
2855 if (ret)
2856 mlog_errno(ret);
2857 2805
2858 *deleted = 1; 2806 *deleted = 1;
2859 } else 2807 } else
@@ -2962,10 +2910,7 @@ static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
2962 } 2910 }
2963 2911
2964 ocfs2_remove_empty_extent(el); 2912 ocfs2_remove_empty_extent(el);
2965 2913 ocfs2_journal_dirty(handle, bh);
2966 ret = ocfs2_journal_dirty(handle, bh);
2967 if (ret)
2968 mlog_errno(ret);
2969 2914
2970out: 2915out:
2971 return ret; 2916 return ret;
@@ -3506,15 +3451,9 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3506 3451
3507 ocfs2_cleanup_merge(el, index); 3452 ocfs2_cleanup_merge(el, index);
3508 3453
3509 ret = ocfs2_journal_dirty(handle, bh); 3454 ocfs2_journal_dirty(handle, bh);
3510 if (ret)
3511 mlog_errno(ret);
3512
3513 if (right_path) { 3455 if (right_path) {
3514 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); 3456 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
3515 if (ret)
3516 mlog_errno(ret);
3517
3518 ocfs2_complete_edge_insert(handle, left_path, right_path, 3457 ocfs2_complete_edge_insert(handle, left_path, right_path,
3519 subtree_index); 3458 subtree_index);
3520 } 3459 }
@@ -3683,14 +3622,9 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3683 3622
3684 ocfs2_cleanup_merge(el, index); 3623 ocfs2_cleanup_merge(el, index);
3685 3624
3686 ret = ocfs2_journal_dirty(handle, bh); 3625 ocfs2_journal_dirty(handle, bh);
3687 if (ret)
3688 mlog_errno(ret);
3689
3690 if (left_path) { 3626 if (left_path) {
3691 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 3627 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
3692 if (ret)
3693 mlog_errno(ret);
3694 3628
3695 /* 3629 /*
3696 * In the situation that the right_rec is empty and the extent 3630 * In the situation that the right_rec is empty and the extent
@@ -4016,10 +3950,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
4016 le32_add_cpu(&rec->e_int_clusters, 3950 le32_add_cpu(&rec->e_int_clusters,
4017 -le32_to_cpu(rec->e_cpos)); 3951 -le32_to_cpu(rec->e_cpos));
4018 3952
4019 ret = ocfs2_journal_dirty(handle, bh); 3953 ocfs2_journal_dirty(handle, bh);
4020 if (ret)
4021 mlog_errno(ret);
4022
4023 } 3954 }
4024} 3955}
4025 3956
@@ -4203,17 +4134,13 @@ static int ocfs2_insert_path(handle_t *handle,
4203 struct buffer_head *leaf_bh = path_leaf_bh(right_path); 4134 struct buffer_head *leaf_bh = path_leaf_bh(right_path);
4204 4135
4205 if (left_path) { 4136 if (left_path) {
4206 int credits = handle->h_buffer_credits;
4207
4208 /* 4137 /*
4209 * There's a chance that left_path got passed back to 4138 * There's a chance that left_path got passed back to
4210 * us without being accounted for in the 4139 * us without being accounted for in the
4211 * journal. Extend our transaction here to be sure we 4140 * journal. Extend our transaction here to be sure we
4212 * can change those blocks. 4141 * can change those blocks.
4213 */ 4142 */
4214 credits += left_path->p_tree_depth; 4143 ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
4215
4216 ret = ocfs2_extend_trans(handle, credits);
4217 if (ret < 0) { 4144 if (ret < 0) {
4218 mlog_errno(ret); 4145 mlog_errno(ret);
4219 goto out; 4146 goto out;
@@ -4251,17 +4178,13 @@ static int ocfs2_insert_path(handle_t *handle,
4251 * dirty this for us. 4178 * dirty this for us.
4252 */ 4179 */
4253 if (left_path) 4180 if (left_path)
4254 ret = ocfs2_journal_dirty(handle, 4181 ocfs2_journal_dirty(handle,
4255 path_leaf_bh(left_path)); 4182 path_leaf_bh(left_path));
4256 if (ret)
4257 mlog_errno(ret);
4258 } else 4183 } else
4259 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path), 4184 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
4260 insert); 4185 insert);
4261 4186
4262 ret = ocfs2_journal_dirty(handle, leaf_bh); 4187 ocfs2_journal_dirty(handle, leaf_bh);
4263 if (ret)
4264 mlog_errno(ret);
4265 4188
4266 if (left_path) { 4189 if (left_path) {
4267 /* 4190 /*
@@ -4384,9 +4307,7 @@ out_update_clusters:
4384 ocfs2_et_update_clusters(et, 4307 ocfs2_et_update_clusters(et,
4385 le16_to_cpu(insert_rec->e_leaf_clusters)); 4308 le16_to_cpu(insert_rec->e_leaf_clusters));
4386 4309
4387 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 4310 ocfs2_journal_dirty(handle, et->et_root_bh);
4388 if (ret)
4389 mlog_errno(ret);
4390 4311
4391out: 4312out:
4392 ocfs2_free_path(left_path); 4313 ocfs2_free_path(left_path);
@@ -4866,7 +4787,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4866 goto leave; 4787 goto leave;
4867 } 4788 }
4868 4789
4869 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 4790 status = __ocfs2_claim_clusters(handle, data_ac, 1,
4870 clusters_to_add, &bit_off, &num_bits); 4791 clusters_to_add, &bit_off, &num_bits);
4871 if (status < 0) { 4792 if (status < 0) {
4872 if (status != -ENOSPC) 4793 if (status != -ENOSPC)
@@ -4895,11 +4816,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4895 goto leave; 4816 goto leave;
4896 } 4817 }
4897 4818
4898 status = ocfs2_journal_dirty(handle, et->et_root_bh); 4819 ocfs2_journal_dirty(handle, et->et_root_bh);
4899 if (status < 0) {
4900 mlog_errno(status);
4901 goto leave;
4902 }
4903 4820
4904 clusters_to_add -= num_bits; 4821 clusters_to_add -= num_bits;
4905 *logical_offset += num_bits; 4822 *logical_offset += num_bits;
@@ -5309,7 +5226,7 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5309 int index, u32 new_range, 5226 int index, u32 new_range,
5310 struct ocfs2_alloc_context *meta_ac) 5227 struct ocfs2_alloc_context *meta_ac)
5311{ 5228{
5312 int ret, depth, credits = handle->h_buffer_credits; 5229 int ret, depth, credits;
5313 struct buffer_head *last_eb_bh = NULL; 5230 struct buffer_head *last_eb_bh = NULL;
5314 struct ocfs2_extent_block *eb; 5231 struct ocfs2_extent_block *eb;
5315 struct ocfs2_extent_list *rightmost_el, *el; 5232 struct ocfs2_extent_list *rightmost_el, *el;
@@ -5340,8 +5257,8 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5340 } else 5257 } else
5341 rightmost_el = path_leaf_el(path); 5258 rightmost_el = path_leaf_el(path);
5342 5259
5343 credits += path->p_tree_depth + 5260 credits = path->p_tree_depth +
5344 ocfs2_extend_meta_needed(et->et_root_el); 5261 ocfs2_extend_meta_needed(et->et_root_el);
5345 ret = ocfs2_extend_trans(handle, credits); 5262 ret = ocfs2_extend_trans(handle, credits);
5346 if (ret) { 5263 if (ret) {
5347 mlog_errno(ret); 5264 mlog_errno(ret);
@@ -5671,19 +5588,97 @@ out:
5671 return ret; 5588 return ret;
5672} 5589}
5673 5590
5591/*
5592 * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
5593 * same as ocfs2_lock_alloctors(), except for it accepts a blocks
5594 * number to reserve some extra blocks, and it only handles meta
5595 * data allocations.
5596 *
5597 * Currently, only ocfs2_remove_btree_range() uses it for truncating
5598 * and punching holes.
5599 */
5600static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
5601 struct ocfs2_extent_tree *et,
5602 u32 extents_to_split,
5603 struct ocfs2_alloc_context **ac,
5604 int extra_blocks)
5605{
5606 int ret = 0, num_free_extents;
5607 unsigned int max_recs_needed = 2 * extents_to_split;
5608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5609
5610 *ac = NULL;
5611
5612 num_free_extents = ocfs2_num_free_extents(osb, et);
5613 if (num_free_extents < 0) {
5614 ret = num_free_extents;
5615 mlog_errno(ret);
5616 goto out;
5617 }
5618
5619 if (!num_free_extents ||
5620 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
5621 extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
5622
5623 if (extra_blocks) {
5624 ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
5625 if (ret < 0) {
5626 if (ret != -ENOSPC)
5627 mlog_errno(ret);
5628 goto out;
5629 }
5630 }
5631
5632out:
5633 if (ret) {
5634 if (*ac) {
5635 ocfs2_free_alloc_context(*ac);
5636 *ac = NULL;
5637 }
5638 }
5639
5640 return ret;
5641}
5642
5674int ocfs2_remove_btree_range(struct inode *inode, 5643int ocfs2_remove_btree_range(struct inode *inode,
5675 struct ocfs2_extent_tree *et, 5644 struct ocfs2_extent_tree *et,
5676 u32 cpos, u32 phys_cpos, u32 len, 5645 u32 cpos, u32 phys_cpos, u32 len, int flags,
5677 struct ocfs2_cached_dealloc_ctxt *dealloc) 5646 struct ocfs2_cached_dealloc_ctxt *dealloc,
5647 u64 refcount_loc)
5678{ 5648{
5679 int ret; 5649 int ret, credits = 0, extra_blocks = 0;
5680 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5650 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5681 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5651 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5682 struct inode *tl_inode = osb->osb_tl_inode; 5652 struct inode *tl_inode = osb->osb_tl_inode;
5683 handle_t *handle; 5653 handle_t *handle;
5684 struct ocfs2_alloc_context *meta_ac = NULL; 5654 struct ocfs2_alloc_context *meta_ac = NULL;
5655 struct ocfs2_refcount_tree *ref_tree = NULL;
5656
5657 if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
5658 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
5659 OCFS2_HAS_REFCOUNT_FL));
5660
5661 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
5662 &ref_tree, NULL);
5663 if (ret) {
5664 mlog_errno(ret);
5665 goto out;
5666 }
5685 5667
5686 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); 5668 ret = ocfs2_prepare_refcount_change_for_del(inode,
5669 refcount_loc,
5670 phys_blkno,
5671 len,
5672 &credits,
5673 &extra_blocks);
5674 if (ret < 0) {
5675 mlog_errno(ret);
5676 goto out;
5677 }
5678 }
5679
5680 ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
5681 extra_blocks);
5687 if (ret) { 5682 if (ret) {
5688 mlog_errno(ret); 5683 mlog_errno(ret);
5689 return ret; 5684 return ret;
@@ -5699,7 +5694,8 @@ int ocfs2_remove_btree_range(struct inode *inode,
5699 } 5694 }
5700 } 5695 }
5701 5696
5702 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5697 handle = ocfs2_start_trans(osb,
5698 ocfs2_remove_extent_credits(osb->sb) + credits);
5703 if (IS_ERR(handle)) { 5699 if (IS_ERR(handle)) {
5704 ret = PTR_ERR(handle); 5700 ret = PTR_ERR(handle);
5705 mlog_errno(ret); 5701 mlog_errno(ret);
@@ -5724,15 +5720,22 @@ int ocfs2_remove_btree_range(struct inode *inode,
5724 5720
5725 ocfs2_et_update_clusters(et, -len); 5721 ocfs2_et_update_clusters(et, -len);
5726 5722
5727 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 5723 ocfs2_journal_dirty(handle, et->et_root_bh);
5728 if (ret) {
5729 mlog_errno(ret);
5730 goto out_commit;
5731 }
5732 5724
5733 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 5725 if (phys_blkno) {
5734 if (ret) 5726 if (flags & OCFS2_EXT_REFCOUNTED)
5735 mlog_errno(ret); 5727 ret = ocfs2_decrease_refcount(inode, handle,
5728 ocfs2_blocks_to_clusters(osb->sb,
5729 phys_blkno),
5730 len, meta_ac,
5731 dealloc, 1);
5732 else
5733 ret = ocfs2_truncate_log_append(osb, handle,
5734 phys_blkno, len);
5735 if (ret)
5736 mlog_errno(ret);
5737
5738 }
5736 5739
5737out_commit: 5740out_commit:
5738 ocfs2_commit_trans(osb, handle); 5741 ocfs2_commit_trans(osb, handle);
@@ -5742,6 +5745,9 @@ out:
5742 if (meta_ac) 5745 if (meta_ac)
5743 ocfs2_free_alloc_context(meta_ac); 5746 ocfs2_free_alloc_context(meta_ac);
5744 5747
5748 if (ref_tree)
5749 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
5750
5745 return ret; 5751 return ret;
5746} 5752}
5747 5753
@@ -5850,11 +5856,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5850 } 5856 }
5851 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 5857 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
5852 5858
5853 status = ocfs2_journal_dirty(handle, tl_bh); 5859 ocfs2_journal_dirty(handle, tl_bh);
5854 if (status < 0) {
5855 mlog_errno(status);
5856 goto bail;
5857 }
5858 5860
5859bail: 5861bail:
5860 mlog_exit(status); 5862 mlog_exit(status);
@@ -5893,11 +5895,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5893 5895
5894 tl->tl_used = cpu_to_le16(i); 5896 tl->tl_used = cpu_to_le16(i);
5895 5897
5896 status = ocfs2_journal_dirty(handle, tl_bh); 5898 ocfs2_journal_dirty(handle, tl_bh);
5897 if (status < 0) {
5898 mlog_errno(status);
5899 goto bail;
5900 }
5901 5899
5902 /* TODO: Perhaps we can calculate the bulk of the 5900 /* TODO: Perhaps we can calculate the bulk of the
5903 * credits up front rather than extending like 5901 * credits up front rather than extending like
@@ -6298,6 +6296,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6298 */ 6296 */
6299struct ocfs2_cached_block_free { 6297struct ocfs2_cached_block_free {
6300 struct ocfs2_cached_block_free *free_next; 6298 struct ocfs2_cached_block_free *free_next;
6299 u64 free_bg;
6301 u64 free_blk; 6300 u64 free_blk;
6302 unsigned int free_bit; 6301 unsigned int free_bit;
6303}; 6302};
@@ -6344,8 +6343,11 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6344 } 6343 }
6345 6344
6346 while (head) { 6345 while (head) {
6347 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 6346 if (head->free_bg)
6348 head->free_bit); 6347 bg_blkno = head->free_bg;
6348 else
6349 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
6350 head->free_bit);
6349 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 6351 mlog(0, "Free bit: (bit %u, blkno %llu)\n",
6350 head->free_bit, (unsigned long long)head->free_blk); 6352 head->free_bit, (unsigned long long)head->free_blk);
6351 6353
@@ -6393,7 +6395,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6393 int ret = 0; 6395 int ret = 0;
6394 struct ocfs2_cached_block_free *item; 6396 struct ocfs2_cached_block_free *item;
6395 6397
6396 item = kmalloc(sizeof(*item), GFP_NOFS); 6398 item = kzalloc(sizeof(*item), GFP_NOFS);
6397 if (item == NULL) { 6399 if (item == NULL) {
6398 ret = -ENOMEM; 6400 ret = -ENOMEM;
6399 mlog_errno(ret); 6401 mlog_errno(ret);
@@ -6533,8 +6535,8 @@ ocfs2_find_per_slot_free_list(int type,
6533} 6535}
6534 6536
6535int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6537int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6536 int type, int slot, u64 blkno, 6538 int type, int slot, u64 suballoc,
6537 unsigned int bit) 6539 u64 blkno, unsigned int bit)
6538{ 6540{
6539 int ret; 6541 int ret;
6540 struct ocfs2_per_slot_free_list *fl; 6542 struct ocfs2_per_slot_free_list *fl;
@@ -6547,7 +6549,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6547 goto out; 6549 goto out;
6548 } 6550 }
6549 6551
6550 item = kmalloc(sizeof(*item), GFP_NOFS); 6552 item = kzalloc(sizeof(*item), GFP_NOFS);
6551 if (item == NULL) { 6553 if (item == NULL) {
6552 ret = -ENOMEM; 6554 ret = -ENOMEM;
6553 mlog_errno(ret); 6555 mlog_errno(ret);
@@ -6557,6 +6559,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6557 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 6559 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
6558 type, slot, bit, (unsigned long long)blkno); 6560 type, slot, bit, (unsigned long long)blkno);
6559 6561
6562 item->free_bg = suballoc;
6560 item->free_blk = blkno; 6563 item->free_blk = blkno;
6561 item->free_bit = bit; 6564 item->free_bit = bit;
6562 item->free_next = fl->f_first; 6565 item->free_next = fl->f_first;
@@ -6573,433 +6576,11 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
6573{ 6576{
6574 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, 6577 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
6575 le16_to_cpu(eb->h_suballoc_slot), 6578 le16_to_cpu(eb->h_suballoc_slot),
6579 le64_to_cpu(eb->h_suballoc_loc),
6576 le64_to_cpu(eb->h_blkno), 6580 le64_to_cpu(eb->h_blkno),
6577 le16_to_cpu(eb->h_suballoc_bit)); 6581 le16_to_cpu(eb->h_suballoc_bit));
6578} 6582}
6579 6583
6580/* This function will figure out whether the currently last extent
6581 * block will be deleted, and if it will, what the new last extent
6582 * block will be so we can update his h_next_leaf_blk field, as well
6583 * as the dinodes i_last_eb_blk */
6584static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6585 unsigned int clusters_to_del,
6586 struct ocfs2_path *path,
6587 struct buffer_head **new_last_eb)
6588{
6589 int next_free, ret = 0;
6590 u32 cpos;
6591 struct ocfs2_extent_rec *rec;
6592 struct ocfs2_extent_block *eb;
6593 struct ocfs2_extent_list *el;
6594 struct buffer_head *bh = NULL;
6595
6596 *new_last_eb = NULL;
6597
6598 /* we have no tree, so of course, no last_eb. */
6599 if (!path->p_tree_depth)
6600 goto out;
6601
6602 /* trunc to zero special case - this makes tree_depth = 0
6603 * regardless of what it is. */
6604 if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
6605 goto out;
6606
6607 el = path_leaf_el(path);
6608 BUG_ON(!el->l_next_free_rec);
6609
6610 /*
6611 * Make sure that this extent list will actually be empty
6612 * after we clear away the data. We can shortcut out if
6613 * there's more than one non-empty extent in the
6614 * list. Otherwise, a check of the remaining extent is
6615 * necessary.
6616 */
6617 next_free = le16_to_cpu(el->l_next_free_rec);
6618 rec = NULL;
6619 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6620 if (next_free > 2)
6621 goto out;
6622
6623 /* We may have a valid extent in index 1, check it. */
6624 if (next_free == 2)
6625 rec = &el->l_recs[1];
6626
6627 /*
6628 * Fall through - no more nonempty extents, so we want
6629 * to delete this leaf.
6630 */
6631 } else {
6632 if (next_free > 1)
6633 goto out;
6634
6635 rec = &el->l_recs[0];
6636 }
6637
6638 if (rec) {
6639 /*
6640 * Check it we'll only be trimming off the end of this
6641 * cluster.
6642 */
6643 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
6644 goto out;
6645 }
6646
6647 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
6648 if (ret) {
6649 mlog_errno(ret);
6650 goto out;
6651 }
6652
6653 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6654 if (ret) {
6655 mlog_errno(ret);
6656 goto out;
6657 }
6658
6659 eb = (struct ocfs2_extent_block *) bh->b_data;
6660 el = &eb->h_list;
6661
6662 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6663 * Any corruption is a code bug. */
6664 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6665
6666 *new_last_eb = bh;
6667 get_bh(*new_last_eb);
6668 mlog(0, "returning block %llu, (cpos: %u)\n",
6669 (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
6670out:
6671 brelse(bh);
6672
6673 return ret;
6674}
6675
6676/*
6677 * Trim some clusters off the rightmost edge of a tree. Only called
6678 * during truncate.
6679 *
6680 * The caller needs to:
6681 * - start journaling of each path component.
6682 * - compute and fully set up any new last ext block
6683 */
6684static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6685 handle_t *handle, struct ocfs2_truncate_context *tc,
6686 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6687{
6688 int ret, i, index = path->p_tree_depth;
6689 u32 new_edge = 0;
6690 u64 deleted_eb = 0;
6691 struct buffer_head *bh;
6692 struct ocfs2_extent_list *el;
6693 struct ocfs2_extent_rec *rec;
6694
6695 *delete_start = 0;
6696 *flags = 0;
6697
6698 while (index >= 0) {
6699 bh = path->p_node[index].bh;
6700 el = path->p_node[index].el;
6701
6702 mlog(0, "traveling tree (index = %d, block = %llu)\n",
6703 index, (unsigned long long)bh->b_blocknr);
6704
6705 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
6706
6707 if (index !=
6708 (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
6709 ocfs2_error(inode->i_sb,
6710 "Inode %lu has invalid ext. block %llu",
6711 inode->i_ino,
6712 (unsigned long long)bh->b_blocknr);
6713 ret = -EROFS;
6714 goto out;
6715 }
6716
6717find_tail_record:
6718 i = le16_to_cpu(el->l_next_free_rec) - 1;
6719 rec = &el->l_recs[i];
6720
6721 mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
6722 "next = %u\n", i, le32_to_cpu(rec->e_cpos),
6723 ocfs2_rec_clusters(el, rec),
6724 (unsigned long long)le64_to_cpu(rec->e_blkno),
6725 le16_to_cpu(el->l_next_free_rec));
6726
6727 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
6728
6729 if (le16_to_cpu(el->l_tree_depth) == 0) {
6730 /*
6731 * If the leaf block contains a single empty
6732 * extent and no records, we can just remove
6733 * the block.
6734 */
6735 if (i == 0 && ocfs2_is_empty_extent(rec)) {
6736 memset(rec, 0,
6737 sizeof(struct ocfs2_extent_rec));
6738 el->l_next_free_rec = cpu_to_le16(0);
6739
6740 goto delete;
6741 }
6742
6743 /*
6744 * Remove any empty extents by shifting things
6745 * left. That should make life much easier on
6746 * the code below. This condition is rare
6747 * enough that we shouldn't see a performance
6748 * hit.
6749 */
6750 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6751 le16_add_cpu(&el->l_next_free_rec, -1);
6752
6753 for(i = 0;
6754 i < le16_to_cpu(el->l_next_free_rec); i++)
6755 el->l_recs[i] = el->l_recs[i + 1];
6756
6757 memset(&el->l_recs[i], 0,
6758 sizeof(struct ocfs2_extent_rec));
6759
6760 /*
6761 * We've modified our extent list. The
6762 * simplest way to handle this change
6763 * is to being the search from the
6764 * start again.
6765 */
6766 goto find_tail_record;
6767 }
6768
6769 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
6770
6771 /*
6772 * We'll use "new_edge" on our way back up the
6773 * tree to know what our rightmost cpos is.
6774 */
6775 new_edge = le16_to_cpu(rec->e_leaf_clusters);
6776 new_edge += le32_to_cpu(rec->e_cpos);
6777
6778 /*
6779 * The caller will use this to delete data blocks.
6780 */
6781 *delete_start = le64_to_cpu(rec->e_blkno)
6782 + ocfs2_clusters_to_blocks(inode->i_sb,
6783 le16_to_cpu(rec->e_leaf_clusters));
6784 *flags = rec->e_flags;
6785
6786 /*
6787 * If it's now empty, remove this record.
6788 */
6789 if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
6790 memset(rec, 0,
6791 sizeof(struct ocfs2_extent_rec));
6792 le16_add_cpu(&el->l_next_free_rec, -1);
6793 }
6794 } else {
6795 if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
6796 memset(rec, 0,
6797 sizeof(struct ocfs2_extent_rec));
6798 le16_add_cpu(&el->l_next_free_rec, -1);
6799
6800 goto delete;
6801 }
6802
6803 /* Can this actually happen? */
6804 if (le16_to_cpu(el->l_next_free_rec) == 0)
6805 goto delete;
6806
6807 /*
6808 * We never actually deleted any clusters
6809 * because our leaf was empty. There's no
6810 * reason to adjust the rightmost edge then.
6811 */
6812 if (new_edge == 0)
6813 goto delete;
6814
6815 rec->e_int_clusters = cpu_to_le32(new_edge);
6816 le32_add_cpu(&rec->e_int_clusters,
6817 -le32_to_cpu(rec->e_cpos));
6818
6819 /*
6820 * A deleted child record should have been
6821 * caught above.
6822 */
6823 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
6824 }
6825
6826delete:
6827 ret = ocfs2_journal_dirty(handle, bh);
6828 if (ret) {
6829 mlog_errno(ret);
6830 goto out;
6831 }
6832
6833 mlog(0, "extent list container %llu, after: record %d: "
6834 "(%u, %u, %llu), next = %u.\n",
6835 (unsigned long long)bh->b_blocknr, i,
6836 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
6837 (unsigned long long)le64_to_cpu(rec->e_blkno),
6838 le16_to_cpu(el->l_next_free_rec));
6839
6840 /*
6841 * We must be careful to only attempt delete of an
6842 * extent block (and not the root inode block).
6843 */
6844 if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
6845 struct ocfs2_extent_block *eb =
6846 (struct ocfs2_extent_block *)bh->b_data;
6847
6848 /*
6849 * Save this for use when processing the
6850 * parent block.
6851 */
6852 deleted_eb = le64_to_cpu(eb->h_blkno);
6853
6854 mlog(0, "deleting this extent block.\n");
6855
6856 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6857
6858 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6859 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
6860 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
6861
6862 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
6863 /* An error here is not fatal. */
6864 if (ret < 0)
6865 mlog_errno(ret);
6866 } else {
6867 deleted_eb = 0;
6868 }
6869
6870 index--;
6871 }
6872
6873 ret = 0;
6874out:
6875 return ret;
6876}
6877
6878static int ocfs2_do_truncate(struct ocfs2_super *osb,
6879 unsigned int clusters_to_del,
6880 struct inode *inode,
6881 struct buffer_head *fe_bh,
6882 handle_t *handle,
6883 struct ocfs2_truncate_context *tc,
6884 struct ocfs2_path *path,
6885 struct ocfs2_alloc_context *meta_ac)
6886{
6887 int status;
6888 struct ocfs2_dinode *fe;
6889 struct ocfs2_extent_block *last_eb = NULL;
6890 struct ocfs2_extent_list *el;
6891 struct buffer_head *last_eb_bh = NULL;
6892 u64 delete_blk = 0;
6893 u8 rec_flags;
6894
6895 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6896
6897 status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
6898 path, &last_eb_bh);
6899 if (status < 0) {
6900 mlog_errno(status);
6901 goto bail;
6902 }
6903
6904 /*
6905 * Each component will be touched, so we might as well journal
6906 * here to avoid having to handle errors later.
6907 */
6908 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6909 if (status < 0) {
6910 mlog_errno(status);
6911 goto bail;
6912 }
6913
6914 if (last_eb_bh) {
6915 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6916 OCFS2_JOURNAL_ACCESS_WRITE);
6917 if (status < 0) {
6918 mlog_errno(status);
6919 goto bail;
6920 }
6921
6922 last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6923 }
6924
6925 el = &(fe->id2.i_list);
6926
6927 /*
6928 * Lower levels depend on this never happening, but it's best
6929 * to check it up here before changing the tree.
6930 */
6931 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
6932 ocfs2_error(inode->i_sb,
6933 "Inode %lu has an empty extent record, depth %u\n",
6934 inode->i_ino, le16_to_cpu(el->l_tree_depth));
6935 status = -EROFS;
6936 goto bail;
6937 }
6938
6939 dquot_free_space_nodirty(inode,
6940 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6941 spin_lock(&OCFS2_I(inode)->ip_lock);
6942 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6943 clusters_to_del;
6944 spin_unlock(&OCFS2_I(inode)->ip_lock);
6945 le32_add_cpu(&fe->i_clusters, -clusters_to_del);
6946 inode->i_blocks = ocfs2_inode_sector_count(inode);
6947
6948 status = ocfs2_trim_tree(inode, path, handle, tc,
6949 clusters_to_del, &delete_blk, &rec_flags);
6950 if (status) {
6951 mlog_errno(status);
6952 goto bail;
6953 }
6954
6955 if (le32_to_cpu(fe->i_clusters) == 0) {
6956 /* trunc to zero is a special case. */
6957 el->l_tree_depth = 0;
6958 fe->i_last_eb_blk = 0;
6959 } else if (last_eb)
6960 fe->i_last_eb_blk = last_eb->h_blkno;
6961
6962 status = ocfs2_journal_dirty(handle, fe_bh);
6963 if (status < 0) {
6964 mlog_errno(status);
6965 goto bail;
6966 }
6967
6968 if (last_eb) {
6969 /* If there will be a new last extent block, then by
6970 * definition, there cannot be any leaves to the right of
6971 * him. */
6972 last_eb->h_next_leaf_blk = 0;
6973 status = ocfs2_journal_dirty(handle, last_eb_bh);
6974 if (status < 0) {
6975 mlog_errno(status);
6976 goto bail;
6977 }
6978 }
6979
6980 if (delete_blk) {
6981 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6982 status = ocfs2_decrease_refcount(inode, handle,
6983 ocfs2_blocks_to_clusters(osb->sb,
6984 delete_blk),
6985 clusters_to_del, meta_ac,
6986 &tc->tc_dealloc, 1);
6987 else
6988 status = ocfs2_truncate_log_append(osb, handle,
6989 delete_blk,
6990 clusters_to_del);
6991 if (status < 0) {
6992 mlog_errno(status);
6993 goto bail;
6994 }
6995 }
6996 status = 0;
6997bail:
6998 brelse(last_eb_bh);
6999 mlog_exit(status);
7000 return status;
7001}
7002
7003static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) 6584static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
7004{ 6585{
7005 set_buffer_uptodate(bh); 6586 set_buffer_uptodate(bh);
@@ -7307,7 +6888,9 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7307 goto out_commit; 6888 goto out_commit;
7308 did_quota = 1; 6889 did_quota = 1;
7309 6890
7310 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6891 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
6892
6893 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
7311 &num); 6894 &num);
7312 if (ret) { 6895 if (ret) {
7313 mlog_errno(ret); 6896 mlog_errno(ret);
@@ -7406,26 +6989,29 @@ out:
7406 */ 6989 */
7407int ocfs2_commit_truncate(struct ocfs2_super *osb, 6990int ocfs2_commit_truncate(struct ocfs2_super *osb,
7408 struct inode *inode, 6991 struct inode *inode,
7409 struct buffer_head *fe_bh, 6992 struct buffer_head *di_bh)
7410 struct ocfs2_truncate_context *tc)
7411{ 6993{
7412 int status, i, credits, tl_sem = 0; 6994 int status = 0, i, flags = 0;
7413 u32 clusters_to_del, new_highest_cpos, range; 6995 u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
7414 u64 blkno = 0; 6996 u64 blkno = 0;
7415 struct ocfs2_extent_list *el; 6997 struct ocfs2_extent_list *el;
7416 handle_t *handle = NULL; 6998 struct ocfs2_extent_rec *rec;
7417 struct inode *tl_inode = osb->osb_tl_inode;
7418 struct ocfs2_path *path = NULL; 6999 struct ocfs2_path *path = NULL;
7419 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7000 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7420 struct ocfs2_alloc_context *meta_ac = NULL; 7001 struct ocfs2_extent_list *root_el = &(di->id2.i_list);
7421 struct ocfs2_refcount_tree *ref_tree = NULL; 7002 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
7003 struct ocfs2_extent_tree et;
7004 struct ocfs2_cached_dealloc_ctxt dealloc;
7422 7005
7423 mlog_entry_void(); 7006 mlog_entry_void();
7424 7007
7008 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7009 ocfs2_init_dealloc_ctxt(&dealloc);
7010
7425 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7011 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
7426 i_size_read(inode)); 7012 i_size_read(inode));
7427 7013
7428 path = ocfs2_new_path(fe_bh, &di->id2.i_list, 7014 path = ocfs2_new_path(di_bh, &di->id2.i_list,
7429 ocfs2_journal_access_di); 7015 ocfs2_journal_access_di);
7430 if (!path) { 7016 if (!path) {
7431 status = -ENOMEM; 7017 status = -ENOMEM;
@@ -7444,8 +7030,6 @@ start:
7444 goto bail; 7030 goto bail;
7445 } 7031 }
7446 7032
7447 credits = 0;
7448
7449 /* 7033 /*
7450 * Truncate always works against the rightmost tree branch. 7034 * Truncate always works against the rightmost tree branch.
7451 */ 7035 */
@@ -7480,101 +7064,62 @@ start:
7480 } 7064 }
7481 7065
7482 i = le16_to_cpu(el->l_next_free_rec) - 1; 7066 i = le16_to_cpu(el->l_next_free_rec) - 1;
7483 range = le32_to_cpu(el->l_recs[i].e_cpos) + 7067 rec = &el->l_recs[i];
7484 ocfs2_rec_clusters(el, &el->l_recs[i]); 7068 flags = rec->e_flags;
7485 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 7069 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
7486 clusters_to_del = 0; 7070
7487 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7071 if (i == 0 && ocfs2_is_empty_extent(rec)) {
7488 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7072 /*
7489 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 7073 * Lower levels depend on this never happening, but it's best
7074 * to check it up here before changing the tree.
7075 */
7076 if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
7077 ocfs2_error(inode->i_sb, "Inode %lu has an empty "
7078 "extent record, depth %u\n", inode->i_ino,
7079 le16_to_cpu(root_el->l_tree_depth));
7080 status = -EROFS;
7081 goto bail;
7082 }
7083 trunc_cpos = le32_to_cpu(rec->e_cpos);
7084 trunc_len = 0;
7085 blkno = 0;
7086 } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
7087 /*
7088 * Truncate entire record.
7089 */
7090 trunc_cpos = le32_to_cpu(rec->e_cpos);
7091 trunc_len = ocfs2_rec_clusters(el, rec);
7092 blkno = le64_to_cpu(rec->e_blkno);
7490 } else if (range > new_highest_cpos) { 7093 } else if (range > new_highest_cpos) {
7491 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7094 /*
7492 le32_to_cpu(el->l_recs[i].e_cpos)) - 7095 * Partial truncate. it also should be
7493 new_highest_cpos; 7096 * the last truncate we're doing.
7494 blkno = le64_to_cpu(el->l_recs[i].e_blkno) + 7097 */
7495 ocfs2_clusters_to_blocks(inode->i_sb, 7098 trunc_cpos = new_highest_cpos;
7496 ocfs2_rec_clusters(el, &el->l_recs[i]) - 7099 trunc_len = range - new_highest_cpos;
7497 clusters_to_del); 7100 coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
7101 blkno = le64_to_cpu(rec->e_blkno) +
7102 ocfs2_clusters_to_blocks(inode->i_sb, coff);
7498 } else { 7103 } else {
7104 /*
7105 * Truncate completed, leave happily.
7106 */
7499 status = 0; 7107 status = 0;
7500 goto bail; 7108 goto bail;
7501 } 7109 }
7502 7110
7503 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7111 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7504 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7505
7506 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7507 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7508 OCFS2_HAS_REFCOUNT_FL));
7509
7510 status = ocfs2_lock_refcount_tree(osb,
7511 le64_to_cpu(di->i_refcount_loc),
7512 1, &ref_tree, NULL);
7513 if (status) {
7514 mlog_errno(status);
7515 goto bail;
7516 }
7517
7518 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7519 blkno,
7520 clusters_to_del,
7521 &credits,
7522 &meta_ac);
7523 if (status < 0) {
7524 mlog_errno(status);
7525 goto bail;
7526 }
7527 }
7528
7529 mutex_lock(&tl_inode->i_mutex);
7530 tl_sem = 1;
7531 /* ocfs2_truncate_log_needs_flush guarantees us at least one
7532 * record is free for use. If there isn't any, we flush to get
7533 * an empty truncate log. */
7534 if (ocfs2_truncate_log_needs_flush(osb)) {
7535 status = __ocfs2_flush_truncate_log(osb);
7536 if (status < 0) {
7537 mlog_errno(status);
7538 goto bail;
7539 }
7540 }
7541 7112
7542 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7113 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
7543 (struct ocfs2_dinode *)fe_bh->b_data, 7114 phys_cpos, trunc_len, flags, &dealloc,
7544 el); 7115 refcount_loc);
7545 handle = ocfs2_start_trans(osb, credits);
7546 if (IS_ERR(handle)) {
7547 status = PTR_ERR(handle);
7548 handle = NULL;
7549 mlog_errno(status);
7550 goto bail;
7551 }
7552
7553 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7554 tc, path, meta_ac);
7555 if (status < 0) { 7116 if (status < 0) {
7556 mlog_errno(status); 7117 mlog_errno(status);
7557 goto bail; 7118 goto bail;
7558 } 7119 }
7559 7120
7560 mutex_unlock(&tl_inode->i_mutex);
7561 tl_sem = 0;
7562
7563 ocfs2_commit_trans(osb, handle);
7564 handle = NULL;
7565
7566 ocfs2_reinit_path(path, 1); 7121 ocfs2_reinit_path(path, 1);
7567 7122
7568 if (meta_ac) {
7569 ocfs2_free_alloc_context(meta_ac);
7570 meta_ac = NULL;
7571 }
7572
7573 if (ref_tree) {
7574 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7575 ref_tree = NULL;
7576 }
7577
7578 /* 7123 /*
7579 * The check above will catch the case where we've truncated 7124 * The check above will catch the case where we've truncated
7580 * away all allocation. 7125 * away all allocation.
@@ -7585,25 +7130,10 @@ bail:
7585 7130
7586 ocfs2_schedule_truncate_log_flush(osb, 1); 7131 ocfs2_schedule_truncate_log_flush(osb, 1);
7587 7132
7588 if (tl_sem) 7133 ocfs2_run_deallocs(osb, &dealloc);
7589 mutex_unlock(&tl_inode->i_mutex);
7590
7591 if (handle)
7592 ocfs2_commit_trans(osb, handle);
7593
7594 if (meta_ac)
7595 ocfs2_free_alloc_context(meta_ac);
7596
7597 if (ref_tree)
7598 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7599
7600 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7601 7134
7602 ocfs2_free_path(path); 7135 ocfs2_free_path(path);
7603 7136
7604 /* This will drop the ext_alloc cluster lock for us */
7605 ocfs2_free_truncate_context(tc);
7606
7607 mlog_exit(status); 7137 mlog_exit(status);
7608 return status; 7138 return status;
7609} 7139}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 1db4359ccb90..55762b554b99 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
140 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
141int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
142 struct ocfs2_extent_tree *et, 142 struct ocfs2_extent_tree *et,
143 u32 cpos, u32 phys_cpos, u32 len, 143 u32 cpos, u32 phys_cpos, u32 len, int flags,
144 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc,
145 u64 refcount_loc);
145 146
146int ocfs2_num_free_extents(struct ocfs2_super *osb, 147int ocfs2_num_free_extents(struct ocfs2_super *osb,
147 struct ocfs2_extent_tree *et); 148 struct ocfs2_extent_tree *et);
@@ -209,7 +210,7 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
209int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 210int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
210 u64 blkno, unsigned int bit); 211 u64 blkno, unsigned int bit);
211int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 212int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
212 int type, int slot, u64 blkno, 213 int type, int slot, u64 suballoc, u64 blkno,
213 unsigned int bit); 214 unsigned int bit);
214static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) 215static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
215{ 216{
@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
233 struct ocfs2_truncate_context **tc); 234 struct ocfs2_truncate_context **tc);
234int ocfs2_commit_truncate(struct ocfs2_super *osb, 235int ocfs2_commit_truncate(struct ocfs2_super *osb,
235 struct inode *inode, 236 struct inode *inode,
236 struct buffer_head *fe_bh, 237 struct buffer_head *di_bh);
237 struct ocfs2_truncate_context *tc);
238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
239 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
240 240
@@ -319,6 +319,8 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos); 321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
323 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 324int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left, 325 struct ocfs2_path *left,
324 struct ocfs2_path *right); 326 struct ocfs2_path *right);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 21441ddb5506..3623ca20cc18 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1735,6 +1735,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1735 goto out; 1735 goto out;
1736 } 1736 }
1737 1737
1738 if (data_ac)
1739 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
1740
1738 credits = ocfs2_calc_extend_credits(inode->i_sb, 1741 credits = ocfs2_calc_extend_credits(inode->i_sb,
1739 &di->id2.i_list, 1742 &di->id2.i_list,
1740 clusters_to_alloc); 1743 clusters_to_alloc);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 3bb928a2bf7d..c7fba396392d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
116 define_mask(ERROR), 116 define_mask(ERROR),
117 define_mask(NOTICE), 117 define_mask(NOTICE),
118 define_mask(KTHREAD), 118 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
119}; 120};
120 121
121static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 3dfddbec32f2..fd96e2a2fa56 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,6 +119,7 @@
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
122 123
123#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 124#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
124#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 125#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 73e743eea2c8..aa75ca3f78da 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -583,6 +583,9 @@ static void o2net_state_change(struct sock *sk)
583 o2net_sc_queue_work(sc, &sc->sc_connect_work); 583 o2net_sc_queue_work(sc, &sc->sc_connect_work);
584 break; 584 break;
585 default: 585 default:
586 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT
587 " shutdown, state %d\n",
588 SC_NODEF_ARGS(sc), sk->sk_state);
586 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 589 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
587 break; 590 break;
588 } 591 }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index efd77d071c80..f04ebcfffc4a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1194,7 +1194,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1194 else 1194 else
1195 de->inode = 0; 1195 de->inode = 0;
1196 dir->i_version++; 1196 dir->i_version++;
1197 status = ocfs2_journal_dirty(handle, bh); 1197 ocfs2_journal_dirty(handle, bh);
1198 goto bail; 1198 goto bail;
1199 } 1199 }
1200 i += le16_to_cpu(de->rec_len); 1200 i += le16_to_cpu(de->rec_len);
@@ -1752,7 +1752,7 @@ int __ocfs2_add_entry(handle_t *handle,
1752 ocfs2_recalc_free_list(dir, handle, lookup); 1752 ocfs2_recalc_free_list(dir, handle, lookup);
1753 1753
1754 dir->i_version++; 1754 dir->i_version++;
1755 status = ocfs2_journal_dirty(handle, insert_bh); 1755 ocfs2_journal_dirty(handle, insert_bh);
1756 retval = 0; 1756 retval = 0;
1757 goto bail; 1757 goto bail;
1758 } 1758 }
@@ -2297,12 +2297,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2297 } 2297 }
2298 2298
2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size); 2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
2300
2301 ocfs2_journal_dirty(handle, di_bh); 2300 ocfs2_journal_dirty(handle, di_bh);
2302 if (ret) {
2303 mlog_errno(ret);
2304 goto out;
2305 }
2306 2301
2307 i_size_write(inode, size); 2302 i_size_write(inode, size);
2308 inode->i_nlink = 2; 2303 inode->i_nlink = 2;
@@ -2366,11 +2361,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2366 ocfs2_init_dir_trailer(inode, new_bh, size); 2361 ocfs2_init_dir_trailer(inode, new_bh, size);
2367 } 2362 }
2368 2363
2369 status = ocfs2_journal_dirty(handle, new_bh); 2364 ocfs2_journal_dirty(handle, new_bh);
2370 if (status < 0) {
2371 mlog_errno(status);
2372 goto bail;
2373 }
2374 2365
2375 i_size_write(inode, inode->i_sb->s_blocksize); 2366 i_size_write(inode, inode->i_sb->s_blocksize);
2376 inode->i_nlink = 2; 2367 inode->i_nlink = 2;
@@ -2404,15 +2395,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2404 int ret; 2395 int ret;
2405 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 2396 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
2406 u16 dr_suballoc_bit; 2397 u16 dr_suballoc_bit;
2407 u64 dr_blkno; 2398 u64 suballoc_loc, dr_blkno;
2408 unsigned int num_bits; 2399 unsigned int num_bits;
2409 struct buffer_head *dx_root_bh = NULL; 2400 struct buffer_head *dx_root_bh = NULL;
2410 struct ocfs2_dx_root_block *dx_root; 2401 struct ocfs2_dx_root_block *dx_root;
2411 struct ocfs2_dir_block_trailer *trailer = 2402 struct ocfs2_dir_block_trailer *trailer =
2412 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); 2403 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
2413 2404
2414 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, 2405 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
2415 &num_bits, &dr_blkno); 2406 &dr_suballoc_bit, &num_bits, &dr_blkno);
2416 if (ret) { 2407 if (ret) {
2417 mlog_errno(ret); 2408 mlog_errno(ret);
2418 goto out; 2409 goto out;
@@ -2440,6 +2431,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2440 memset(dx_root, 0, osb->sb->s_blocksize); 2431 memset(dx_root, 0, osb->sb->s_blocksize);
2441 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); 2432 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
2442 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 2433 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2434 dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
2443 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); 2435 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
2444 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); 2436 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2445 dx_root->dr_blkno = cpu_to_le64(dr_blkno); 2437 dx_root->dr_blkno = cpu_to_le64(dr_blkno);
@@ -2458,10 +2450,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2458 dx_root->dr_list.l_count = 2450 dx_root->dr_list.l_count =
2459 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); 2451 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
2460 } 2452 }
2461 2453 ocfs2_journal_dirty(handle, dx_root_bh);
2462 ret = ocfs2_journal_dirty(handle, dx_root_bh);
2463 if (ret)
2464 mlog_errno(ret);
2465 2454
2466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, 2455 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
2467 OCFS2_JOURNAL_ACCESS_CREATE); 2456 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2475,9 +2464,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2475 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2476 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2477 2466
2478 ret = ocfs2_journal_dirty(handle, di_bh); 2467 ocfs2_journal_dirty(handle, di_bh);
2479 if (ret)
2480 mlog_errno(ret);
2481 2468
2482 *ret_dx_root_bh = dx_root_bh; 2469 *ret_dx_root_bh = dx_root_bh;
2483 dx_root_bh = NULL; 2470 dx_root_bh = NULL;
@@ -2558,7 +2545,7 @@ static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
2558 * chance of contiguousness as the directory grows in number 2545 * chance of contiguousness as the directory grows in number
2559 * of entries. 2546 * of entries.
2560 */ 2547 */
2561 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); 2548 ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
2562 if (ret) { 2549 if (ret) {
2563 mlog_errno(ret); 2550 mlog_errno(ret);
2564 goto out; 2551 goto out;
@@ -2991,7 +2978,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2991 * if we only get one now, that's enough to continue. The rest 2978 * if we only get one now, that's enough to continue. The rest
2992 * will be claimed after the conversion to extents. 2979 * will be claimed after the conversion to extents.
2993 */ 2980 */
2994 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 2981 if (ocfs2_dir_resv_allowed(osb))
2982 data_ac->ac_resv = &oi->ip_la_data_resv;
2983 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
2995 if (ret) { 2984 if (ret) {
2996 mlog_errno(ret); 2985 mlog_errno(ret);
2997 goto out_commit; 2986 goto out_commit;
@@ -3034,11 +3023,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3034 ocfs2_init_dir_trailer(dir, dirdata_bh, i); 3023 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
3035 } 3024 }
3036 3025
3037 ret = ocfs2_journal_dirty(handle, dirdata_bh); 3026 ocfs2_journal_dirty(handle, dirdata_bh);
3038 if (ret) {
3039 mlog_errno(ret);
3040 goto out_commit;
3041 }
3042 3027
3043 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { 3028 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
3044 /* 3029 /*
@@ -3104,11 +3089,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3104 */ 3089 */
3105 dir->i_blocks = ocfs2_inode_sector_count(dir); 3090 dir->i_blocks = ocfs2_inode_sector_count(dir);
3106 3091
3107 ret = ocfs2_journal_dirty(handle, di_bh); 3092 ocfs2_journal_dirty(handle, di_bh);
3108 if (ret) {
3109 mlog_errno(ret);
3110 goto out_commit;
3111 }
3112 3093
3113 if (ocfs2_supports_indexed_dirs(osb)) { 3094 if (ocfs2_supports_indexed_dirs(osb)) {
3114 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, 3095 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
@@ -3138,7 +3119,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3138 * pass. Claim the 2nd cluster as a separate extent. 3119 * pass. Claim the 2nd cluster as a separate extent.
3139 */ 3120 */
3140 if (alloc > len) { 3121 if (alloc > len) {
3141 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 3122 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
3142 &len); 3123 &len);
3143 if (ret) { 3124 if (ret) {
3144 mlog_errno(ret); 3125 mlog_errno(ret);
@@ -3369,6 +3350,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3369 goto bail; 3350 goto bail;
3370 } 3351 }
3371 3352
3353 if (ocfs2_dir_resv_allowed(osb))
3354 data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
3355
3372 credits = ocfs2_calc_extend_credits(sb, el, 1); 3356 credits = ocfs2_calc_extend_credits(sb, el, 1);
3373 } else { 3357 } else {
3374 spin_unlock(&OCFS2_I(dir)->ip_lock); 3358 spin_unlock(&OCFS2_I(dir)->ip_lock);
@@ -3423,11 +3407,7 @@ do_extend:
3423 } else { 3407 } else {
3424 de->rec_len = cpu_to_le16(sb->s_blocksize); 3408 de->rec_len = cpu_to_le16(sb->s_blocksize);
3425 } 3409 }
3426 status = ocfs2_journal_dirty(handle, new_bh); 3410 ocfs2_journal_dirty(handle, new_bh);
3427 if (status < 0) {
3428 mlog_errno(status);
3429 goto bail;
3430 }
3431 3411
3432 dir_i_size += dir->i_sb->s_blocksize; 3412 dir_i_size += dir->i_sb->s_blocksize;
3433 i_size_write(dir, dir_i_size); 3413 i_size_write(dir, dir_i_size);
@@ -3906,11 +3886,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3906 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, 3886 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
3907 dx_leaf_sort_swap); 3887 dx_leaf_sort_swap);
3908 3888
3909 ret = ocfs2_journal_dirty(handle, dx_leaf_bh); 3889 ocfs2_journal_dirty(handle, dx_leaf_bh);
3910 if (ret) {
3911 mlog_errno(ret);
3912 goto out_commit;
3913 }
3914 3890
3915 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash, 3891 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
3916 &split_hash); 3892 &split_hash);
@@ -4490,7 +4466,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4490 4466
4491 blk = le64_to_cpu(dx_root->dr_blkno); 4467 blk = le64_to_cpu(dx_root->dr_blkno);
4492 bit = le16_to_cpu(dx_root->dr_suballoc_bit); 4468 bit = le16_to_cpu(dx_root->dr_suballoc_bit);
4493 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 4469 if (dx_root->dr_suballoc_loc)
4470 bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
4471 else
4472 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
4494 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, 4473 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
4495 bit, bg_blkno, 1); 4474 bit, bg_blkno, 1);
4496 if (ret) 4475 if (ret)
@@ -4551,8 +4530,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4551 4530
4552 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); 4531 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4553 4532
4554 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 4533 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4555 &dealloc); 4534 &dealloc, 0);
4556 if (ret) { 4535 if (ret) {
4557 mlog_errno(ret); 4536 mlog_errno(ret);
4558 goto out; 4537 goto out;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 12d5eb78a11a..f44999156839 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
88 return 0; 88 return 0;
89} 89}
90 90
91static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
92{ 92{
93 mlog_entry_void(); 93 mlog_entry_void();
94 94
@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
145} 145}
146 146
147 147
148static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 148void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
149{ 149{
150 mlog_entry_void(); 150 mlog_entry_void();
151 151
@@ -451,7 +451,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, 451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
452 lock->ml.node, &status); 452 lock->ml.node, &status);
453 if (ret < 0) 453 if (ret < 0)
454 mlog_errno(ret); 454 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
455 "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
456 lock->ml.node);
455 else { 457 else {
456 if (status == DLM_RECOVERING) { 458 if (status == DLM_RECOVERING) {
457 mlog(ML_ERROR, "sent AST to node %u, it thinks this " 459 mlog(ML_ERROR, "sent AST to node %u, it thinks this "
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be35980c..4b6ae2c13b47 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,7 @@
37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes 37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
38#define DLM_THREAD_MS 200 // flush at least every 200 ms 38#define DLM_THREAD_MS 200 // flush at least every 200 ms
39 39
40#define DLM_HASH_SIZE_DEFAULT (1 << 14) 40#define DLM_HASH_SIZE_DEFAULT (1 << 17)
41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE 41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
42# define DLM_HASH_PAGES 1 42# define DLM_HASH_PAGES 1
43#else 43#else
@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
904 904
905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
908void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void dlm_do_local_ast(struct dlm_ctxt *dlm, 909void dlm_do_local_ast(struct dlm_ctxt *dlm,
908 struct dlm_lock_resource *res, 910 struct dlm_lock_resource *res,
909 struct dlm_lock *lock); 911 struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 90803b47cd8c..9f30491e5e88 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -390,7 +390,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED) 390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
391 dlm_error(ret); 391 dlm_error(ret);
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
394 "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
395 res->owner);
394 if (dlm_is_host_down(tmpret)) { 396 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over 397 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat 398 * and over, sleep here and wait for the heartbeat
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 988c9055fd4e..6b5a492e1749 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -511,7 +511,7 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
511 511
512 assert_spin_locked(&dlm->spinlock); 512 assert_spin_locked(&dlm->spinlock);
513 513
514 printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); 514 printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name);
515 515
516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
517 node + 1)) < O2NM_MAX_NODES) { 517 node + 1)) < O2NM_MAX_NODES) {
@@ -534,7 +534,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
534 534
535 node = exit_msg->node_idx; 535 node = exit_msg->node_idx;
536 536
537 printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); 537 printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
538 538
539 spin_lock(&dlm->spinlock); 539 spin_lock(&dlm->spinlock);
540 clear_bit(node, dlm->domain_map); 540 clear_bit(node, dlm->domain_map);
@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, 565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
566 &leave_msg, sizeof(leave_msg), node, 566 &leave_msg, sizeof(leave_msg), node,
567 NULL); 567 NULL);
568 568 if (status < 0)
569 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
570 "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
569 mlog(0, "status return %d from o2net_send_message\n", status); 571 mlog(0, "status return %d from o2net_send_message\n", status);
570 572
571 return status; 573 return status;
@@ -904,7 +906,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
904 set_bit(assert->node_idx, dlm->domain_map); 906 set_bit(assert->node_idx, dlm->domain_map);
905 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 907 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
906 908
907 printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n", 909 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
908 assert->node_idx, dlm->name); 910 assert->node_idx, dlm->name);
909 __dlm_print_nodes(dlm); 911 __dlm_print_nodes(dlm);
910 912
@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
962 &cancel_msg, sizeof(cancel_msg), node, 964 &cancel_msg, sizeof(cancel_msg), node,
963 NULL); 965 NULL);
964 if (status < 0) { 966 if (status < 0) {
965 mlog_errno(status); 967 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
968 "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
969 node);
966 goto bail; 970 goto bail;
967 } 971 }
968 972
@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
1029 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); 1033 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
1030 1034
1031 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 1035 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
1032 sizeof(join_msg), node, 1036 sizeof(join_msg), node, &join_resp);
1033 &join_resp);
1034 if (status < 0 && status != -ENOPROTOOPT) { 1037 if (status < 0 && status != -ENOPROTOOPT) {
1035 mlog_errno(status); 1038 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1039 "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
1040 node);
1036 goto bail; 1041 goto bail;
1037 } 1042 }
1038 dlm_query_join_wire_to_packet(join_resp, &packet); 1043 dlm_query_join_wire_to_packet(join_resp, &packet);
@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
1103 &assert_msg, sizeof(assert_msg), node, 1108 &assert_msg, sizeof(assert_msg), node,
1104 NULL); 1109 NULL);
1105 if (status < 0) 1110 if (status < 0)
1106 mlog_errno(status); 1111 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1112 "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
1113 node);
1107 1114
1108 return status; 1115 return status;
1109} 1116}
@@ -1516,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1516 goto leave; 1523 goto leave;
1517 } 1524 }
1518 1525
1519 dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL); 1526 dlm->name = kstrdup(domain, GFP_KERNEL);
1520 if (dlm->name == NULL) { 1527 if (dlm->name == NULL) {
1521 mlog_errno(-ENOMEM); 1528 mlog_errno(-ENOMEM);
1522 kfree(dlm); 1529 kfree(dlm);
@@ -1550,7 +1557,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1550 for (i = 0; i < DLM_HASH_BUCKETS; i++) 1557 for (i = 0; i < DLM_HASH_BUCKETS; i++)
1551 INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); 1558 INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
1552 1559
1553 strcpy(dlm->name, domain);
1554 dlm->key = key; 1560 dlm->key = key;
1555 dlm->node_num = o2nm_this_node(); 1561 dlm->node_num = o2nm_this_node();
1556 1562
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 733337772671..69cf369961c4 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
329 BUG(); 329 BUG();
330 } 330 }
331 } else { 331 } else {
332 mlog_errno(tmpret); 332 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
333 "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
334 res->owner);
333 if (dlm_is_host_down(tmpret)) { 335 if (dlm_is_host_down(tmpret)) {
334 ret = DLM_RECOVERING; 336 ret = DLM_RECOVERING;
335 mlog(0, "node %u died so returning DLM_RECOVERING " 337 mlog(0, "node %u died so returning DLM_RECOVERING "
@@ -429,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
429 struct dlm_lock *lock; 431 struct dlm_lock *lock;
430 int kernel_allocated = 0; 432 int kernel_allocated = 0;
431 433
432 lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); 434 lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
433 if (!lock) 435 if (!lock)
434 return NULL; 436 return NULL;
435 437
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b4357d27..4a7506a4e314 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -617,13 +617,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
617{ 617{
618 struct dlm_lock_resource *res = NULL; 618 struct dlm_lock_resource *res = NULL;
619 619
620 res = (struct dlm_lock_resource *) 620 res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
621 kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
622 if (!res) 621 if (!res)
623 goto error; 622 goto error;
624 623
625 res->lockname.name = (char *) 624 res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
626 kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
627 if (!res->lockname.name) 625 if (!res->lockname.name)
628 goto error; 626 goto error;
629 627
@@ -757,8 +755,7 @@ lookup:
757 spin_unlock(&dlm->spinlock); 755 spin_unlock(&dlm->spinlock);
758 mlog(0, "allocating a new resource\n"); 756 mlog(0, "allocating a new resource\n");
759 /* nothing found and we need to allocate one. */ 757 /* nothing found and we need to allocate one. */
760 alloc_mle = (struct dlm_master_list_entry *) 758 alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
761 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
762 if (!alloc_mle) 759 if (!alloc_mle)
763 goto leave; 760 goto leave;
764 res = dlm_new_lockres(dlm, lockid, namelen); 761 res = dlm_new_lockres(dlm, lockid, namelen);
@@ -1542,8 +1539,7 @@ way_up_top:
1542 spin_unlock(&dlm->master_lock); 1539 spin_unlock(&dlm->master_lock);
1543 spin_unlock(&dlm->spinlock); 1540 spin_unlock(&dlm->spinlock);
1544 1541
1545 mle = (struct dlm_master_list_entry *) 1542 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1546 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1547 if (!mle) { 1543 if (!mle) {
1548 response = DLM_MASTER_RESP_ERROR; 1544 response = DLM_MASTER_RESP_ERROR;
1549 mlog_errno(-ENOMEM); 1545 mlog_errno(-ENOMEM);
@@ -1666,7 +1662,9 @@ again:
1666 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 1662 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
1667 &assert, sizeof(assert), to, &r); 1663 &assert, sizeof(assert), to, &r);
1668 if (tmpret < 0) { 1664 if (tmpret < 0) {
1669 mlog(0, "assert_master returned %d!\n", tmpret); 1665 mlog(ML_ERROR, "Error %d when sending message %u (key "
1666 "0x%x) to node %u\n", tmpret,
1667 DLM_ASSERT_MASTER_MSG, dlm->key, to);
1670 if (!dlm_is_host_down(tmpret)) { 1668 if (!dlm_is_host_down(tmpret)) {
1671 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret); 1669 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
1672 BUG(); 1670 BUG();
@@ -2205,7 +2203,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2205 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, 2203 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
2206 &deref, sizeof(deref), res->owner, &r); 2204 &deref, sizeof(deref), res->owner, &r);
2207 if (ret < 0) 2205 if (ret < 0)
2208 mlog_errno(ret); 2206 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
2207 "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
2208 res->owner);
2209 else if (r < 0) { 2209 else if (r < 0) {
2210 /* BAD. other node says I did not have a ref. */ 2210 /* BAD. other node says I did not have a ref. */
2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s " 2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s "
@@ -2452,8 +2452,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2452 goto leave; 2452 goto leave;
2453 } 2453 }
2454 2454
2455 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2455 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
2456 GFP_NOFS);
2457 if (!mle) { 2456 if (!mle) {
2458 mlog_errno(ret); 2457 mlog_errno(ret);
2459 goto leave; 2458 goto leave;
@@ -2975,7 +2974,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2975 &migrate, sizeof(migrate), nodenum, 2974 &migrate, sizeof(migrate), nodenum,
2976 &status); 2975 &status);
2977 if (ret < 0) { 2976 if (ret < 0) {
2978 mlog(0, "migrate_request returned %d!\n", ret); 2977 mlog(ML_ERROR, "Error %d when sending message %u (key "
2978 "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
2979 dlm->key, nodenum);
2979 if (!dlm_is_host_down(ret)) { 2980 if (!dlm_is_host_down(ret)) {
2980 mlog(ML_ERROR, "unhandled error=%d!\n", ret); 2981 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
2981 BUG(); 2982 BUG();
@@ -3033,8 +3034,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3033 hash = dlm_lockid_hash(name, namelen); 3034 hash = dlm_lockid_hash(name, namelen);
3034 3035
3035 /* preallocate.. if this fails, abort */ 3036 /* preallocate.. if this fails, abort */
3036 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 3037 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
3037 GFP_NOFS);
3038 3038
3039 if (!mle) { 3039 if (!mle) {
3040 ret = -ENOMEM; 3040 ret = -ENOMEM;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index b4f99de2caf3..f8b75ce4be70 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
803 803
804 /* negative status is handled by caller */ 804 /* negative status is handled by caller */
805 if (ret < 0) 805 if (ret < 0)
806 mlog_errno(ret); 806 mlog(ML_ERROR, "Error %d when sending message %u (key "
807 "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
808 dlm->key, request_from);
807 809
808 // return from here, then 810 // return from here, then
809 // sleep until all received or error 811 // sleep until all received or error
@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
955 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 957 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
956 sizeof(done_msg), send_to, &tmpret); 958 sizeof(done_msg), send_to, &tmpret);
957 if (ret < 0) { 959 if (ret < 0) {
960 mlog(ML_ERROR, "Error %d when sending message %u (key "
961 "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
962 dlm->key, send_to);
958 if (!dlm_is_host_down(ret)) { 963 if (!dlm_is_host_down(ret)) {
959 mlog_errno(ret);
960 mlog(ML_ERROR, "%s: unknown error sending data-done "
961 "to %u\n", dlm->name, send_to);
962 BUG(); 964 BUG();
963 } 965 }
964 } else 966 } else
@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1126 if (ret < 0) { 1128 if (ret < 0) {
1127 /* XXX: negative status is not handled. 1129 /* XXX: negative status is not handled.
1128 * this will end up killing this node. */ 1130 * this will end up killing this node. */
1129 mlog_errno(ret); 1131 mlog(ML_ERROR, "Error %d when sending message %u (key "
1132 "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
1133 dlm->key, send_to);
1130 } else { 1134 } else {
1131 /* might get an -ENOMEM back here */ 1135 /* might get an -ENOMEM back here */
1132 ret = status; 1136 ret = status;
@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1642 &req, sizeof(req), nodenum, &status); 1646 &req, sizeof(req), nodenum, &status);
1643 /* XXX: negative status not handled properly here. */ 1647 /* XXX: negative status not handled properly here. */
1644 if (ret < 0) 1648 if (ret < 0)
1645 mlog_errno(ret); 1649 mlog(ML_ERROR, "Error %d when sending message %u (key "
1650 "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
1651 dlm->key, nodenum);
1646 else { 1652 else {
1647 BUG_ON(status < 0); 1653 BUG_ON(status < 0);
1648 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN); 1654 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -2640,7 +2646,7 @@ retry:
2640 if (dlm_is_host_down(ret)) { 2646 if (dlm_is_host_down(ret)) {
2641 /* node is down. not involved in recovery 2647 /* node is down. not involved in recovery
2642 * so just keep going */ 2648 * so just keep going */
2643 mlog(0, "%s: node %u was down when sending " 2649 mlog(ML_NOTICE, "%s: node %u was down when sending "
2644 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2650 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2645 ret = 0; 2651 ret = 0;
2646 } 2652 }
@@ -2660,11 +2666,12 @@ retry:
2660 } 2666 }
2661 if (ret < 0) { 2667 if (ret < 0) {
2662 struct dlm_lock_resource *res; 2668 struct dlm_lock_resource *res;
2669
2663 /* this is now a serious problem, possibly ENOMEM 2670 /* this is now a serious problem, possibly ENOMEM
2664 * in the network stack. must retry */ 2671 * in the network stack. must retry */
2665 mlog_errno(ret); 2672 mlog_errno(ret);
2666 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2673 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
2667 " returned %d\n", dlm->name, nodenum, ret); 2674 "returned %d\n", dlm->name, nodenum, ret);
2668 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME, 2675 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
2669 DLM_RECOVERY_LOCK_NAME_LEN); 2676 DLM_RECOVERY_LOCK_NAME_LEN);
2670 if (res) { 2677 if (res) {
@@ -2789,7 +2796,9 @@ stage2:
2789 if (ret >= 0) 2796 if (ret >= 0)
2790 ret = status; 2797 ret = status;
2791 if (ret < 0) { 2798 if (ret < 0) {
2792 mlog_errno(ret); 2799 mlog(ML_ERROR, "Error %d when sending message %u (key "
2800 "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
2801 dlm->key, nodenum);
2793 if (dlm_is_host_down(ret)) { 2802 if (dlm_is_host_down(ret)) {
2794 /* this has no effect on this recovery 2803 /* this has no effect on this recovery
2795 * session, so set the status to zero to 2804 * session, so set the status to zero to
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1fd1d35..d4f73ca68fe5 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
309 * spinlock, and because we know that it is not migrating/ 309 * spinlock, and because we know that it is not migrating/
310 * recovering/in-progress, it is fine to reserve asts and 310 * recovering/in-progress, it is fine to reserve asts and
311 * basts right before queueing them all throughout */ 311 * basts right before queueing them all throughout */
312 assert_spin_locked(&dlm->ast_lock);
312 assert_spin_locked(&res->spinlock); 313 assert_spin_locked(&res->spinlock);
313 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 314 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
314 DLM_LOCK_RES_RECOVERING| 315 DLM_LOCK_RES_RECOVERING|
@@ -337,7 +338,7 @@ converting:
337 /* queue the BAST if not already */ 338 /* queue the BAST if not already */
338 if (lock->ml.highest_blocked == LKM_IVMODE) { 339 if (lock->ml.highest_blocked == LKM_IVMODE) {
339 __dlm_lockres_reserve_ast(res); 340 __dlm_lockres_reserve_ast(res);
340 dlm_queue_bast(dlm, lock); 341 __dlm_queue_bast(dlm, lock);
341 } 342 }
342 /* update the highest_blocked if needed */ 343 /* update the highest_blocked if needed */
343 if (lock->ml.highest_blocked < target->ml.convert_type) 344 if (lock->ml.highest_blocked < target->ml.convert_type)
@@ -355,7 +356,7 @@ converting:
355 can_grant = 0; 356 can_grant = 0;
356 if (lock->ml.highest_blocked == LKM_IVMODE) { 357 if (lock->ml.highest_blocked == LKM_IVMODE) {
357 __dlm_lockres_reserve_ast(res); 358 __dlm_lockres_reserve_ast(res);
358 dlm_queue_bast(dlm, lock); 359 __dlm_queue_bast(dlm, lock);
359 } 360 }
360 if (lock->ml.highest_blocked < target->ml.convert_type) 361 if (lock->ml.highest_blocked < target->ml.convert_type)
361 lock->ml.highest_blocked = 362 lock->ml.highest_blocked =
@@ -383,7 +384,7 @@ converting:
383 spin_unlock(&target->spinlock); 384 spin_unlock(&target->spinlock);
384 385
385 __dlm_lockres_reserve_ast(res); 386 __dlm_lockres_reserve_ast(res);
386 dlm_queue_ast(dlm, target); 387 __dlm_queue_ast(dlm, target);
387 /* go back and check for more */ 388 /* go back and check for more */
388 goto converting; 389 goto converting;
389 } 390 }
@@ -402,7 +403,7 @@ blocked:
402 can_grant = 0; 403 can_grant = 0;
403 if (lock->ml.highest_blocked == LKM_IVMODE) { 404 if (lock->ml.highest_blocked == LKM_IVMODE) {
404 __dlm_lockres_reserve_ast(res); 405 __dlm_lockres_reserve_ast(res);
405 dlm_queue_bast(dlm, lock); 406 __dlm_queue_bast(dlm, lock);
406 } 407 }
407 if (lock->ml.highest_blocked < target->ml.type) 408 if (lock->ml.highest_blocked < target->ml.type)
408 lock->ml.highest_blocked = target->ml.type; 409 lock->ml.highest_blocked = target->ml.type;
@@ -418,7 +419,7 @@ blocked:
418 can_grant = 0; 419 can_grant = 0;
419 if (lock->ml.highest_blocked == LKM_IVMODE) { 420 if (lock->ml.highest_blocked == LKM_IVMODE) {
420 __dlm_lockres_reserve_ast(res); 421 __dlm_lockres_reserve_ast(res);
421 dlm_queue_bast(dlm, lock); 422 __dlm_queue_bast(dlm, lock);
422 } 423 }
423 if (lock->ml.highest_blocked < target->ml.type) 424 if (lock->ml.highest_blocked < target->ml.type)
424 lock->ml.highest_blocked = target->ml.type; 425 lock->ml.highest_blocked = target->ml.type;
@@ -444,7 +445,7 @@ blocked:
444 spin_unlock(&target->spinlock); 445 spin_unlock(&target->spinlock);
445 446
446 __dlm_lockres_reserve_ast(res); 447 __dlm_lockres_reserve_ast(res);
447 dlm_queue_ast(dlm, target); 448 __dlm_queue_ast(dlm, target);
448 /* go back and check for more */ 449 /* go back and check for more */
449 goto converting; 450 goto converting;
450 } 451 }
@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
674 /* lockres can be re-dirtied/re-added to the 675 /* lockres can be re-dirtied/re-added to the
675 * dirty_list in this gap, but that is ok */ 676 * dirty_list in this gap, but that is ok */
676 677
678 spin_lock(&dlm->ast_lock);
677 spin_lock(&res->spinlock); 679 spin_lock(&res->spinlock);
678 if (res->owner != dlm->node_num) { 680 if (res->owner != dlm->node_num) {
679 __dlm_print_one_lock_resource(res); 681 __dlm_print_one_lock_resource(res);
@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
694 /* move it to the tail and keep going */ 696 /* move it to the tail and keep going */
695 res->state &= ~DLM_LOCK_RES_DIRTY; 697 res->state &= ~DLM_LOCK_RES_DIRTY;
696 spin_unlock(&res->spinlock); 698 spin_unlock(&res->spinlock);
699 spin_unlock(&dlm->ast_lock);
697 mlog(0, "delaying list shuffling for in-" 700 mlog(0, "delaying list shuffling for in-"
698 "progress lockres %.*s, state=%d\n", 701 "progress lockres %.*s, state=%d\n",
699 res->lockname.len, res->lockname.name, 702 res->lockname.len, res->lockname.name,
@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
715 dlm_shuffle_lists(dlm, res); 718 dlm_shuffle_lists(dlm, res);
716 res->state &= ~DLM_LOCK_RES_DIRTY; 719 res->state &= ~DLM_LOCK_RES_DIRTY;
717 spin_unlock(&res->spinlock); 720 spin_unlock(&res->spinlock);
721 spin_unlock(&dlm->ast_lock);
718 722
719 dlm_lockres_calc_usage(dlm, res); 723 dlm_lockres_calc_usage(dlm, res);
720 724
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b47c1b92b82b..817287c6a6db 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -354,7 +354,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
354 mlog(0, "master was in-progress. retry\n"); 354 mlog(0, "master was in-progress. retry\n");
355 ret = status; 355 ret = status;
356 } else { 356 } else {
357 mlog_errno(tmpret); 357 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
358 "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
358 if (dlm_is_host_down(tmpret)) { 359 if (dlm_is_host_down(tmpret)) {
359 /* NOTE: this seems strange, but it is what we want. 360 /* NOTE: this seems strange, but it is what we want.
360 * when the master goes down during a cancel or 361 * when the master goes down during a cancel or
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 50c4ee805da4..39eb16ac5f98 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3897,7 +3897,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3897 oinfo->dqi_gi.dqi_free_entry = 3897 oinfo->dqi_gi.dqi_free_entry =
3898 be32_to_cpu(lvb->lvb_free_entry); 3898 be32_to_cpu(lvb->lvb_free_entry);
3899 } else { 3899 } else {
3900 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); 3900 status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3901 oinfo->dqi_giblk, &bh);
3901 if (status) { 3902 if (status) {
3902 mlog_errno(status); 3903 mlog_errno(status);
3903 goto bail; 3904 goto bail;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a5fbd9cea968..97e54b9e654b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -278,10 +278,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
278 inode->i_atime = CURRENT_TIME; 278 inode->i_atime = CURRENT_TIME;
279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
281 281 ocfs2_journal_dirty(handle, bh);
282 ret = ocfs2_journal_dirty(handle, bh);
283 if (ret < 0)
284 mlog_errno(ret);
285 282
286out_commit: 283out_commit:
287 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 284 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -430,9 +427,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
430 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 427 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
431 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 428 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
432 429
433 status = ocfs2_journal_dirty(handle, fe_bh); 430 ocfs2_journal_dirty(handle, fe_bh);
434 if (status < 0)
435 mlog_errno(status);
436 431
437out_commit: 432out_commit:
438 ocfs2_commit_trans(osb, handle); 433 ocfs2_commit_trans(osb, handle);
@@ -449,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
449 int status = 0; 444 int status = 0;
450 struct ocfs2_dinode *fe = NULL; 445 struct ocfs2_dinode *fe = NULL;
451 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
452 struct ocfs2_truncate_context *tc = NULL;
453 447
454 mlog_entry("(inode = %llu, new_i_size = %llu\n", 448 mlog_entry("(inode = %llu, new_i_size = %llu\n",
455 (unsigned long long)OCFS2_I(inode)->ip_blkno, 449 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -488,6 +482,9 @@ static int ocfs2_truncate_file(struct inode *inode,
488 482
489 down_write(&OCFS2_I(inode)->ip_alloc_sem); 483 down_write(&OCFS2_I(inode)->ip_alloc_sem);
490 484
485 ocfs2_resv_discard(&osb->osb_la_resmap,
486 &OCFS2_I(inode)->ip_la_data_resv);
487
491 /* 488 /*
492 * The inode lock forced other nodes to sync and drop their 489 * The inode lock forced other nodes to sync and drop their
493 * pages, which (correctly) happens even if we have a truncate 490 * pages, which (correctly) happens even if we have a truncate
@@ -517,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
517 goto bail_unlock_sem; 514 goto bail_unlock_sem;
518 } 515 }
519 516
520 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 517 status = ocfs2_commit_truncate(osb, inode, di_bh);
521 if (status < 0) {
522 mlog_errno(status);
523 goto bail_unlock_sem;
524 }
525
526 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
527 if (status < 0) { 518 if (status < 0) {
528 mlog_errno(status); 519 mlog_errno(status);
529 goto bail_unlock_sem; 520 goto bail_unlock_sem;
@@ -666,11 +657,7 @@ restarted_transaction:
666 goto leave; 657 goto leave;
667 } 658 }
668 659
669 status = ocfs2_journal_dirty(handle, bh); 660 ocfs2_journal_dirty(handle, bh);
670 if (status < 0) {
671 mlog_errno(status);
672 goto leave;
673 }
674 661
675 spin_lock(&OCFS2_I(inode)->ip_lock); 662 spin_lock(&OCFS2_I(inode)->ip_lock);
676 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 663 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
@@ -946,9 +933,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
946 struct ocfs2_super *osb = OCFS2_SB(sb); 933 struct ocfs2_super *osb = OCFS2_SB(sb);
947 struct buffer_head *bh = NULL; 934 struct buffer_head *bh = NULL;
948 handle_t *handle = NULL; 935 handle_t *handle = NULL;
949 int qtype;
950 struct dquot *transfer_from[MAXQUOTAS] = { };
951 struct dquot *transfer_to[MAXQUOTAS] = { }; 936 struct dquot *transfer_to[MAXQUOTAS] = { };
937 int qtype;
952 938
953 mlog_entry("(0x%p, '%.*s')\n", dentry, 939 mlog_entry("(0x%p, '%.*s')\n", dentry,
954 dentry->d_name.len, dentry->d_name.name); 940 dentry->d_name.len, dentry->d_name.name);
@@ -979,10 +965,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
979 if (status) 965 if (status)
980 return status; 966 return status;
981 967
968 if (is_quota_modification(inode, attr))
969 dquot_initialize(inode);
982 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; 970 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
983 if (size_change) { 971 if (size_change) {
984 dquot_initialize(inode);
985
986 status = ocfs2_rw_lock(inode, 1); 972 status = ocfs2_rw_lock(inode, 1);
987 if (status < 0) { 973 if (status < 0) {
988 mlog_errno(status); 974 mlog_errno(status);
@@ -1032,9 +1018,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1032 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1018 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1033 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, 1019 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
1034 USRQUOTA); 1020 USRQUOTA);
1035 transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, 1021 if (!transfer_to[USRQUOTA]) {
1036 USRQUOTA);
1037 if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) {
1038 status = -ESRCH; 1022 status = -ESRCH;
1039 goto bail_unlock; 1023 goto bail_unlock;
1040 } 1024 }
@@ -1044,9 +1028,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1044 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1028 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1045 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, 1029 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
1046 GRPQUOTA); 1030 GRPQUOTA);
1047 transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, 1031 if (!transfer_to[GRPQUOTA]) {
1048 GRPQUOTA);
1049 if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) {
1050 status = -ESRCH; 1032 status = -ESRCH;
1051 goto bail_unlock; 1033 goto bail_unlock;
1052 } 1034 }
@@ -1058,7 +1040,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1058 mlog_errno(status); 1040 mlog_errno(status);
1059 goto bail_unlock; 1041 goto bail_unlock;
1060 } 1042 }
1061 status = dquot_transfer(inode, attr); 1043 status = __dquot_transfer(inode, transfer_to);
1062 if (status < 0) 1044 if (status < 0)
1063 goto bail_commit; 1045 goto bail_commit;
1064 } else { 1046 } else {
@@ -1098,10 +1080,8 @@ bail:
1098 brelse(bh); 1080 brelse(bh);
1099 1081
1100 /* Release quota pointers in case we acquired them */ 1082 /* Release quota pointers in case we acquired them */
1101 for (qtype = 0; qtype < MAXQUOTAS; qtype++) { 1083 for (qtype = 0; qtype < MAXQUOTAS; qtype++)
1102 dqput(transfer_to[qtype]); 1084 dqput(transfer_to[qtype]);
1103 dqput(transfer_from[qtype]);
1104 }
1105 1085
1106 if (!status && attr->ia_valid & ATTR_MODE) { 1086 if (!status && attr->ia_valid & ATTR_MODE) {
1107 status = ocfs2_acl_chmod(inode); 1087 status = ocfs2_acl_chmod(inode);
@@ -1195,9 +1175,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1195 di = (struct ocfs2_dinode *) bh->b_data; 1175 di = (struct ocfs2_dinode *) bh->b_data;
1196 di->i_mode = cpu_to_le16(inode->i_mode); 1176 di->i_mode = cpu_to_le16(inode->i_mode);
1197 1177
1198 ret = ocfs2_journal_dirty(handle, bh); 1178 ocfs2_journal_dirty(handle, bh);
1199 if (ret < 0)
1200 mlog_errno(ret);
1201 1179
1202out_trans: 1180out_trans:
1203 ocfs2_commit_trans(osb, handle); 1181 ocfs2_commit_trans(osb, handle);
@@ -1434,16 +1412,90 @@ out:
1434 return ret; 1412 return ret;
1435} 1413}
1436 1414
1415static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
1416{
1417 int i;
1418 struct ocfs2_extent_rec *rec = NULL;
1419
1420 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1421
1422 rec = &el->l_recs[i];
1423
1424 if (le32_to_cpu(rec->e_cpos) < pos)
1425 break;
1426 }
1427
1428 return i;
1429}
1430
1431/*
1432 * Helper to calculate the punching pos and length in one run, we handle the
1433 * following three cases in order:
1434 *
1435 * - remove the entire record
1436 * - remove a partial record
1437 * - no record needs to be removed (hole-punching completed)
1438*/
1439static void ocfs2_calc_trunc_pos(struct inode *inode,
1440 struct ocfs2_extent_list *el,
1441 struct ocfs2_extent_rec *rec,
1442 u32 trunc_start, u32 *trunc_cpos,
1443 u32 *trunc_len, u32 *trunc_end,
1444 u64 *blkno, int *done)
1445{
1446 int ret = 0;
1447 u32 coff, range;
1448
1449 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1450
1451 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1452 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1453 /*
1454 * Skip holes if any.
1455 */
1456 if (range < *trunc_end)
1457 *trunc_end = range;
1458 *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
1459 *blkno = le64_to_cpu(rec->e_blkno);
1460 *trunc_end = le32_to_cpu(rec->e_cpos);
1461 } else if (range > trunc_start) {
1462 *trunc_cpos = trunc_start;
1463 *trunc_len = *trunc_end - trunc_start;
1464 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1465 *blkno = le64_to_cpu(rec->e_blkno) +
1466 ocfs2_clusters_to_blocks(inode->i_sb, coff);
1467 *trunc_end = trunc_start;
1468 } else {
1469 /*
1470 * It may have two following possibilities:
1471 *
1472 * - last record has been removed
1473 * - trunc_start was within a hole
1474 *
1475 * both two cases mean the completion of hole punching.
1476 */
1477 ret = 1;
1478 }
1479
1480 *done = ret;
1481}
1482
1437static int ocfs2_remove_inode_range(struct inode *inode, 1483static int ocfs2_remove_inode_range(struct inode *inode,
1438 struct buffer_head *di_bh, u64 byte_start, 1484 struct buffer_head *di_bh, u64 byte_start,
1439 u64 byte_len) 1485 u64 byte_len)
1440{ 1486{
1441 int ret = 0; 1487 int ret = 0, flags = 0, done = 0, i;
1442 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; 1488 u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
1489 u32 cluster_in_el;
1443 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1490 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1444 struct ocfs2_cached_dealloc_ctxt dealloc; 1491 struct ocfs2_cached_dealloc_ctxt dealloc;
1445 struct address_space *mapping = inode->i_mapping; 1492 struct address_space *mapping = inode->i_mapping;
1446 struct ocfs2_extent_tree et; 1493 struct ocfs2_extent_tree et;
1494 struct ocfs2_path *path = NULL;
1495 struct ocfs2_extent_list *el = NULL;
1496 struct ocfs2_extent_rec *rec = NULL;
1497 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1498 u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
1447 1499
1448 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1500 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1449 ocfs2_init_dealloc_ctxt(&dealloc); 1501 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1469,17 +1521,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1469 goto out; 1521 goto out;
1470 } 1522 }
1471 1523
1524 /*
1525 * For reflinks, we may need to CoW 2 clusters which might be
1526 * partially zero'd later, if hole's start and end offset were
1527 * within one cluster(means is not exactly aligned to clustersize).
1528 */
1529
1530 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
1531
1532 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
1533 if (ret) {
1534 mlog_errno(ret);
1535 goto out;
1536 }
1537
1538 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
1539 if (ret) {
1540 mlog_errno(ret);
1541 goto out;
1542 }
1543 }
1544
1472 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1545 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1473 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1546 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1474 if (trunc_len >= trunc_start) 1547 cluster_in_el = trunc_end;
1475 trunc_len -= trunc_start;
1476 else
1477 trunc_len = 0;
1478 1548
1479 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", 1549 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1480 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1550 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1481 (unsigned long long)byte_start, 1551 (unsigned long long)byte_start,
1482 (unsigned long long)byte_len, trunc_start, trunc_len); 1552 (unsigned long long)byte_len, trunc_start, trunc_end);
1483 1553
1484 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1554 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1485 if (ret) { 1555 if (ret) {
@@ -1487,31 +1557,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1487 goto out; 1557 goto out;
1488 } 1558 }
1489 1559
1490 cpos = trunc_start; 1560 path = ocfs2_new_path_from_et(&et);
1491 while (trunc_len) { 1561 if (!path) {
1492 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, 1562 ret = -ENOMEM;
1493 &alloc_size, NULL); 1563 mlog_errno(ret);
1564 goto out;
1565 }
1566
1567 while (trunc_end > trunc_start) {
1568
1569 ret = ocfs2_find_path(INODE_CACHE(inode), path,
1570 cluster_in_el);
1494 if (ret) { 1571 if (ret) {
1495 mlog_errno(ret); 1572 mlog_errno(ret);
1496 goto out; 1573 goto out;
1497 } 1574 }
1498 1575
1499 if (alloc_size > trunc_len) 1576 el = path_leaf_el(path);
1500 alloc_size = trunc_len; 1577
1578 i = ocfs2_find_rec(el, trunc_end);
1579 /*
1580 * Need to go to previous extent block.
1581 */
1582 if (i < 0) {
1583 if (path->p_tree_depth == 0)
1584 break;
1501 1585
1502 /* Only do work for non-holes */ 1586 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
1503 if (phys_cpos != 0) { 1587 path,
1504 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1588 &cluster_in_el);
1505 phys_cpos, alloc_size,
1506 &dealloc);
1507 if (ret) { 1589 if (ret) {
1508 mlog_errno(ret); 1590 mlog_errno(ret);
1509 goto out; 1591 goto out;
1510 } 1592 }
1593
1594 /*
1595 * We've reached the leftmost extent block,
1596 * it's safe to leave.
1597 */
1598 if (cluster_in_el == 0)
1599 break;
1600
1601 /*
1602 * The 'pos' searched for previous extent block is
1603 * always one cluster less than actual trunc_end.
1604 */
1605 trunc_end = cluster_in_el + 1;
1606
1607 ocfs2_reinit_path(path, 1);
1608
1609 continue;
1610
1611 } else
1612 rec = &el->l_recs[i];
1613
1614 ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
1615 &trunc_len, &trunc_end, &blkno, &done);
1616 if (done)
1617 break;
1618
1619 flags = rec->e_flags;
1620 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
1621
1622 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1623 phys_cpos, trunc_len, flags,
1624 &dealloc, refcount_loc);
1625 if (ret < 0) {
1626 mlog_errno(ret);
1627 goto out;
1511 } 1628 }
1512 1629
1513 cpos += alloc_size; 1630 cluster_in_el = trunc_end;
1514 trunc_len -= alloc_size; 1631
1632 ocfs2_reinit_path(path, 1);
1515 } 1633 }
1516 1634
1517 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1635 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index af189887201c..abb0a95cc717 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -376,6 +376,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
376 376
377 OCFS2_I(inode)->ip_last_used_slot = 0; 377 OCFS2_I(inode)->ip_last_used_slot = 0;
378 OCFS2_I(inode)->ip_last_used_group = 0; 378 OCFS2_I(inode)->ip_last_used_group = 0;
379
380 if (S_ISDIR(inode->i_mode))
381 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
382 OCFS2_RESV_FLAG_DIR);
379 mlog_exit_void(); 383 mlog_exit_void();
380} 384}
381 385
@@ -539,7 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
539 struct buffer_head *fe_bh) 543 struct buffer_head *fe_bh)
540{ 544{
541 int status = 0; 545 int status = 0;
542 struct ocfs2_truncate_context *tc = NULL;
543 struct ocfs2_dinode *fe; 546 struct ocfs2_dinode *fe;
544 handle_t *handle = NULL; 547 handle_t *handle = NULL;
545 548
@@ -582,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
582 ocfs2_commit_trans(osb, handle); 585 ocfs2_commit_trans(osb, handle);
583 handle = NULL; 586 handle = NULL;
584 587
585 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 588 status = ocfs2_commit_truncate(osb, inode, fe_bh);
586 if (status < 0) {
587 mlog_errno(status);
588 goto out;
589 }
590
591 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
592 if (status < 0) { 589 if (status < 0) {
593 mlog_errno(status); 590 mlog_errno(status);
594 goto out; 591 goto out;
@@ -659,12 +656,7 @@ static int ocfs2_remove_inode(struct inode *inode,
659 656
660 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); 657 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
661 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); 658 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
662 659 ocfs2_journal_dirty(handle, di_bh);
663 status = ocfs2_journal_dirty(handle, di_bh);
664 if (status < 0) {
665 mlog_errno(status);
666 goto bail_commit;
667 }
668 660
669 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); 661 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
670 dquot_free_inode(inode); 662 dquot_free_inode(inode);
@@ -980,7 +972,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
980void ocfs2_delete_inode(struct inode *inode) 972void ocfs2_delete_inode(struct inode *inode)
981{ 973{
982 int wipe, status; 974 int wipe, status;
983 sigset_t blocked, oldset; 975 sigset_t oldset;
984 struct buffer_head *di_bh = NULL; 976 struct buffer_head *di_bh = NULL;
985 977
986 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 978 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
@@ -1007,13 +999,7 @@ void ocfs2_delete_inode(struct inode *inode)
1007 * messaging paths may return us -ERESTARTSYS. Which would 999 * messaging paths may return us -ERESTARTSYS. Which would
1008 * cause us to exit early, resulting in inodes being orphaned 1000 * cause us to exit early, resulting in inodes being orphaned
1009 * forever. */ 1001 * forever. */
1010 sigfillset(&blocked); 1002 ocfs2_block_signals(&oldset);
1011 status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
1012 if (status < 0) {
1013 mlog_errno(status);
1014 ocfs2_cleanup_delete_inode(inode, 1);
1015 goto bail;
1016 }
1017 1003
1018 /* 1004 /*
1019 * Synchronize us against ocfs2_get_dentry. We take this in 1005 * Synchronize us against ocfs2_get_dentry. We take this in
@@ -1087,9 +1073,7 @@ bail_unlock_nfs_sync:
1087 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); 1073 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
1088 1074
1089bail_unblock: 1075bail_unblock:
1090 status = sigprocmask(SIG_SETMASK, &oldset, NULL); 1076 ocfs2_unblock_signals(&oldset);
1091 if (status < 0)
1092 mlog_errno(status);
1093bail: 1077bail:
1094 clear_inode(inode); 1078 clear_inode(inode);
1095 mlog_exit_void(); 1079 mlog_exit_void();
@@ -1123,6 +1107,10 @@ void ocfs2_clear_inode(struct inode *inode)
1123 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); 1107 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
1124 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); 1108 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1125 1109
1110 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
1111 &oi->ip_la_data_resv);
1112 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1113
1126 /* We very well may get a clear_inode before all an inodes 1114 /* We very well may get a clear_inode before all an inodes
1127 * metadata has hit disk. Of course, we can't drop any cluster 1115 * metadata has hit disk. Of course, we can't drop any cluster
1128 * locks until the journal has finished with it. The only 1116 * locks until the journal has finished with it. The only
@@ -1298,13 +1286,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1298 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 1286 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
1299 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1287 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1300 1288
1301 status = ocfs2_journal_dirty(handle, bh); 1289 ocfs2_journal_dirty(handle, bh);
1302 if (status < 0)
1303 mlog_errno(status);
1304
1305 status = 0;
1306leave: 1290leave:
1307
1308 mlog_exit(status); 1291 mlog_exit(status);
1309 return status; 1292 return status;
1310} 1293}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 0b28e1921a39..9f5f5fcadc45 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -70,6 +70,8 @@ struct ocfs2_inode_info
70 /* Only valid if the inode is the dir. */ 70 /* Only valid if the inode is the dir. */
71 u32 ip_last_used_slot; 71 u32 ip_last_used_slot;
72 u64 ip_last_used_group; 72 u64 ip_last_used_group;
73
74 struct ocfs2_alloc_reservation ip_la_data_resv;
73}; 75};
74 76
75/* 77/*
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9336c60e3a36..47878cf16418 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -402,9 +402,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
402} 402}
403 403
404/* 404/*
405 * 'nblocks' is what you want to add to the current 405 * 'nblocks' is what you want to add to the current transaction.
406 * transaction. extend_trans will either extend the current handle by
407 * nblocks, or commit it and start a new one with nblocks credits.
408 * 406 *
409 * This might call jbd2_journal_restart() which will commit dirty buffers 407 * This might call jbd2_journal_restart() which will commit dirty buffers
410 * and then restart the transaction. Before calling 408 * and then restart the transaction. Before calling
@@ -422,11 +420,15 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
422 */ 420 */
423int ocfs2_extend_trans(handle_t *handle, int nblocks) 421int ocfs2_extend_trans(handle_t *handle, int nblocks)
424{ 422{
425 int status; 423 int status, old_nblocks;
426 424
427 BUG_ON(!handle); 425 BUG_ON(!handle);
428 BUG_ON(!nblocks); 426 BUG_ON(nblocks < 0);
427
428 if (!nblocks)
429 return 0;
429 430
431 old_nblocks = handle->h_buffer_credits;
430 mlog_entry_void(); 432 mlog_entry_void();
431 433
432 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); 434 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
@@ -445,7 +447,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
445 mlog(0, 447 mlog(0,
446 "jbd2_journal_extend failed, trying " 448 "jbd2_journal_extend failed, trying "
447 "jbd2_journal_restart\n"); 449 "jbd2_journal_restart\n");
448 status = jbd2_journal_restart(handle, nblocks); 450 status = jbd2_journal_restart(handle,
451 old_nblocks + nblocks);
449 if (status < 0) { 452 if (status < 0) {
450 mlog_errno(status); 453 mlog_errno(status);
451 goto bail; 454 goto bail;
@@ -734,8 +737,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
734 return __ocfs2_journal_access(handle, ci, bh, NULL, type); 737 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
735} 738}
736 739
737int ocfs2_journal_dirty(handle_t *handle, 740void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
738 struct buffer_head *bh)
739{ 741{
740 int status; 742 int status;
741 743
@@ -743,13 +745,9 @@ int ocfs2_journal_dirty(handle_t *handle,
743 (unsigned long long)bh->b_blocknr); 745 (unsigned long long)bh->b_blocknr);
744 746
745 status = jbd2_journal_dirty_metadata(handle, bh); 747 status = jbd2_journal_dirty_metadata(handle, bh);
746 if (status < 0) 748 BUG_ON(status);
747 mlog(ML_ERROR, "Could not dirty metadata buffer. "
748 "(bh->b_blocknr=%llu)\n",
749 (unsigned long long)bh->b_blocknr);
750 749
751 mlog_exit(status); 750 mlog_exit_void();
752 return status;
753} 751}
754 752
755#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 753#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3f74e09b0d80..b5baaa8e710f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -325,8 +325,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
325 * <modify the bh> 325 * <modify the bh>
326 * ocfs2_journal_dirty(handle, bh); 326 * ocfs2_journal_dirty(handle, bh);
327 */ 327 */
328int ocfs2_journal_dirty(handle_t *handle, 328void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh);
329 struct buffer_head *bh);
330 329
331/* 330/*
332 * Credit Macros: 331 * Credit Macros:
@@ -562,6 +561,18 @@ static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
562 return blocks; 561 return blocks;
563} 562}
564 563
564/*
565 * Allocating a discontiguous block group requires the credits from
566 * ocfs2_calc_group_alloc_credits() as well as enough credits to fill
567 * the group descriptor's extent list. The caller already has started
568 * the transaction with ocfs2_calc_group_alloc_credits(). They extend
569 * it with these credits.
570 */
571static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
572{
573 return ocfs2_extent_recs_per_gd(sb);
574}
575
565static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, 576static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
566 unsigned int clusters_to_del, 577 unsigned int clusters_to_del,
567 struct ocfs2_dinode *fe, 578 struct ocfs2_dinode *fe,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8c..3d7419682dc0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
76 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb;
123
124 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
125 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
126
127 /*
128 * This takes care of files systems with very small group
129 * descriptors - 512 byte blocksize at cluster sizes lower
130 * than 16K and also 1k blocksize with 4k cluster size.
131 */
132 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
133 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
134 return OCFS2_LA_OLD_DEFAULT;
135
136 /*
137 * Leave enough room for some block groups and make the final
138 * value we work from a multiple of 4.
139 */
140 gd_mb -= 16;
141 gd_mb &= 0xFFFFFFFB;
142
143 la_mb = gd_mb;
144
145 /*
146 * Keep window sizes down to a reasonable default
147 */
148 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
149 /*
150 * Some clustersize / blocksize combinations will have
151 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
152 * default size, but get poor distribution when
153 * limited to exactly 256 megabytes.
154 *
155 * As an example, 16K clustersize at 4K blocksize
156 * gives us a cluster group size of 504M. Paring the
157 * local alloc size down to 256 however, would give us
158 * only one window and around 200MB left in the
159 * cluster group. Instead, find the first size below
160 * 256 which would give us an even distribution.
161 *
162 * Larger cluster group sizes actually work out pretty
163 * well when pared to 256, so we don't have to do this
164 * for any group that fits more than two
165 * OCFS2_LA_MAX_DEFAULT_MB windows.
166 */
167 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
168 la_mb = 256;
169 else {
170 unsigned int gd_mult = gd_mb;
171
172 while (gd_mult > 256)
173 gd_mult = gd_mult >> 1;
174
175 la_mb = gd_mult;
176 }
177 }
178
179 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
180 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
181 /* Too many nodes, too few disk clusters. */
182 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot;
184
185 return la_mb;
186}
187
188void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
189{
190 struct super_block *sb = osb->sb;
191 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
192 unsigned int la_max_mb;
193
194 la_max_mb = ocfs2_clusters_to_megabytes(sb,
195 ocfs2_local_alloc_size(sb) * 8);
196
197 mlog(0, "requested: %dM, max: %uM, default: %uM\n",
198 requested_mb, la_max_mb, la_default_mb);
199
200 if (requested_mb == -1) {
201 /* No user request - use defaults */
202 osb->local_alloc_default_bits =
203 ocfs2_megabytes_to_clusters(sb, la_default_mb);
204 } else if (requested_mb > la_max_mb) {
205 /* Request is too big, we give the maximum available */
206 osb->local_alloc_default_bits =
207 ocfs2_megabytes_to_clusters(sb, la_max_mb);
208 } else {
209 osb->local_alloc_default_bits =
210 ocfs2_megabytes_to_clusters(sb, requested_mb);
211 }
212
213 osb->local_alloc_bits = osb->local_alloc_default_bits;
214}
215
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 216static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{ 217{
79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 218 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 295 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 osb->local_alloc_bits = 296 osb->local_alloc_bits =
158 ocfs2_megabytes_to_clusters(osb->sb, 297 ocfs2_megabytes_to_clusters(osb->sb,
159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 298 ocfs2_la_default_mb(osb));
160 } 299 }
161 300
162 /* read the alloc off disk */ 301 /* read the alloc off disk */
@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 401
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 402 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 403
404 ocfs2_resmap_uninit(&osb->osb_la_resmap);
405
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 406 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 407 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 408 OCFS2_INVALID_SLOT);
@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
305 } 446 }
306 447
307 ocfs2_clear_local_alloc(alloc); 448 ocfs2_clear_local_alloc(alloc);
308 449 ocfs2_journal_dirty(handle, bh);
309 status = ocfs2_journal_dirty(handle, bh);
310 if (status < 0) {
311 mlog_errno(status);
312 goto out_commit;
313 }
314 450
315 brelse(bh); 451 brelse(bh);
316 osb->local_alloc_bh = NULL; 452 osb->local_alloc_bh = NULL;
@@ -481,46 +617,6 @@ out:
481 return status; 617 return status;
482} 618}
483 619
484/* Check to see if the local alloc window is within ac->ac_max_block */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486 struct ocfs2_alloc_context *ac,
487 u32 bits_wanted)
488{
489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 struct ocfs2_dinode *alloc;
491 struct ocfs2_local_alloc *la;
492 int start;
493 u64 block_off;
494
495 if (!ac->ac_max_block)
496 return 1;
497
498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 la = OCFS2_LOCAL_ALLOC(alloc);
500
501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 if (start == -1) {
503 mlog_errno(-ENOSPC);
504 return 0;
505 }
506
507 /*
508 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 * the blkno just past our actual allocation. This is perfect
510 * to compare with ac_max_block.
511 */
512 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 le32_to_cpu(la->la_bm_off) +
514 start + bits_wanted);
515 mlog(0, "Checking %llu against %llu\n",
516 (unsigned long long)block_off,
517 (unsigned long long)ac->ac_max_block);
518 if (block_off > ac->ac_max_block)
519 return 0;
520
521 return 1;
522}
523
524/* 620/*
525 * make sure we've got at least bits_wanted contiguous bits in the 621 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex. 622 * local alloc. You lose them when you drop i_mutex.
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
613 mlog(0, "Calling in_range for max block %llu\n", 709 mlog(0, "Calling in_range for max block %llu\n",
614 (unsigned long long)ac->ac_max_block); 710 (unsigned long long)ac->ac_max_block);
615 711
616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 bits_wanted)) {
618 /*
619 * The window is outside ac->ac_max_block.
620 * This errno tells the caller to keep localalloc enabled
621 * but to get the allocation from the main bitmap.
622 */
623 status = -EFBIG;
624 goto bail;
625 }
626
627 ac->ac_inode = local_alloc_inode; 712 ac->ac_inode = local_alloc_inode;
628 /* We should never use localalloc from another slot */ 713 /* We should never use localalloc from another slot */
629 ac->ac_alloc_slot = osb->slot_num; 714 ac->ac_alloc_slot = osb->slot_num;
@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 749 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 la = OCFS2_LOCAL_ALLOC(alloc); 750 la = OCFS2_LOCAL_ALLOC(alloc);
666 751
667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 752 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
753 ac->ac_resv);
668 if (start == -1) { 754 if (start == -1) {
669 /* TODO: Shouldn't we just BUG here? */ 755 /* TODO: Shouldn't we just BUG here? */
670 status = -ENOSPC; 756 status = -ENOSPC;
@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
674 760
675 bitmap = la->la_bitmap; 761 bitmap = la->la_bitmap;
676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 762 *bit_off = le32_to_cpu(la->la_bm_off) + start;
677 /* local alloc is always contiguous by nature -- we never
678 * delete bits from it! */
679 *num_bits = bits_wanted; 763 *num_bits = bits_wanted;
680 764
681 status = ocfs2_journal_access_di(handle, 765 status = ocfs2_journal_access_di(handle,
@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
687 goto bail; 771 goto bail;
688 } 772 }
689 773
774 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
775 bits_wanted);
776
690 while(bits_wanted--) 777 while(bits_wanted--)
691 ocfs2_set_bit(start++, bitmap); 778 ocfs2_set_bit(start++, bitmap);
692 779
693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 780 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
781 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
694 782
695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696 if (status < 0) {
697 mlog_errno(status);
698 goto bail;
699 }
700
701 status = 0;
702bail: 783bail:
703 mlog_exit(status); 784 mlog_exit(status);
704 return status; 785 return status;
@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
722} 803}
723 804
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 805static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 struct ocfs2_dinode *alloc, 806 struct ocfs2_dinode *alloc,
726 u32 numbits) 807 u32 *numbits,
808 struct ocfs2_alloc_reservation *resv)
727{ 809{
728 int numfound, bitoff, left, startoff, lastzero; 810 int numfound, bitoff, left, startoff, lastzero;
811 int local_resv = 0;
812 struct ocfs2_alloc_reservation r;
729 void *bitmap = NULL; 813 void *bitmap = NULL;
814 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
730 815
731 mlog_entry("(numbits wanted = %u)\n", numbits); 816 mlog_entry("(numbits wanted = %u)\n", *numbits);
732 817
733 if (!alloc->id1.bitmap1.i_total) { 818 if (!alloc->id1.bitmap1.i_total) {
734 mlog(0, "No bits in my window!\n"); 819 mlog(0, "No bits in my window!\n");
@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
736 goto bail; 821 goto bail;
737 } 822 }
738 823
824 if (!resv) {
825 local_resv = 1;
826 ocfs2_resv_init_once(&r);
827 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
828 resv = &r;
829 }
830
831 numfound = *numbits;
832 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
833 if (numfound < *numbits)
834 *numbits = numfound;
835 goto bail;
836 }
837
838 /*
839 * Code error. While reservations are enabled, local
840 * allocation should _always_ go through them.
841 */
842 BUG_ON(osb->osb_resv_level != 0);
843
844 /*
845 * Reservations are disabled. Handle this the old way.
846 */
847
739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 848 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740 849
741 numfound = bitoff = startoff = 0; 850 numfound = bitoff = startoff = 0;
@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
761 startoff = bitoff+1; 870 startoff = bitoff+1;
762 } 871 }
763 /* we got everything we needed */ 872 /* we got everything we needed */
764 if (numfound == numbits) { 873 if (numfound == *numbits) {
765 /* mlog(0, "Found it all!\n"); */ 874 /* mlog(0, "Found it all!\n"); */
766 break; 875 break;
767 } 876 }
@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 879 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771 numfound); 880 numfound);
772 881
773 if (numfound == numbits) 882 if (numfound == *numbits)
774 bitoff = startoff - numfound; 883 bitoff = startoff - numfound;
775 else 884 else
776 bitoff = -1; 885 bitoff = -1;
777 886
778bail: 887bail:
888 if (local_resv)
889 ocfs2_resv_discard(resmap, resv);
890
779 mlog_exit(bitoff); 891 mlog_exit(bitoff);
780 return bitoff; 892 return bitoff;
781} 893}
@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1049 /* we used the generic suballoc reserve function, but we set 1161 /* we used the generic suballoc reserve function, but we set
1050 * everything up nicely, so there's no reason why we can't use 1162 * everything up nicely, so there's no reason why we can't use
1051 * the more specific cluster api to claim bits. */ 1163 * the more specific cluster api to claim bits. */
1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1164 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1053 &cluster_off, &cluster_count); 1165 &cluster_off, &cluster_count);
1054 if (status == -ENOSPC) { 1166 if (status == -ENOSPC) {
1055retry_enospc: 1167retry_enospc:
@@ -1063,7 +1175,7 @@ retry_enospc:
1063 goto bail; 1175 goto bail;
1064 1176
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1177 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066 status = ocfs2_claim_clusters(osb, handle, ac, 1178 status = ocfs2_claim_clusters(handle, ac,
1067 osb->local_alloc_bits, 1179 osb->local_alloc_bits,
1068 &cluster_off, 1180 &cluster_off,
1069 &cluster_count); 1181 &cluster_count);
@@ -1098,6 +1210,9 @@ retry_enospc:
1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1210 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099 le16_to_cpu(la->la_size)); 1211 le16_to_cpu(la->la_size));
1100 1212
1213 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1214 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1215
1101 mlog(0, "New window allocated:\n"); 1216 mlog(0, "New window allocated:\n");
1102 mlog(0, "window la_bm_off = %u\n", 1217 mlog(0, "window la_bm_off = %u\n",
1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1218 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1169 } 1284 }
1170 1285
1171 ocfs2_clear_local_alloc(alloc); 1286 ocfs2_clear_local_alloc(alloc);
1172 1287 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178 1288
1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1289 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180 main_bm_inode, main_bm_bh); 1290 main_bm_inode, main_bm_bh);
@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1192 1302
1193 atomic_inc(&osb->alloc_stats.moves); 1303 atomic_inc(&osb->alloc_stats.moves);
1194 1304
1195 status = 0;
1196bail: 1305bail:
1197 if (handle) 1306 if (handle)
1198 ocfs2_commit_trans(osb, handle); 1307 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index ac5ea9f86653..1be9b5864460 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -30,6 +30,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
30 30
31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); 31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
32 32
33void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
34unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
35
33int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 36int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
34 int node_num, 37 int node_num,
35 struct ocfs2_dinode **alloc_copy); 38 struct ocfs2_dinode **alloc_copy);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7898bd3a99f5..af2b8fe1f139 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -41,44 +41,20 @@
41#include "file.h" 41#include "file.h"
42#include "inode.h" 42#include "inode.h"
43#include "mmap.h" 43#include "mmap.h"
44#include "super.h"
44 45
45static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
46{
47 /* The best way to deal with signals in the vm path is
48 * to block them upfront, rather than allowing the
49 * locking paths to return -ERESTARTSYS. */
50 sigfillset(blocked);
51
52 /* We should technically never get a bad return value
53 * from sigprocmask */
54 return sigprocmask(SIG_BLOCK, blocked, oldset);
55}
56
57static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
58{
59 return sigprocmask(SIG_SETMASK, oldset, NULL);
60}
61 46
62static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) 47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
63{ 48{
64 sigset_t blocked, oldset; 49 sigset_t oldset;
65 int error, ret; 50 int ret;
66 51
67 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff); 52 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
68 53
69 error = ocfs2_vm_op_block_sigs(&blocked, &oldset); 54 ocfs2_block_signals(&oldset);
70 if (error < 0) {
71 mlog_errno(error);
72 ret = VM_FAULT_SIGBUS;
73 goto out;
74 }
75
76 ret = filemap_fault(area, vmf); 55 ret = filemap_fault(area, vmf);
56 ocfs2_unblock_signals(&oldset);
77 57
78 error = ocfs2_vm_op_unblock_sigs(&oldset);
79 if (error < 0)
80 mlog_errno(error);
81out:
82 mlog_exit_ptr(vmf->page); 58 mlog_exit_ptr(vmf->page);
83 return ret; 59 return ret;
84} 60}
@@ -158,14 +134,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158 struct page *page = vmf->page; 134 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 136 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 137 sigset_t oldset;
162 int ret, ret2; 138 int ret;
163 139
164 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); 140 ocfs2_block_signals(&oldset);
165 if (ret < 0) {
166 mlog_errno(ret);
167 return ret;
168 }
169 141
170 /* 142 /*
171 * The cluster locks taken will block a truncate from another 143 * The cluster locks taken will block a truncate from another
@@ -193,9 +165,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
193 ocfs2_inode_unlock(inode, 1); 165 ocfs2_inode_unlock(inode, 1);
194 166
195out: 167out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 168 ocfs2_unblock_signals(&oldset);
197 if (ret2 < 0)
198 mlog_errno(ret2);
199 if (ret) 169 if (ret)
200 ret = VM_FAULT_SIGBUS; 170 ret = VM_FAULT_SIGBUS;
201 return ret; 171 return ret;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4cbb18f26c5f..f171b51a74f7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -204,14 +204,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
204 inode->i_nlink = 2; 204 inode->i_nlink = 2;
205 else 205 else
206 inode->i_nlink = 1; 206 inode->i_nlink = 1;
207 inode->i_uid = current_fsuid(); 207 inode_init_owner(inode, dir, mode);
208 if (dir->i_mode & S_ISGID) {
209 inode->i_gid = dir->i_gid;
210 if (S_ISDIR(mode))
211 mode |= S_ISGID;
212 } else
213 inode->i_gid = current_fsgid();
214 inode->i_mode = mode;
215 dquot_initialize(inode); 208 dquot_initialize(inode);
216 return inode; 209 return inode;
217} 210}
@@ -239,6 +232,8 @@ static int ocfs2_mknod(struct inode *dir,
239 }; 232 };
240 int did_quota_inode = 0; 233 int did_quota_inode = 0;
241 struct ocfs2_dir_lookup_result lookup = { NULL, }; 234 struct ocfs2_dir_lookup_result lookup = { NULL, };
235 sigset_t oldset;
236 int did_block_signals = 0;
242 237
243 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 238 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
244 (unsigned long)dev, dentry->d_name.len, 239 (unsigned long)dev, dentry->d_name.len,
@@ -350,6 +345,10 @@ static int ocfs2_mknod(struct inode *dir,
350 goto leave; 345 goto leave;
351 } 346 }
352 347
348 /* Starting to change things, restart is no longer possible. */
349 ocfs2_block_signals(&oldset);
350 did_block_signals = 1;
351
353 status = dquot_alloc_inode(inode); 352 status = dquot_alloc_inode(inode);
354 if (status) 353 if (status)
355 goto leave; 354 goto leave;
@@ -384,11 +383,7 @@ static int ocfs2_mknod(struct inode *dir,
384 goto leave; 383 goto leave;
385 } 384 }
386 ocfs2_add_links_count(dirfe, 1); 385 ocfs2_add_links_count(dirfe, 1);
387 status = ocfs2_journal_dirty(handle, parent_fe_bh); 386 ocfs2_journal_dirty(handle, parent_fe_bh);
388 if (status < 0) {
389 mlog_errno(status);
390 goto leave;
391 }
392 inc_nlink(dir); 387 inc_nlink(dir);
393 } 388 }
394 389
@@ -439,6 +434,8 @@ leave:
439 ocfs2_commit_trans(osb, handle); 434 ocfs2_commit_trans(osb, handle);
440 435
441 ocfs2_inode_unlock(dir, 1); 436 ocfs2_inode_unlock(dir, 1);
437 if (did_block_signals)
438 ocfs2_unblock_signals(&oldset);
442 439
443 if (status == -ENOSPC) 440 if (status == -ENOSPC)
444 mlog(0, "Disk is full\n"); 441 mlog(0, "Disk is full\n");
@@ -487,14 +484,15 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
487 int status = 0; 484 int status = 0;
488 struct ocfs2_dinode *fe = NULL; 485 struct ocfs2_dinode *fe = NULL;
489 struct ocfs2_extent_list *fel; 486 struct ocfs2_extent_list *fel;
490 u64 fe_blkno = 0; 487 u64 suballoc_loc, fe_blkno = 0;
491 u16 suballoc_bit; 488 u16 suballoc_bit;
492 u16 feat; 489 u16 feat;
493 490
494 *new_fe_bh = NULL; 491 *new_fe_bh = NULL;
495 492
496 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, 493 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
497 inode_ac, &suballoc_bit, &fe_blkno); 494 inode_ac, &suballoc_loc,
495 &suballoc_bit, &fe_blkno);
498 if (status < 0) { 496 if (status < 0) {
499 mlog_errno(status); 497 mlog_errno(status);
500 goto leave; 498 goto leave;
@@ -531,6 +529,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
531 fe->i_generation = cpu_to_le32(inode->i_generation); 529 fe->i_generation = cpu_to_le32(inode->i_generation);
532 fe->i_fs_generation = cpu_to_le32(osb->fs_generation); 530 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
533 fe->i_blkno = cpu_to_le64(fe_blkno); 531 fe->i_blkno = cpu_to_le64(fe_blkno);
532 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
534 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 533 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
535 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 534 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
536 fe->i_uid = cpu_to_le32(inode->i_uid); 535 fe->i_uid = cpu_to_le32(inode->i_uid);
@@ -567,11 +566,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
567 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); 566 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
568 } 567 }
569 568
570 status = ocfs2_journal_dirty(handle, *new_fe_bh); 569 ocfs2_journal_dirty(handle, *new_fe_bh);
571 if (status < 0) {
572 mlog_errno(status);
573 goto leave;
574 }
575 570
576 ocfs2_populate_inode(inode, fe, 1); 571 ocfs2_populate_inode(inode, fe, 1);
577 ocfs2_ci_set_new(osb, INODE_CACHE(inode)); 572 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
@@ -637,6 +632,7 @@ static int ocfs2_link(struct dentry *old_dentry,
637 struct ocfs2_dinode *fe = NULL; 632 struct ocfs2_dinode *fe = NULL;
638 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 633 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
639 struct ocfs2_dir_lookup_result lookup = { NULL, }; 634 struct ocfs2_dir_lookup_result lookup = { NULL, };
635 sigset_t oldset;
640 636
641 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, 637 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
642 old_dentry->d_name.len, old_dentry->d_name.name, 638 old_dentry->d_name.len, old_dentry->d_name.name,
@@ -693,6 +689,9 @@ static int ocfs2_link(struct dentry *old_dentry,
693 goto out_unlock_inode; 689 goto out_unlock_inode;
694 } 690 }
695 691
692 /* Starting to change things, restart is no longer possible. */
693 ocfs2_block_signals(&oldset);
694
696 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, 695 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
697 OCFS2_JOURNAL_ACCESS_WRITE); 696 OCFS2_JOURNAL_ACCESS_WRITE);
698 if (err < 0) { 697 if (err < 0) {
@@ -705,14 +704,7 @@ static int ocfs2_link(struct dentry *old_dentry,
705 ocfs2_set_links_count(fe, inode->i_nlink); 704 ocfs2_set_links_count(fe, inode->i_nlink);
706 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 705 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
707 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 706 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
708 707 ocfs2_journal_dirty(handle, fe_bh);
709 err = ocfs2_journal_dirty(handle, fe_bh);
710 if (err < 0) {
711 ocfs2_add_links_count(fe, -1);
712 drop_nlink(inode);
713 mlog_errno(err);
714 goto out_commit;
715 }
716 708
717 err = ocfs2_add_entry(handle, dentry, inode, 709 err = ocfs2_add_entry(handle, dentry, inode,
718 OCFS2_I(inode)->ip_blkno, 710 OCFS2_I(inode)->ip_blkno,
@@ -736,6 +728,7 @@ static int ocfs2_link(struct dentry *old_dentry,
736 728
737out_commit: 729out_commit:
738 ocfs2_commit_trans(osb, handle); 730 ocfs2_commit_trans(osb, handle);
731 ocfs2_unblock_signals(&oldset);
739out_unlock_inode: 732out_unlock_inode:
740 ocfs2_inode_unlock(inode, 1); 733 ocfs2_inode_unlock(inode, 1);
741 734
@@ -909,12 +902,7 @@ static int ocfs2_unlink(struct inode *dir,
909 drop_nlink(inode); 902 drop_nlink(inode);
910 drop_nlink(inode); 903 drop_nlink(inode);
911 ocfs2_set_links_count(fe, inode->i_nlink); 904 ocfs2_set_links_count(fe, inode->i_nlink);
912 905 ocfs2_journal_dirty(handle, fe_bh);
913 status = ocfs2_journal_dirty(handle, fe_bh);
914 if (status < 0) {
915 mlog_errno(status);
916 goto leave;
917 }
918 906
919 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 907 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
920 if (S_ISDIR(inode->i_mode)) 908 if (S_ISDIR(inode->i_mode))
@@ -1332,12 +1320,7 @@ static int ocfs2_rename(struct inode *old_dir,
1332 ocfs2_set_links_count(newfe, 0); 1320 ocfs2_set_links_count(newfe, 0);
1333 else 1321 else
1334 ocfs2_add_links_count(newfe, -1); 1322 ocfs2_add_links_count(newfe, -1);
1335 1323 ocfs2_journal_dirty(handle, newfe_bh);
1336 status = ocfs2_journal_dirty(handle, newfe_bh);
1337 if (status < 0) {
1338 mlog_errno(status);
1339 goto bail;
1340 }
1341 } else { 1324 } else {
1342 /* if the name was not found in new_dir, add it now */ 1325 /* if the name was not found in new_dir, add it now */
1343 status = ocfs2_add_entry(handle, new_dentry, old_inode, 1326 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1356,10 +1339,7 @@ static int ocfs2_rename(struct inode *old_dir,
1356 1339
1357 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec); 1340 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1358 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec); 1341 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1359 1342 ocfs2_journal_dirty(handle, old_inode_bh);
1360 status = ocfs2_journal_dirty(handle, old_inode_bh);
1361 if (status < 0)
1362 mlog_errno(status);
1363 } else 1343 } else
1364 mlog_errno(status); 1344 mlog_errno(status);
1365 1345
@@ -1431,7 +1411,7 @@ static int ocfs2_rename(struct inode *old_dir,
1431 OCFS2_JOURNAL_ACCESS_WRITE); 1411 OCFS2_JOURNAL_ACCESS_WRITE);
1432 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1412 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1433 ocfs2_set_links_count(fe, old_dir->i_nlink); 1413 ocfs2_set_links_count(fe, old_dir->i_nlink);
1434 status = ocfs2_journal_dirty(handle, old_dir_bh); 1414 ocfs2_journal_dirty(handle, old_dir_bh);
1435 } 1415 }
1436 } 1416 }
1437 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); 1417 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
@@ -1563,11 +1543,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1563 (bytes_left > sb->s_blocksize) ? sb->s_blocksize : 1543 (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1564 bytes_left); 1544 bytes_left);
1565 1545
1566 status = ocfs2_journal_dirty(handle, bhs[virtual]); 1546 ocfs2_journal_dirty(handle, bhs[virtual]);
1567 if (status < 0) {
1568 mlog_errno(status);
1569 goto bail;
1570 }
1571 1547
1572 virtual++; 1548 virtual++;
1573 p_blkno++; 1549 p_blkno++;
@@ -1611,6 +1587,8 @@ static int ocfs2_symlink(struct inode *dir,
1611 }; 1587 };
1612 int did_quota = 0, did_quota_inode = 0; 1588 int did_quota = 0, did_quota_inode = 0;
1613 struct ocfs2_dir_lookup_result lookup = { NULL, }; 1589 struct ocfs2_dir_lookup_result lookup = { NULL, };
1590 sigset_t oldset;
1591 int did_block_signals = 0;
1614 1592
1615 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1593 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1616 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1594 dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1706,6 +1684,10 @@ static int ocfs2_symlink(struct inode *dir,
1706 goto bail; 1684 goto bail;
1707 } 1685 }
1708 1686
1687 /* Starting to change things, restart is no longer possible. */
1688 ocfs2_block_signals(&oldset);
1689 did_block_signals = 1;
1690
1709 status = dquot_alloc_inode(inode); 1691 status = dquot_alloc_inode(inode);
1710 if (status) 1692 if (status)
1711 goto bail; 1693 goto bail;
@@ -1814,6 +1796,8 @@ bail:
1814 ocfs2_commit_trans(osb, handle); 1796 ocfs2_commit_trans(osb, handle);
1815 1797
1816 ocfs2_inode_unlock(dir, 1); 1798 ocfs2_inode_unlock(dir, 1);
1799 if (did_block_signals)
1800 ocfs2_unblock_signals(&oldset);
1817 1801
1818 brelse(new_fe_bh); 1802 brelse(new_fe_bh);
1819 brelse(parent_fe_bh); 1803 brelse(parent_fe_bh);
@@ -1961,12 +1945,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1961 if (S_ISDIR(inode->i_mode)) 1945 if (S_ISDIR(inode->i_mode))
1962 ocfs2_add_links_count(orphan_fe, 1); 1946 ocfs2_add_links_count(orphan_fe, 1);
1963 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 1947 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
1964 1948 ocfs2_journal_dirty(handle, orphan_dir_bh);
1965 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1966 if (status < 0) {
1967 mlog_errno(status);
1968 goto leave;
1969 }
1970 1949
1971 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 1950 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
1972 OCFS2_ORPHAN_NAMELEN, inode, 1951 OCFS2_ORPHAN_NAMELEN, inode,
@@ -2065,12 +2044,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2065 if (S_ISDIR(inode->i_mode)) 2044 if (S_ISDIR(inode->i_mode))
2066 ocfs2_add_links_count(orphan_fe, -1); 2045 ocfs2_add_links_count(orphan_fe, -1);
2067 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2046 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
2068 2047 ocfs2_journal_dirty(handle, orphan_dir_bh);
2069 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2070 if (status < 0) {
2071 mlog_errno(status);
2072 goto leave;
2073 }
2074 2048
2075leave: 2049leave:
2076 ocfs2_free_dir_lookup_result(&lookup); 2050 ocfs2_free_dir_lookup_result(&lookup);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index adf5e2ebc2c4..c67003b6b5a2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,7 @@
47/* For struct ocfs2_blockcheck_stats */ 47/* For struct ocfs2_blockcheck_stats */
48#include "blockcheck.h" 48#include "blockcheck.h"
49 49
50#include "reservations.h"
50 51
51/* Caching of metadata buffers */ 52/* Caching of metadata buffers */
52 53
@@ -341,6 +342,9 @@ struct ocfs2_super
341 */ 342 */
342 unsigned int local_alloc_bits; 343 unsigned int local_alloc_bits;
343 unsigned int local_alloc_default_bits; 344 unsigned int local_alloc_default_bits;
345 /* osb_clusters_at_boot can become stale! Do not trust it to
346 * be up to date. */
347 unsigned int osb_clusters_at_boot;
344 348
345 enum ocfs2_local_alloc_state local_alloc_state; /* protected 349 enum ocfs2_local_alloc_state local_alloc_state; /* protected
346 * by osb_lock */ 350 * by osb_lock */
@@ -349,6 +353,11 @@ struct ocfs2_super
349 353
350 u64 la_last_gd; 354 u64 la_last_gd;
351 355
356 struct ocfs2_reservation_map osb_la_resmap;
357
358 unsigned int osb_resv_level;
359 unsigned int osb_dir_resv_level;
360
352 /* Next three fields are for local node slot recovery during 361 /* Next three fields are for local node slot recovery during
353 * mount. */ 362 * mount. */
354 int dirty; 363 int dirty;
@@ -482,6 +491,13 @@ static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
482 return 0; 491 return 0;
483} 492}
484 493
494static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
495{
496 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
497 return 1;
498 return 0;
499}
500
485static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) 501static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
486{ 502{
487 if (ocfs2_supports_indexed_dirs(osb)) 503 if (ocfs2_supports_indexed_dirs(osb))
@@ -763,6 +779,12 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
763 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 779 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
764} 780}
765 781
782static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
783 unsigned int clusters)
784{
785 return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
786}
787
766static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 788static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
767{ 789{
768 ext2_set_bit(bit, bitmap); 790 ext2_set_bit(bit, bitmap);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bb37218a7978..33f1c9a8258d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -100,7 +100,8 @@
100 | OCFS2_FEATURE_INCOMPAT_XATTR \ 100 | OCFS2_FEATURE_INCOMPAT_XATTR \
101 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
104#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 105#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
105 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 106 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
106 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 107 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -165,6 +166,9 @@
165/* Refcount tree support */ 166/* Refcount tree support */
166#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 167#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
167 168
169/* Discontiguous block groups */
170#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
171
168/* 172/*
169 * backup superblock flag is used to indicate that this volume 173 * backup superblock flag is used to indicate that this volume
170 * has backup superblocks. 174 * has backup superblocks.
@@ -283,14 +287,6 @@
283#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 287#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
284 288
285/* 289/*
286 * Default local alloc size (in megabytes)
287 *
288 * The value chosen should be such that most allocations, including new
289 * block groups, use local alloc.
290 */
291#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
292
293/*
294 * Inline extended attribute size (in bytes) 290 * Inline extended attribute size (in bytes)
295 * The value chosen should be aligned to 16 byte boundaries. 291 * The value chosen should be aligned to 16 byte boundaries.
296 */ 292 */
@@ -512,7 +508,10 @@ struct ocfs2_extent_block
512 block group */ 508 block group */
513 __le32 h_fs_generation; /* Must match super block */ 509 __le32 h_fs_generation; /* Must match super block */
514 __le64 h_blkno; /* Offset on disk, in blocks */ 510 __le64 h_blkno; /* Offset on disk, in blocks */
515/*20*/ __le64 h_reserved3; 511/*20*/ __le64 h_suballoc_loc; /* Suballocator block group this
512 eb belongs to. Only valid
513 if allocated from a
514 discontiguous block group */
516 __le64 h_next_leaf_blk; /* Offset on disk, in blocks, 515 __le64 h_next_leaf_blk; /* Offset on disk, in blocks,
517 of next leaf header pointing 516 of next leaf header pointing
518 to data */ 517 to data */
@@ -679,7 +678,11 @@ struct ocfs2_dinode {
679/*80*/ struct ocfs2_block_check i_check; /* Error checking */ 678/*80*/ struct ocfs2_block_check i_check; /* Error checking */
680/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ 679/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
681/*90*/ __le64 i_refcount_loc; 680/*90*/ __le64 i_refcount_loc;
682 __le64 i_reserved2[4]; 681 __le64 i_suballoc_loc; /* Suballocator block group this
682 inode belongs to. Only valid
683 if allocated from a
684 discontiguous block group */
685/*A0*/ __le64 i_reserved2[3];
683/*B8*/ union { 686/*B8*/ union {
684 __le64 i_pad1; /* Generic way to refer to this 687 __le64 i_pad1; /* Generic way to refer to this
685 64bit union */ 688 64bit union */
@@ -814,7 +817,12 @@ struct ocfs2_dx_root_block {
814 __le32 dr_reserved2; 817 __le32 dr_reserved2;
815 __le64 dr_free_blk; /* Pointer to head of free 818 __le64 dr_free_blk; /* Pointer to head of free
816 * unindexed block list. */ 819 * unindexed block list. */
817 __le64 dr_reserved3[15]; 820 __le64 dr_suballoc_loc; /* Suballocator block group
821 this root belongs to.
822 Only valid if allocated
823 from a discontiguous
824 block group */
825 __le64 dr_reserved3[14];
818 union { 826 union {
819 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 827 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
820 * bits for maximum space 828 * bits for maximum space
@@ -840,6 +848,13 @@ struct ocfs2_dx_leaf {
840}; 848};
841 849
842/* 850/*
851 * Largest bitmap for a block (suballocator) group in bytes. This limit
852 * does not affect cluster groups (global allocator). Cluster group
853 * bitmaps run to the end of the block.
854 */
855#define OCFS2_MAX_BG_BITMAP_SIZE 256
856
857/*
843 * On disk allocator group structure for OCFS2 858 * On disk allocator group structure for OCFS2
844 */ 859 */
845struct ocfs2_group_desc 860struct ocfs2_group_desc
@@ -860,7 +875,29 @@ struct ocfs2_group_desc
860 __le64 bg_blkno; /* Offset on disk, in blocks */ 875 __le64 bg_blkno; /* Offset on disk, in blocks */
861/*30*/ struct ocfs2_block_check bg_check; /* Error checking */ 876/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
862 __le64 bg_reserved2; 877 __le64 bg_reserved2;
863/*40*/ __u8 bg_bitmap[0]; 878/*40*/ union {
879 __u8 bg_bitmap[0];
880 struct {
881 /*
882 * Block groups may be discontiguous when
883 * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set.
884 * The extents of a discontiguous block group are
885 * stored in bg_list. It is a flat list.
886 * l_tree_depth must always be zero. A
887 * discontiguous group is signified by a non-zero
888 * bg_list->l_next_free_rec. Only block groups
889 * can be discontiguous; Cluster groups cannot.
890 * We've never made a block group with more than
891 * 2048 blocks (256 bytes of bg_bitmap). This
892 * codifies that limit so that we can fit bg_list.
893 * bg_size of a discontiguous block group will
894 * be 256 to match bg_bitmap_filler.
895 */
896 __u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE];
897/*140*/ struct ocfs2_extent_list bg_list;
898 };
899 };
900/* Actual on-disk size is one block */
864}; 901};
865 902
866struct ocfs2_refcount_rec { 903struct ocfs2_refcount_rec {
@@ -905,7 +942,11 @@ struct ocfs2_refcount_block {
905/*40*/ __le32 rf_generation; /* generation number. all be the same 942/*40*/ __le32 rf_generation; /* generation number. all be the same
906 * for the same refcount tree. */ 943 * for the same refcount tree. */
907 __le32 rf_reserved0; 944 __le32 rf_reserved0;
908 __le64 rf_reserved1[7]; 945 __le64 rf_suballoc_loc; /* Suballocator block group this
946 refcount block belongs to. Only
947 valid if allocated from a
948 discontiguous block group */
949/*50*/ __le64 rf_reserved1[6];
909/*80*/ union { 950/*80*/ union {
910 struct ocfs2_refcount_list rf_records; /* List of refcount 951 struct ocfs2_refcount_list rf_records; /* List of refcount
911 records */ 952 records */
@@ -1017,7 +1058,10 @@ struct ocfs2_xattr_block {
1017 real xattr or a xattr tree. */ 1058 real xattr or a xattr tree. */
1018 __le16 xb_reserved0; 1059 __le16 xb_reserved0;
1019 __le32 xb_reserved1; 1060 __le32 xb_reserved1;
1020 __le64 xb_reserved2; 1061 __le64 xb_suballoc_loc; /* Suballocator block group this
1062 xattr block belongs to. Only
1063 valid if allocated from a
1064 discontiguous block group */
1021/*30*/ union { 1065/*30*/ union {
1022 struct ocfs2_xattr_header xb_header; /* xattr header if this 1066 struct ocfs2_xattr_header xb_header; /* xattr header if this
1023 block contains xattr */ 1067 block contains xattr */
@@ -1254,6 +1298,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
1254 return size / sizeof(struct ocfs2_extent_rec); 1298 return size / sizeof(struct ocfs2_extent_rec);
1255} 1299}
1256 1300
1301static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb)
1302{
1303 int size;
1304
1305 size = sb->s_blocksize -
1306 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1307
1308 return size / sizeof(struct ocfs2_extent_rec);
1309}
1310
1257static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) 1311static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
1258{ 1312{
1259 int size; 1313 int size;
@@ -1284,13 +1338,23 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
1284 return size; 1338 return size;
1285} 1339}
1286 1340
1287static inline int ocfs2_group_bitmap_size(struct super_block *sb) 1341static inline int ocfs2_group_bitmap_size(struct super_block *sb,
1342 int suballocator,
1343 u32 feature_incompat)
1288{ 1344{
1289 int size; 1345 int size = sb->s_blocksize -
1290
1291 size = sb->s_blocksize -
1292 offsetof(struct ocfs2_group_desc, bg_bitmap); 1346 offsetof(struct ocfs2_group_desc, bg_bitmap);
1293 1347
1348 /*
1349 * The cluster allocator uses the entire block. Suballocators have
1350 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1351 * code expects bg_size set to the maximum. Thus we must keep
1352 * bg_size as-is unless discontig_bg is enabled.
1353 */
1354 if (suballocator &&
1355 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1356 size = OCFS2_MAX_BG_BITMAP_SIZE;
1357
1294 return size; 1358 return size;
1295} 1359}
1296 1360
@@ -1402,23 +1466,43 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
1402 return size / sizeof(struct ocfs2_extent_rec); 1466 return size / sizeof(struct ocfs2_extent_rec);
1403} 1467}
1404 1468
1405static inline int ocfs2_local_alloc_size(int blocksize) 1469static inline int ocfs2_extent_recs_per_gd(int blocksize)
1406{ 1470{
1407 int size; 1471 int size;
1408 1472
1409 size = blocksize - 1473 size = blocksize -
1410 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap); 1474 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1411 1475
1412 return size; 1476 return size / sizeof(struct ocfs2_extent_rec);
1413} 1477}
1414 1478
1415static inline int ocfs2_group_bitmap_size(int blocksize) 1479static inline int ocfs2_local_alloc_size(int blocksize)
1416{ 1480{
1417 int size; 1481 int size;
1418 1482
1419 size = blocksize - 1483 size = blocksize -
1484 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
1485
1486 return size;
1487}
1488
1489static inline int ocfs2_group_bitmap_size(int blocksize,
1490 int suballocator,
1491 uint32_t feature_incompat)
1492{
1493 int size = sb->s_blocksize -
1420 offsetof(struct ocfs2_group_desc, bg_bitmap); 1494 offsetof(struct ocfs2_group_desc, bg_bitmap);
1421 1495
1496 /*
1497 * The cluster allocator uses the entire block. Suballocators have
1498 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1499 * code expects bg_size set to the maximum. Thus we must keep
1500 * bg_size as-is unless discontig_bg is enabled.
1501 */
1502 if (suballocator &&
1503 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1504 size = OCFS2_MAX_BG_BITMAP_SIZE;
1505
1422 return size; 1506 return size;
1423} 1507}
1424 1508
@@ -1491,5 +1575,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
1491 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 1575 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1492} 1576}
1493 1577
1578static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
1579{
1580 if ((offsetof(struct ocfs2_group_desc, bg_bitmap) +
1581 le16_to_cpu(gd->bg_size)) !=
1582 offsetof(struct ocfs2_group_desc, bg_list))
1583 return 0;
1584 /*
1585 * Only valid to check l_next_free_rec if
1586 * bg_bitmap + bg_size == bg_list.
1587 */
1588 if (!gd->bg_list.l_next_free_rec)
1589 return 0;
1590 return 1;
1591}
1494#endif /* _OCFS2_FS_H */ 1592#endif /* _OCFS2_FS_H */
1495 1593
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 123bc520a2c0..196fcb52d95d 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -23,6 +23,7 @@
23struct ocfs2_dquot { 23struct ocfs2_dquot {
24 struct dquot dq_dquot; /* Generic VFS dquot */ 24 struct dquot dq_dquot; /* Generic VFS dquot */
25 loff_t dq_local_off; /* Offset in the local quota file */ 25 loff_t dq_local_off; /* Offset in the local quota file */
26 u64 dq_local_phys_blk; /* Physical block carrying quota structure */
26 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ 27 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
27 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ 28 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
28 s64 dq_origspace; /* Last globally synced space usage */ 29 s64 dq_origspace; /* Last globally synced space usage */
@@ -51,8 +52,9 @@ struct ocfs2_mem_dqinfo {
51 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 52 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
52 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ 53 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
53 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ 54 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
55 u64 dqi_giblk; /* Number of block with global information header */
54 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ 56 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
55 struct buffer_head *dqi_ibh; /* Buffer with information header */ 57 struct buffer_head *dqi_libh; /* Buffer with local information header */
56 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ 58 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
57 struct delayed_work dqi_sync_work; /* Work for syncing dquots */ 59 struct delayed_work dqi_sync_work; /* Work for syncing dquots */
58 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery 60 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
@@ -102,8 +104,12 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot)
102 104
103int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); 105int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
104void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); 106void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
105int ocfs2_read_quota_block(struct inode *inode, u64 v_block, 107int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh);
106 struct buffer_head **bh); 108int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
109 struct buffer_head **bh);
110int ocfs2_create_local_dquot(struct dquot *dquot);
111int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot);
112int ocfs2_local_write_dquot(struct dquot *dquot);
107 113
108extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
109extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index ab42a74c7539..2bb35fe00511 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -25,8 +25,44 @@
25#include "dlmglue.h" 25#include "dlmglue.h"
26#include "uptodate.h" 26#include "uptodate.h"
27#include "super.h" 27#include "super.h"
28#include "buffer_head_io.h"
28#include "quota.h" 29#include "quota.h"
29 30
31/*
32 * Locking of quotas with OCFS2 is rather complex. Here are rules that
33 * should be obeyed by all the functions:
34 * - any write of quota structure (either to local or global file) is protected
35 * by dqio_mutex or dquot->dq_lock.
36 * - any modification of global quota file holds inode cluster lock, i_mutex,
37 * and ip_alloc_sem of the global quota file (achieved by
38 * ocfs2_lock_global_qf). It also has to hold qinfo_lock.
39 * - an allocation of new blocks for local quota file is protected by
40 * its ip_alloc_sem
41 *
42 * A rough sketch of locking dependencies (lf = local file, gf = global file):
43 * Normal filesystem operation:
44 * start_trans -> dqio_mutex -> write to lf
45 * Syncing of local and global file:
46 * ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
47 * write to gf
48 * -> write to lf
49 * Acquire dquot for the first time:
50 * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf
51 * -> alloc space for gf
52 * -> start_trans -> qinfo_lock -> write to gf
53 * -> ip_alloc_sem of lf -> alloc space for lf
54 * -> write to lf
55 * Release last reference to dquot:
56 * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf
57 * -> write to lf
58 * Note that all the above operations also hold the inode cluster lock of lf.
59 * Recovery:
60 * inode cluster lock of recovered lf
61 * -> read bitmaps -> ip_alloc_sem of lf
62 * -> ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
63 * write to gf
64 */
65
30static struct workqueue_struct *ocfs2_quota_wq = NULL; 66static struct workqueue_struct *ocfs2_quota_wq = NULL;
31 67
32static void qsync_work_fn(struct work_struct *work); 68static void qsync_work_fn(struct work_struct *work);
@@ -91,8 +127,7 @@ struct qtree_fmt_operations ocfs2_global_ops = {
91 .is_id = ocfs2_global_is_id, 127 .is_id = ocfs2_global_is_id,
92}; 128};
93 129
94static int ocfs2_validate_quota_block(struct super_block *sb, 130int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh)
95 struct buffer_head *bh)
96{ 131{
97 struct ocfs2_disk_dqtrailer *dqt = 132 struct ocfs2_disk_dqtrailer *dqt =
98 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); 133 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
@@ -110,54 +145,19 @@ static int ocfs2_validate_quota_block(struct super_block *sb,
110 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); 145 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check);
111} 146}
112 147
113int ocfs2_read_quota_block(struct inode *inode, u64 v_block, 148int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
114 struct buffer_head **bh) 149 struct buffer_head **bhp)
115{ 150{
116 int rc = 0; 151 int rc;
117 struct buffer_head *tmp = *bh; 152
118 153 *bhp = NULL;
119 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { 154 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, 1, bhp, 0,
120 ocfs2_error(inode->i_sb, 155 ocfs2_validate_quota_block);
121 "Quota file %llu is probably corrupted! Requested "
122 "to read block %Lu but file has size only %Lu\n",
123 (unsigned long long)OCFS2_I(inode)->ip_blkno,
124 (unsigned long long)v_block,
125 (unsigned long long)i_size_read(inode));
126 return -EIO;
127 }
128 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
129 ocfs2_validate_quota_block);
130 if (rc) 156 if (rc)
131 mlog_errno(rc); 157 mlog_errno(rc);
132
133 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
134 if (!rc && !*bh)
135 *bh = tmp;
136
137 return rc; 158 return rc;
138} 159}
139 160
140static int ocfs2_get_quota_block(struct inode *inode, int block,
141 struct buffer_head **bh)
142{
143 u64 pblock, pcount;
144 int err;
145
146 down_read(&OCFS2_I(inode)->ip_alloc_sem);
147 err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
148 up_read(&OCFS2_I(inode)->ip_alloc_sem);
149 if (err) {
150 mlog_errno(err);
151 return err;
152 }
153 *bh = sb_getblk(inode->i_sb, pblock);
154 if (!*bh) {
155 err = -EIO;
156 mlog_errno(err);
157 }
158 return err;
159}
160
161/* Read data from global quotafile - avoid pagecache and such because we cannot 161/* Read data from global quotafile - avoid pagecache and such because we cannot
162 * afford acquiring the locks... We use quota cluster lock to serialize 162 * afford acquiring the locks... We use quota cluster lock to serialize
163 * operations. Caller is responsible for acquiring it. */ 163 * operations. Caller is responsible for acquiring it. */
@@ -172,6 +172,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
172 int err = 0; 172 int err = 0;
173 struct buffer_head *bh; 173 struct buffer_head *bh;
174 size_t toread, tocopy; 174 size_t toread, tocopy;
175 u64 pblock = 0, pcount = 0;
175 176
176 if (off > i_size) 177 if (off > i_size)
177 return 0; 178 return 0;
@@ -180,8 +181,19 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
180 toread = len; 181 toread = len;
181 while (toread > 0) { 182 while (toread > 0) {
182 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); 183 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread);
184 if (!pcount) {
185 err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock,
186 &pcount, NULL);
187 if (err) {
188 mlog_errno(err);
189 return err;
190 }
191 } else {
192 pcount--;
193 pblock++;
194 }
183 bh = NULL; 195 bh = NULL;
184 err = ocfs2_read_quota_block(gqinode, blk, &bh); 196 err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh);
185 if (err) { 197 if (err) {
186 mlog_errno(err); 198 mlog_errno(err);
187 return err; 199 return err;
@@ -209,6 +221,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
209 int err = 0, new = 0, ja_type; 221 int err = 0, new = 0, ja_type;
210 struct buffer_head *bh = NULL; 222 struct buffer_head *bh = NULL;
211 handle_t *handle = journal_current_handle(); 223 handle_t *handle = journal_current_handle();
224 u64 pblock, pcount;
212 225
213 if (!handle) { 226 if (!handle) {
214 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " 227 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
@@ -221,12 +234,11 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
221 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; 234 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
222 } 235 }
223 236
224 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
225 if (gqinode->i_size < off + len) { 237 if (gqinode->i_size < off + len) {
226 loff_t rounded_end = 238 loff_t rounded_end =
227 ocfs2_align_bytes_to_blocks(sb, off + len); 239 ocfs2_align_bytes_to_blocks(sb, off + len);
228 240
229 /* Space is already allocated in ocfs2_global_read_dquot() */ 241 /* Space is already allocated in ocfs2_acquire_dquot() */
230 err = ocfs2_simple_size_update(gqinode, 242 err = ocfs2_simple_size_update(gqinode,
231 oinfo->dqi_gqi_bh, 243 oinfo->dqi_gqi_bh,
232 rounded_end); 244 rounded_end);
@@ -234,13 +246,20 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 goto out; 246 goto out;
235 new = 1; 247 new = 1;
236 } 248 }
249 err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL);
250 if (err) {
251 mlog_errno(err);
252 goto out;
253 }
237 /* Not rewriting whole block? */ 254 /* Not rewriting whole block? */
238 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && 255 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
239 !new) { 256 !new) {
240 err = ocfs2_read_quota_block(gqinode, blk, &bh); 257 err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh);
241 ja_type = OCFS2_JOURNAL_ACCESS_WRITE; 258 ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
242 } else { 259 } else {
243 err = ocfs2_get_quota_block(gqinode, blk, &bh); 260 bh = sb_getblk(sb, pblock);
261 if (!bh)
262 err = -ENOMEM;
244 ja_type = OCFS2_JOURNAL_ACCESS_CREATE; 263 ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
245 } 264 }
246 if (err) { 265 if (err) {
@@ -261,19 +280,15 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
261 brelse(bh); 280 brelse(bh);
262 goto out; 281 goto out;
263 } 282 }
264 err = ocfs2_journal_dirty(handle, bh); 283 ocfs2_journal_dirty(handle, bh);
265 brelse(bh); 284 brelse(bh);
266 if (err < 0)
267 goto out;
268out: 285out:
269 if (err) { 286 if (err) {
270 mutex_unlock(&gqinode->i_mutex);
271 mlog_errno(err); 287 mlog_errno(err);
272 return err; 288 return err;
273 } 289 }
274 gqinode->i_version++; 290 gqinode->i_version++;
275 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); 291 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
276 mutex_unlock(&gqinode->i_mutex);
277 return len; 292 return len;
278} 293}
279 294
@@ -291,11 +306,23 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
291 else 306 else
292 WARN_ON(bh != oinfo->dqi_gqi_bh); 307 WARN_ON(bh != oinfo->dqi_gqi_bh);
293 spin_unlock(&dq_data_lock); 308 spin_unlock(&dq_data_lock);
309 if (ex) {
310 mutex_lock(&oinfo->dqi_gqinode->i_mutex);
311 down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
312 } else {
313 down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
314 }
294 return 0; 315 return 0;
295} 316}
296 317
297void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) 318void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
298{ 319{
320 if (ex) {
321 up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
322 mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
323 } else {
324 up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
325 }
299 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); 326 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
300 brelse(oinfo->dqi_gqi_bh); 327 brelse(oinfo->dqi_gqi_bh);
301 spin_lock(&dq_data_lock); 328 spin_lock(&dq_data_lock);
@@ -313,6 +340,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
313 struct ocfs2_global_disk_dqinfo dinfo; 340 struct ocfs2_global_disk_dqinfo dinfo;
314 struct mem_dqinfo *info = sb_dqinfo(sb, type); 341 struct mem_dqinfo *info = sb_dqinfo(sb, type);
315 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; 342 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
343 u64 pcount;
316 int status; 344 int status;
317 345
318 mlog_entry_void(); 346 mlog_entry_void();
@@ -339,9 +367,19 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
339 mlog_errno(status); 367 mlog_errno(status);
340 goto out_err; 368 goto out_err;
341 } 369 }
370
371 status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk,
372 &pcount, NULL);
373 if (status < 0)
374 goto out_unlock;
375
376 status = ocfs2_qinfo_lock(oinfo, 0);
377 if (status < 0)
378 goto out_unlock;
342 status = sb->s_op->quota_read(sb, type, (char *)&dinfo, 379 status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
343 sizeof(struct ocfs2_global_disk_dqinfo), 380 sizeof(struct ocfs2_global_disk_dqinfo),
344 OCFS2_GLOBAL_INFO_OFF); 381 OCFS2_GLOBAL_INFO_OFF);
382 ocfs2_qinfo_unlock(oinfo, 0);
345 ocfs2_unlock_global_qf(oinfo, 0); 383 ocfs2_unlock_global_qf(oinfo, 0);
346 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { 384 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
347 mlog(ML_ERROR, "Cannot read global quota info (%d).\n", 385 mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
@@ -368,6 +406,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
368out_err: 406out_err:
369 mlog_exit(status); 407 mlog_exit(status);
370 return status; 408 return status;
409out_unlock:
410 ocfs2_unlock_global_qf(oinfo, 0);
411 mlog_errno(status);
412 goto out_err;
371} 413}
372 414
373/* Write information to global quota file. Expects exlusive lock on quota 415/* Write information to global quota file. Expects exlusive lock on quota
@@ -426,78 +468,10 @@ static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
426 468
427static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) 469static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
428{ 470{
429 /* We modify all the allocated blocks, tree root, and info block */ 471 /* We modify all the allocated blocks, tree root, info block and
472 * the inode */
430 return (ocfs2_global_qinit_alloc(sb, type) + 2) * 473 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
431 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; 474 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + 1;
432}
433
434/* Read in information from global quota file and acquire a reference to it.
435 * dquot_acquire() has already started the transaction and locked quota file */
436int ocfs2_global_read_dquot(struct dquot *dquot)
437{
438 int err, err2, ex = 0;
439 struct super_block *sb = dquot->dq_sb;
440 int type = dquot->dq_type;
441 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
442 struct ocfs2_super *osb = OCFS2_SB(sb);
443 struct inode *gqinode = info->dqi_gqinode;
444 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
445 handle_t *handle = NULL;
446
447 err = ocfs2_qinfo_lock(info, 0);
448 if (err < 0)
449 goto out;
450 err = qtree_read_dquot(&info->dqi_gi, dquot);
451 if (err < 0)
452 goto out_qlock;
453 OCFS2_DQUOT(dquot)->dq_use_count++;
454 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
455 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
456 ocfs2_qinfo_unlock(info, 0);
457
458 if (!dquot->dq_off) { /* No real quota entry? */
459 ex = 1;
460 /*
461 * Add blocks to quota file before we start a transaction since
462 * locking allocators ranks above a transaction start
463 */
464 WARN_ON(journal_current_handle());
465 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
466 err = ocfs2_extend_no_holes(gqinode,
467 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
468 gqinode->i_size);
469 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
470 if (err < 0)
471 goto out;
472 }
473
474 handle = ocfs2_start_trans(osb,
475 ocfs2_calc_global_qinit_credits(sb, type));
476 if (IS_ERR(handle)) {
477 err = PTR_ERR(handle);
478 goto out;
479 }
480 err = ocfs2_qinfo_lock(info, ex);
481 if (err < 0)
482 goto out_trans;
483 err = qtree_write_dquot(&info->dqi_gi, dquot);
484 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
485 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
486 if (!err)
487 err = err2;
488 }
489out_qlock:
490 if (ex)
491 ocfs2_qinfo_unlock(info, 1);
492 else
493 ocfs2_qinfo_unlock(info, 0);
494out_trans:
495 if (handle)
496 ocfs2_commit_trans(osb, handle);
497out:
498 if (err < 0)
499 mlog_errno(err);
500 return err;
501} 475}
502 476
503/* Sync local information about quota modifications with global quota file. 477/* Sync local information about quota modifications with global quota file.
@@ -638,14 +612,13 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
638 } 612 }
639 mutex_lock(&sb_dqopt(sb)->dqio_mutex); 613 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
640 status = ocfs2_sync_dquot(dquot); 614 status = ocfs2_sync_dquot(dquot);
641 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
642 if (status < 0) 615 if (status < 0)
643 mlog_errno(status); 616 mlog_errno(status);
644 /* We have to write local structure as well... */ 617 /* We have to write local structure as well... */
645 dquot_mark_dquot_dirty(dquot); 618 status = ocfs2_local_write_dquot(dquot);
646 status = dquot_commit(dquot);
647 if (status < 0) 619 if (status < 0)
648 mlog_errno(status); 620 mlog_errno(status);
621 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
649 ocfs2_commit_trans(osb, handle); 622 ocfs2_commit_trans(osb, handle);
650out_ilock: 623out_ilock:
651 ocfs2_unlock_global_qf(oinfo, 1); 624 ocfs2_unlock_global_qf(oinfo, 1);
@@ -684,7 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
684 mlog_errno(status); 657 mlog_errno(status);
685 goto out; 658 goto out;
686 } 659 }
687 status = dquot_commit(dquot); 660 mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
661 status = ocfs2_local_write_dquot(dquot);
662 mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
688 ocfs2_commit_trans(osb, handle); 663 ocfs2_commit_trans(osb, handle);
689out: 664out:
690 mlog_exit(status); 665 mlog_exit(status);
@@ -715,6 +690,10 @@ static int ocfs2_release_dquot(struct dquot *dquot)
715 690
716 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 691 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
717 692
693 mutex_lock(&dquot->dq_lock);
694 /* Check whether we are not racing with some other dqget() */
695 if (atomic_read(&dquot->dq_count) > 1)
696 goto out;
718 status = ocfs2_lock_global_qf(oinfo, 1); 697 status = ocfs2_lock_global_qf(oinfo, 1);
719 if (status < 0) 698 if (status < 0)
720 goto out; 699 goto out;
@@ -725,30 +704,113 @@ static int ocfs2_release_dquot(struct dquot *dquot)
725 mlog_errno(status); 704 mlog_errno(status);
726 goto out_ilock; 705 goto out_ilock;
727 } 706 }
728 status = dquot_release(dquot); 707
708 status = ocfs2_global_release_dquot(dquot);
709 if (status < 0) {
710 mlog_errno(status);
711 goto out_trans;
712 }
713 status = ocfs2_local_release_dquot(handle, dquot);
714 /*
715 * If we fail here, we cannot do much as global structure is
716 * already released. So just complain...
717 */
718 if (status < 0)
719 mlog_errno(status);
720 clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
721out_trans:
729 ocfs2_commit_trans(osb, handle); 722 ocfs2_commit_trans(osb, handle);
730out_ilock: 723out_ilock:
731 ocfs2_unlock_global_qf(oinfo, 1); 724 ocfs2_unlock_global_qf(oinfo, 1);
732out: 725out:
726 mutex_unlock(&dquot->dq_lock);
733 mlog_exit(status); 727 mlog_exit(status);
734 return status; 728 return status;
735} 729}
736 730
731/*
732 * Read global dquot structure from disk or create it if it does
733 * not exist. Also update use count of the global structure and
734 * create structure in node-local quota file.
735 */
737static int ocfs2_acquire_dquot(struct dquot *dquot) 736static int ocfs2_acquire_dquot(struct dquot *dquot)
738{ 737{
739 struct ocfs2_mem_dqinfo *oinfo = 738 int status = 0, err;
740 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 739 int ex = 0;
741 int status = 0; 740 struct super_block *sb = dquot->dq_sb;
741 struct ocfs2_super *osb = OCFS2_SB(sb);
742 int type = dquot->dq_type;
743 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
744 struct inode *gqinode = info->dqi_gqinode;
745 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
746 handle_t *handle;
742 747
743 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 748 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
744 /* We need an exclusive lock, because we're going to update use count 749 mutex_lock(&dquot->dq_lock);
745 * and instantiate possibly new dquot structure */ 750 /*
746 status = ocfs2_lock_global_qf(oinfo, 1); 751 * We need an exclusive lock, because we're going to update use count
752 * and instantiate possibly new dquot structure
753 */
754 status = ocfs2_lock_global_qf(info, 1);
747 if (status < 0) 755 if (status < 0)
748 goto out; 756 goto out;
749 status = dquot_acquire(dquot); 757 if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
750 ocfs2_unlock_global_qf(oinfo, 1); 758 status = ocfs2_qinfo_lock(info, 0);
759 if (status < 0)
760 goto out_dq;
761 status = qtree_read_dquot(&info->dqi_gi, dquot);
762 ocfs2_qinfo_unlock(info, 0);
763 if (status < 0)
764 goto out_dq;
765 }
766 set_bit(DQ_READ_B, &dquot->dq_flags);
767
768 OCFS2_DQUOT(dquot)->dq_use_count++;
769 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
770 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
771 if (!dquot->dq_off) { /* No real quota entry? */
772 ex = 1;
773 /*
774 * Add blocks to quota file before we start a transaction since
775 * locking allocators ranks above a transaction start
776 */
777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size);
781 if (status < 0)
782 goto out_dq;
783 }
784
785 handle = ocfs2_start_trans(osb,
786 ocfs2_calc_global_qinit_credits(sb, type));
787 if (IS_ERR(handle)) {
788 status = PTR_ERR(handle);
789 goto out_dq;
790 }
791 status = ocfs2_qinfo_lock(info, ex);
792 if (status < 0)
793 goto out_trans;
794 status = qtree_write_dquot(&info->dqi_gi, dquot);
795 if (ex && info_dirty(sb_dqinfo(sb, type))) {
796 err = __ocfs2_global_write_info(sb, type);
797 if (!status)
798 status = err;
799 }
800 ocfs2_qinfo_unlock(info, ex);
801out_trans:
802 ocfs2_commit_trans(osb, handle);
803out_dq:
804 ocfs2_unlock_global_qf(info, 1);
805 if (status < 0)
806 goto out;
807
808 status = ocfs2_create_local_dquot(dquot);
809 if (status < 0)
810 goto out;
811 set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
751out: 812out:
813 mutex_unlock(&dquot->dq_lock);
752 mlog_exit(status); 814 mlog_exit(status);
753 return status; 815 return status;
754} 816}
@@ -770,7 +832,6 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
770 struct ocfs2_super *osb = OCFS2_SB(sb); 832 struct ocfs2_super *osb = OCFS2_SB(sb);
771 833
772 mlog_entry("id=%u, type=%d", dquot->dq_id, type); 834 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
773 dquot_mark_dquot_dirty(dquot);
774 835
775 /* In case user set some limits, sync dquot immediately to global 836 /* In case user set some limits, sync dquot immediately to global
776 * quota file so that information propagates quicker */ 837 * quota file so that information propagates quicker */
@@ -793,14 +854,16 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
793 mlog_errno(status); 854 mlog_errno(status);
794 goto out_ilock; 855 goto out_ilock;
795 } 856 }
857 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
796 status = ocfs2_sync_dquot(dquot); 858 status = ocfs2_sync_dquot(dquot);
797 if (status < 0) { 859 if (status < 0) {
798 mlog_errno(status); 860 mlog_errno(status);
799 goto out_trans; 861 goto out_dlock;
800 } 862 }
801 /* Now write updated local dquot structure */ 863 /* Now write updated local dquot structure */
802 status = dquot_commit(dquot); 864 status = ocfs2_local_write_dquot(dquot);
803out_trans: 865out_dlock:
866 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
804 ocfs2_commit_trans(osb, handle); 867 ocfs2_commit_trans(osb, handle);
805out_ilock: 868out_ilock:
806 ocfs2_unlock_global_qf(oinfo, 1); 869 ocfs2_unlock_global_qf(oinfo, 1);
@@ -852,7 +915,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
852} 915}
853 916
854const struct dquot_operations ocfs2_quota_operations = { 917const struct dquot_operations ocfs2_quota_operations = {
855 .write_dquot = ocfs2_write_dquot, 918 /* We never make dquot dirty so .write_dquot is never called */
856 .acquire_dquot = ocfs2_acquire_dquot, 919 .acquire_dquot = ocfs2_acquire_dquot,
857 .release_dquot = ocfs2_release_dquot, 920 .release_dquot = ocfs2_release_dquot,
858 .mark_dirty = ocfs2_mark_dquot_dirty, 921 .mark_dirty = ocfs2_mark_dquot_dirty,
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 9ad49305f450..8bd70d4d184d 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -22,6 +22,7 @@
22#include "dlmglue.h" 22#include "dlmglue.h"
23#include "quota.h" 23#include "quota.h"
24#include "uptodate.h" 24#include "uptodate.h"
25#include "super.h"
25 26
26/* Number of local quota structures per block */ 27/* Number of local quota structures per block */
27static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 28static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -119,12 +120,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
119 lock_buffer(bh); 120 lock_buffer(bh);
120 modify(bh, private); 121 modify(bh, private);
121 unlock_buffer(bh); 122 unlock_buffer(bh);
122 status = ocfs2_journal_dirty(handle, bh); 123 ocfs2_journal_dirty(handle, bh);
123 if (status < 0) { 124
124 mlog_errno(status);
125 ocfs2_commit_trans(OCFS2_SB(sb), handle);
126 return status;
127 }
128 status = ocfs2_commit_trans(OCFS2_SB(sb), handle); 125 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
129 if (status < 0) { 126 if (status < 0) {
130 mlog_errno(status); 127 mlog_errno(status);
@@ -133,6 +130,39 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
133 return 0; 130 return 0;
134} 131}
135 132
133/*
134 * Read quota block from a given logical offset.
135 *
136 * This function acquires ip_alloc_sem and thus it must not be called with a
137 * transaction started.
138 */
139static int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
140 struct buffer_head **bh)
141{
142 int rc = 0;
143 struct buffer_head *tmp = *bh;
144
145 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
146 ocfs2_error(inode->i_sb,
147 "Quota file %llu is probably corrupted! Requested "
148 "to read block %Lu but file has size only %Lu\n",
149 (unsigned long long)OCFS2_I(inode)->ip_blkno,
150 (unsigned long long)v_block,
151 (unsigned long long)i_size_read(inode));
152 return -EIO;
153 }
154 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
155 ocfs2_validate_quota_block);
156 if (rc)
157 mlog_errno(rc);
158
159 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
160 if (!rc && !*bh)
161 *bh = tmp;
162
163 return rc;
164}
165
136/* Check whether we understand format of quota files */ 166/* Check whether we understand format of quota files */
137static int ocfs2_local_check_quota_file(struct super_block *sb, int type) 167static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
138{ 168{
@@ -523,9 +553,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
523 ocfs2_clear_bit(bit, dchunk->dqc_bitmap); 553 ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
524 le32_add_cpu(&dchunk->dqc_free, 1); 554 le32_add_cpu(&dchunk->dqc_free, 1);
525 unlock_buffer(qbh); 555 unlock_buffer(qbh);
526 status = ocfs2_journal_dirty(handle, qbh); 556 ocfs2_journal_dirty(handle, qbh);
527 if (status < 0)
528 mlog_errno(status);
529out_commit: 557out_commit:
530 mutex_unlock(&sb_dqopt(sb)->dqio_mutex); 558 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
531 ocfs2_commit_trans(OCFS2_SB(sb), handle); 559 ocfs2_commit_trans(OCFS2_SB(sb), handle);
@@ -631,9 +659,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
631 lock_buffer(bh); 659 lock_buffer(bh);
632 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN); 660 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
633 unlock_buffer(bh); 661 unlock_buffer(bh);
634 status = ocfs2_journal_dirty(handle, bh); 662 ocfs2_journal_dirty(handle, bh);
635 if (status < 0)
636 mlog_errno(status);
637out_trans: 663out_trans:
638 ocfs2_commit_trans(osb, handle); 664 ocfs2_commit_trans(osb, handle);
639out_bh: 665out_bh:
@@ -679,7 +705,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
679 INIT_LIST_HEAD(&oinfo->dqi_chunk); 705 INIT_LIST_HEAD(&oinfo->dqi_chunk);
680 oinfo->dqi_rec = NULL; 706 oinfo->dqi_rec = NULL;
681 oinfo->dqi_lqi_bh = NULL; 707 oinfo->dqi_lqi_bh = NULL;
682 oinfo->dqi_ibh = NULL; 708 oinfo->dqi_libh = NULL;
683 709
684 status = ocfs2_global_read_info(sb, type); 710 status = ocfs2_global_read_info(sb, type);
685 if (status < 0) 711 if (status < 0)
@@ -705,7 +731,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
705 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); 731 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
706 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); 732 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
707 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); 733 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
708 oinfo->dqi_ibh = bh; 734 oinfo->dqi_libh = bh;
709 735
710 /* We crashed when using local quota file? */ 736 /* We crashed when using local quota file? */
711 if (!(info->dqi_flags & OLQF_CLEAN)) { 737 if (!(info->dqi_flags & OLQF_CLEAN)) {
@@ -767,7 +793,7 @@ static int ocfs2_local_write_info(struct super_block *sb, int type)
767{ 793{
768 struct mem_dqinfo *info = sb_dqinfo(sb, type); 794 struct mem_dqinfo *info = sb_dqinfo(sb, type);
769 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) 795 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
770 ->dqi_ibh; 796 ->dqi_libh;
771 int status; 797 int status;
772 798
773 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, 799 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
@@ -790,10 +816,6 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
790 int mark_clean = 1, len; 816 int mark_clean = 1, len;
791 int status; 817 int status;
792 818
793 /* At this point we know there are no more dquots and thus
794 * even if there's some sync in the pdflush queue, it won't
795 * find any dquots and return without doing anything */
796 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
797 iput(oinfo->dqi_gqinode); 819 iput(oinfo->dqi_gqinode);
798 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); 820 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
799 ocfs2_lock_res_free(&oinfo->dqi_gqlock); 821 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
@@ -828,7 +850,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
828 /* Mark local file as clean */ 850 /* Mark local file as clean */
829 info->dqi_flags |= OLQF_CLEAN; 851 info->dqi_flags |= OLQF_CLEAN;
830 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], 852 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
831 oinfo->dqi_ibh, 853 oinfo->dqi_libh,
832 olq_update_info, 854 olq_update_info,
833 info); 855 info);
834 if (status < 0) { 856 if (status < 0) {
@@ -838,7 +860,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
838 860
839out: 861out:
840 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); 862 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
841 brelse(oinfo->dqi_ibh); 863 brelse(oinfo->dqi_libh);
842 brelse(oinfo->dqi_lqi_bh); 864 brelse(oinfo->dqi_lqi_bh);
843 kfree(oinfo); 865 kfree(oinfo);
844 return 0; 866 return 0;
@@ -866,22 +888,21 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
866} 888}
867 889
868/* Write dquot to local quota file */ 890/* Write dquot to local quota file */
869static int ocfs2_local_write_dquot(struct dquot *dquot) 891int ocfs2_local_write_dquot(struct dquot *dquot)
870{ 892{
871 struct super_block *sb = dquot->dq_sb; 893 struct super_block *sb = dquot->dq_sb;
872 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 894 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
873 struct buffer_head *bh = NULL; 895 struct buffer_head *bh;
896 struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type];
874 int status; 897 int status;
875 898
876 status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], 899 status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk,
877 ol_dqblk_file_block(sb, od->dq_local_off), 900 &bh);
878 &bh);
879 if (status) { 901 if (status) {
880 mlog_errno(status); 902 mlog_errno(status);
881 goto out; 903 goto out;
882 } 904 }
883 status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh, 905 status = ocfs2_modify_bh(lqinode, bh, olq_set_dquot, od);
884 olq_set_dquot, od);
885 if (status < 0) { 906 if (status < 0) {
886 mlog_errno(status); 907 mlog_errno(status);
887 goto out; 908 goto out;
@@ -981,10 +1002,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
981 } 1002 }
982 1003
983 /* Initialize chunk header */ 1004 /* Initialize chunk header */
984 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
985 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 1005 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
986 &p_blkno, NULL, NULL); 1006 &p_blkno, NULL, NULL);
987 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
988 if (status < 0) { 1007 if (status < 0) {
989 mlog_errno(status); 1008 mlog_errno(status);
990 goto out_trans; 1009 goto out_trans;
@@ -1009,17 +1028,11 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1009 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1028 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1010 OCFS2_QBLK_RESERVED_SPACE); 1029 OCFS2_QBLK_RESERVED_SPACE);
1011 unlock_buffer(bh); 1030 unlock_buffer(bh);
1012 status = ocfs2_journal_dirty(handle, bh); 1031 ocfs2_journal_dirty(handle, bh);
1013 if (status < 0) {
1014 mlog_errno(status);
1015 goto out_trans;
1016 }
1017 1032
1018 /* Initialize new block with structures */ 1033 /* Initialize new block with structures */
1019 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, 1034 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1021 &p_blkno, NULL, NULL); 1035 &p_blkno, NULL, NULL);
1022 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1023 if (status < 0) { 1036 if (status < 0) {
1024 mlog_errno(status); 1037 mlog_errno(status);
1025 goto out_trans; 1038 goto out_trans;
@@ -1040,11 +1053,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1040 lock_buffer(dbh); 1053 lock_buffer(dbh);
1041 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE); 1054 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1042 unlock_buffer(dbh); 1055 unlock_buffer(dbh);
1043 status = ocfs2_journal_dirty(handle, dbh); 1056 ocfs2_journal_dirty(handle, dbh);
1044 if (status < 0) {
1045 mlog_errno(status);
1046 goto out_trans;
1047 }
1048 1057
1049 /* Update local quotafile info */ 1058 /* Update local quotafile info */
1050 oinfo->dqi_blocks += 2; 1059 oinfo->dqi_blocks += 2;
@@ -1120,10 +1129,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1120 } 1129 }
1121 1130
1122 /* Get buffer from the just added block */ 1131 /* Get buffer from the just added block */
1123 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 1132 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1125 &p_blkno, NULL, NULL); 1133 &p_blkno, NULL, NULL);
1126 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1127 if (status < 0) { 1134 if (status < 0) {
1128 mlog_errno(status); 1135 mlog_errno(status);
1129 goto out; 1136 goto out;
@@ -1155,11 +1162,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1155 lock_buffer(bh); 1162 lock_buffer(bh);
1156 memset(bh->b_data, 0, sb->s_blocksize); 1163 memset(bh->b_data, 0, sb->s_blocksize);
1157 unlock_buffer(bh); 1164 unlock_buffer(bh);
1158 status = ocfs2_journal_dirty(handle, bh); 1165 ocfs2_journal_dirty(handle, bh);
1159 if (status < 0) { 1166
1160 mlog_errno(status);
1161 goto out_trans;
1162 }
1163 /* Update chunk header */ 1167 /* Update chunk header */
1164 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), 1168 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
1165 chunk->qc_headerbh, 1169 chunk->qc_headerbh,
@@ -1173,11 +1177,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1173 lock_buffer(chunk->qc_headerbh); 1177 lock_buffer(chunk->qc_headerbh);
1174 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb)); 1178 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
1175 unlock_buffer(chunk->qc_headerbh); 1179 unlock_buffer(chunk->qc_headerbh);
1176 status = ocfs2_journal_dirty(handle, chunk->qc_headerbh); 1180 ocfs2_journal_dirty(handle, chunk->qc_headerbh);
1177 if (status < 0) { 1181
1178 mlog_errno(status);
1179 goto out_trans;
1180 }
1181 /* Update file header */ 1182 /* Update file header */
1182 oinfo->dqi_blocks++; 1183 oinfo->dqi_blocks++;
1183 status = ocfs2_local_write_info(sb, type); 1184 status = ocfs2_local_write_info(sb, type);
@@ -1210,7 +1211,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private)
1210} 1211}
1211 1212
1212/* Create dquot in the local file for given id */ 1213/* Create dquot in the local file for given id */
1213static int ocfs2_create_local_dquot(struct dquot *dquot) 1214int ocfs2_create_local_dquot(struct dquot *dquot)
1214{ 1215{
1215 struct super_block *sb = dquot->dq_sb; 1216 struct super_block *sb = dquot->dq_sb;
1216 int type = dquot->dq_type; 1217 int type = dquot->dq_type;
@@ -1219,17 +1220,27 @@ static int ocfs2_create_local_dquot(struct dquot *dquot)
1219 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 1220 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1220 int offset; 1221 int offset;
1221 int status; 1222 int status;
1223 u64 pcount;
1222 1224
1225 down_write(&OCFS2_I(lqinode)->ip_alloc_sem);
1223 chunk = ocfs2_find_free_entry(sb, type, &offset); 1226 chunk = ocfs2_find_free_entry(sb, type, &offset);
1224 if (!chunk) { 1227 if (!chunk) {
1225 chunk = ocfs2_extend_local_quota_file(sb, type, &offset); 1228 chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
1226 if (IS_ERR(chunk)) 1229 if (IS_ERR(chunk)) {
1227 return PTR_ERR(chunk); 1230 status = PTR_ERR(chunk);
1231 goto out;
1232 }
1228 } else if (IS_ERR(chunk)) { 1233 } else if (IS_ERR(chunk)) {
1229 return PTR_ERR(chunk); 1234 status = PTR_ERR(chunk);
1235 goto out;
1230 } 1236 }
1231 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); 1237 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
1232 od->dq_chunk = chunk; 1238 od->dq_chunk = chunk;
1239 status = ocfs2_extent_map_get_blocks(lqinode,
1240 ol_dqblk_block(sb, chunk->qc_num, offset),
1241 &od->dq_local_phys_blk,
1242 &pcount,
1243 NULL);
1233 1244
1234 /* Initialize dquot structure on disk */ 1245 /* Initialize dquot structure on disk */
1235 status = ocfs2_local_write_dquot(dquot); 1246 status = ocfs2_local_write_dquot(dquot);
@@ -1246,39 +1257,15 @@ static int ocfs2_create_local_dquot(struct dquot *dquot)
1246 goto out; 1257 goto out;
1247 } 1258 }
1248out: 1259out:
1260 up_write(&OCFS2_I(lqinode)->ip_alloc_sem);
1249 return status; 1261 return status;
1250} 1262}
1251 1263
1252/* Create entry in local file for dquot, load data from the global file */ 1264/*
1253static int ocfs2_local_read_dquot(struct dquot *dquot) 1265 * Release dquot structure from local quota file. ocfs2_release_dquot() has
1254{ 1266 * already started a transaction and written all changes to global quota file
1255 int status; 1267 */
1256 1268int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
1257 mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
1258
1259 status = ocfs2_global_read_dquot(dquot);
1260 if (status < 0) {
1261 mlog_errno(status);
1262 goto out_err;
1263 }
1264
1265 /* Now create entry in the local quota file */
1266 status = ocfs2_create_local_dquot(dquot);
1267 if (status < 0) {
1268 mlog_errno(status);
1269 goto out_err;
1270 }
1271 mlog_exit(0);
1272 return 0;
1273out_err:
1274 mlog_exit(status);
1275 return status;
1276}
1277
1278/* Release dquot structure from local quota file. ocfs2_release_dquot() has
1279 * already started a transaction and obtained exclusive lock for global
1280 * quota file. */
1281static int ocfs2_local_release_dquot(struct dquot *dquot)
1282{ 1269{
1283 int status; 1270 int status;
1284 int type = dquot->dq_type; 1271 int type = dquot->dq_type;
@@ -1286,15 +1273,6 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1286 struct super_block *sb = dquot->dq_sb; 1273 struct super_block *sb = dquot->dq_sb;
1287 struct ocfs2_local_disk_chunk *dchunk; 1274 struct ocfs2_local_disk_chunk *dchunk;
1288 int offset; 1275 int offset;
1289 handle_t *handle = journal_current_handle();
1290
1291 BUG_ON(!handle);
1292 /* First write all local changes to global file */
1293 status = ocfs2_global_release_dquot(dquot);
1294 if (status < 0) {
1295 mlog_errno(status);
1296 goto out;
1297 }
1298 1276
1299 status = ocfs2_journal_access_dq(handle, 1277 status = ocfs2_journal_access_dq(handle,
1300 INODE_CACHE(sb_dqopt(sb)->files[type]), 1278 INODE_CACHE(sb_dqopt(sb)->files[type]),
@@ -1312,12 +1290,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1312 ocfs2_clear_bit(offset, dchunk->dqc_bitmap); 1290 ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
1313 le32_add_cpu(&dchunk->dqc_free, 1); 1291 le32_add_cpu(&dchunk->dqc_free, 1);
1314 unlock_buffer(od->dq_chunk->qc_headerbh); 1292 unlock_buffer(od->dq_chunk->qc_headerbh);
1315 status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); 1293 ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
1316 if (status < 0) { 1294
1317 mlog_errno(status);
1318 goto out;
1319 }
1320 status = 0;
1321out: 1295out:
1322 /* Clear the read bit so that next time someone uses this 1296 /* Clear the read bit so that next time someone uses this
1323 * dquot he reads fresh info from disk and allocates local 1297 * dquot he reads fresh info from disk and allocates local
@@ -1331,9 +1305,6 @@ static const struct quota_format_ops ocfs2_format_ops = {
1331 .read_file_info = ocfs2_local_read_info, 1305 .read_file_info = ocfs2_local_read_info,
1332 .write_file_info = ocfs2_global_write_info, 1306 .write_file_info = ocfs2_global_write_info,
1333 .free_file_info = ocfs2_local_free_info, 1307 .free_file_info = ocfs2_local_free_info,
1334 .read_dqblk = ocfs2_local_read_dquot,
1335 .commit_dqblk = ocfs2_local_write_dquot,
1336 .release_dqblk = ocfs2_local_release_dquot,
1337}; 1308};
1338 1309
1339struct quota_format_type ocfs2_quota_format = { 1310struct quota_format_type ocfs2_quota_format = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5cbcd0f008fc..4793f36f6518 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
571 u16 suballoc_bit_start; 571 u16 suballoc_bit_start;
572 u32 num_got; 572 u32 num_got;
573 u64 first_blkno; 573 u64 suballoc_loc, first_blkno;
574 574
575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
576 576
@@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
596 goto out_commit; 596 goto out_commit;
597 } 597 }
598 598
599 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 599 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
600 &suballoc_bit_start, &num_got, 600 &suballoc_bit_start, &num_got,
601 &first_blkno); 601 &first_blkno);
602 if (ret) { 602 if (ret) {
@@ -626,6 +626,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
626 memset(rb, 0, inode->i_sb->s_blocksize); 626 memset(rb, 0, inode->i_sb->s_blocksize);
627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
629 rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
629 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 630 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
630 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 631 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
631 rb->rf_blkno = cpu_to_le64(first_blkno); 632 rb->rf_blkno = cpu_to_le64(first_blkno);
@@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
790 if (le32_to_cpu(rb->rf_count) == 1) { 791 if (le32_to_cpu(rb->rf_count) == 1) {
791 blk = le64_to_cpu(rb->rf_blkno); 792 blk = le64_to_cpu(rb->rf_blkno);
792 bit = le16_to_cpu(rb->rf_suballoc_bit); 793 bit = le16_to_cpu(rb->rf_suballoc_bit);
793 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 794 if (rb->rf_suballoc_loc)
795 bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
796 else
797 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
794 798
795 alloc_inode = ocfs2_get_system_file_inode(osb, 799 alloc_inode = ocfs2_get_system_file_inode(osb,
796 EXTENT_ALLOC_SYSTEM_INODE, 800 EXTENT_ALLOC_SYSTEM_INODE,
@@ -1268,9 +1272,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
1268 } else if (merge) 1272 } else if (merge)
1269 ocfs2_refcount_rec_merge(rb, index); 1273 ocfs2_refcount_rec_merge(rb, index);
1270 1274
1271 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1275 ocfs2_journal_dirty(handle, ref_leaf_bh);
1272 if (ret)
1273 mlog_errno(ret);
1274out: 1276out:
1275 return ret; 1277 return ret;
1276} 1278}
@@ -1284,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1284 int ret; 1286 int ret;
1285 u16 suballoc_bit_start; 1287 u16 suballoc_bit_start;
1286 u32 num_got; 1288 u32 num_got;
1287 u64 blkno; 1289 u64 suballoc_loc, blkno;
1288 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1290 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1289 struct buffer_head *new_bh = NULL; 1291 struct buffer_head *new_bh = NULL;
1290 struct ocfs2_refcount_block *new_rb; 1292 struct ocfs2_refcount_block *new_rb;
@@ -1298,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1298 goto out; 1300 goto out;
1299 } 1301 }
1300 1302
1301 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1303 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1302 &suballoc_bit_start, &num_got, 1304 &suballoc_bit_start, &num_got,
1303 &blkno); 1305 &blkno);
1304 if (ret) { 1306 if (ret) {
@@ -1330,6 +1332,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1330 1332
1331 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1333 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1332 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1334 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1335 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1333 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1336 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1334 new_rb->rf_blkno = cpu_to_le64(blkno); 1337 new_rb->rf_blkno = cpu_to_le64(blkno);
1335 new_rb->rf_cpos = cpu_to_le32(0); 1338 new_rb->rf_cpos = cpu_to_le32(0);
@@ -1524,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1524 int ret; 1527 int ret;
1525 u16 suballoc_bit_start; 1528 u16 suballoc_bit_start;
1526 u32 num_got, new_cpos; 1529 u32 num_got, new_cpos;
1527 u64 blkno; 1530 u64 suballoc_loc, blkno;
1528 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1531 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1529 struct ocfs2_refcount_block *root_rb = 1532 struct ocfs2_refcount_block *root_rb =
1530 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1533 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
@@ -1548,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1548 goto out; 1551 goto out;
1549 } 1552 }
1550 1553
1551 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1554 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1552 &suballoc_bit_start, &num_got, 1555 &suballoc_bit_start, &num_got,
1553 &blkno); 1556 &blkno);
1554 if (ret) { 1557 if (ret) {
@@ -1576,6 +1579,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1576 memset(new_rb, 0, sb->s_blocksize); 1579 memset(new_rb, 0, sb->s_blocksize);
1577 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1580 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
1578 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1581 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1582 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1579 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1583 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1580 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1584 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1581 new_rb->rf_blkno = cpu_to_le64(blkno); 1585 new_rb->rf_blkno = cpu_to_le64(blkno);
@@ -1694,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
1694 * 2 more credits, one for the leaf refcount block, one for 1698 * 2 more credits, one for the leaf refcount block, one for
1695 * the extent block contains the extent rec. 1699 * the extent block contains the extent rec.
1696 */ 1700 */
1697 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1701 ret = ocfs2_extend_trans(handle, 2);
1698 if (ret < 0) { 1702 if (ret < 0) {
1699 mlog_errno(ret); 1703 mlog_errno(ret);
1700 goto out; 1704 goto out;
@@ -1802,11 +1806,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
1802 if (merge) 1806 if (merge)
1803 ocfs2_refcount_rec_merge(rb, index); 1807 ocfs2_refcount_rec_merge(rb, index);
1804 1808
1805 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1809 ocfs2_journal_dirty(handle, ref_leaf_bh);
1806 if (ret) {
1807 mlog_errno(ret);
1808 goto out;
1809 }
1810 1810
1811 if (index == 0) { 1811 if (index == 0) {
1812 ret = ocfs2_adjust_refcount_rec(handle, ci, 1812 ret = ocfs2_adjust_refcount_rec(handle, ci,
@@ -1977,9 +1977,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1977 ocfs2_refcount_rec_merge(rb, index); 1977 ocfs2_refcount_rec_merge(rb, index);
1978 } 1978 }
1979 1979
1980 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1980 ocfs2_journal_dirty(handle, ref_leaf_bh);
1981 if (ret)
1982 mlog_errno(ret);
1983 1981
1984out: 1982out:
1985 brelse(new_bh); 1983 brelse(new_bh);
@@ -2112,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
2112 */ 2110 */
2113 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 2111 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
2114 le16_to_cpu(rb->rf_suballoc_slot), 2112 le16_to_cpu(rb->rf_suballoc_slot),
2113 le64_to_cpu(rb->rf_suballoc_loc),
2115 le64_to_cpu(rb->rf_blkno), 2114 le64_to_cpu(rb->rf_blkno),
2116 le16_to_cpu(rb->rf_suballoc_bit)); 2115 le16_to_cpu(rb->rf_suballoc_bit));
2117 if (ret) { 2116 if (ret) {
@@ -2516,20 +2515,19 @@ out:
2516 * 2515 *
2517 * Normally the refcount blocks store these refcount should be 2516 * Normally the refcount blocks store these refcount should be
2518 * contiguous also, so that we can get the number easily. 2517 * contiguous also, so that we can get the number easily.
2519 * As for meta_ac, we will at most add split 2 refcount record and 2518 * We will at most add split 2 refcount records and 2 more
2520 * 2 more refcount block, so just check it in a rough way. 2519 * refcount blocks, so just check it in a rough way.
2521 * 2520 *
2522 * Caller must hold refcount tree lock. 2521 * Caller must hold refcount tree lock.
2523 */ 2522 */
2524int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2523int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2525 struct buffer_head *di_bh, 2524 u64 refcount_loc,
2526 u64 phys_blkno, 2525 u64 phys_blkno,
2527 u32 clusters, 2526 u32 clusters,
2528 int *credits, 2527 int *credits,
2529 struct ocfs2_alloc_context **meta_ac) 2528 int *ref_blocks)
2530{ 2529{
2531 int ret, ref_blocks = 0; 2530 int ret;
2532 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2533 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2531 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2534 struct buffer_head *ref_root_bh = NULL; 2532 struct buffer_head *ref_root_bh = NULL;
2535 struct ocfs2_refcount_tree *tree; 2533 struct ocfs2_refcount_tree *tree;
@@ -2546,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2546 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2544 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2547 2545
2548 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2546 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2549 le64_to_cpu(di->i_refcount_loc), &tree); 2547 refcount_loc, &tree);
2550 if (ret) { 2548 if (ret) {
2551 mlog_errno(ret); 2549 mlog_errno(ret);
2552 goto out; 2550 goto out;
2553 } 2551 }
2554 2552
2555 ret = ocfs2_read_refcount_block(&tree->rf_ci, 2553 ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
2556 le64_to_cpu(di->i_refcount_loc),
2557 &ref_root_bh); 2554 &ref_root_bh);
2558 if (ret) { 2555 if (ret) {
2559 mlog_errno(ret); 2556 mlog_errno(ret);
@@ -2564,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2564 &tree->rf_ci, 2561 &tree->rf_ci,
2565 ref_root_bh, 2562 ref_root_bh,
2566 start_cpos, clusters, 2563 start_cpos, clusters,
2567 &ref_blocks, credits); 2564 ref_blocks, credits);
2568 if (ret) { 2565 if (ret) {
2569 mlog_errno(ret); 2566 mlog_errno(ret);
2570 goto out; 2567 goto out;
2571 } 2568 }
2572 2569
2573 mlog(0, "reserve new metadata %d, credits = %d\n", 2570 mlog(0, "reserve new metadata %d blocks, credits = %d\n",
2574 ref_blocks, *credits); 2571 *ref_blocks, *credits);
2575
2576 if (ref_blocks) {
2577 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2578 ref_blocks, meta_ac);
2579 if (ret)
2580 mlog_errno(ret);
2581 }
2582 2572
2583out: 2573out:
2584 brelse(ref_root_bh); 2574 brelse(ref_root_bh);
@@ -3040,11 +3030,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3040 } 3030 }
3041 3031
3042 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); 3032 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
3043 ret = ocfs2_journal_dirty(handle, new_bh); 3033 ocfs2_journal_dirty(handle, new_bh);
3044 if (ret) {
3045 mlog_errno(ret);
3046 break;
3047 }
3048 3034
3049 brelse(new_bh); 3035 brelse(new_bh);
3050 brelse(old_bh); 3036 brelse(old_bh);
@@ -3282,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3282 } else { 3268 } else {
3283 delete = 1; 3269 delete = 1;
3284 3270
3285 ret = __ocfs2_claim_clusters(osb, handle, 3271 ret = __ocfs2_claim_clusters(handle,
3286 context->data_ac, 3272 context->data_ac,
3287 1, set_len, 3273 1, set_len,
3288 &new_bit, &new_len); 3274 &new_bit, &new_len);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index c1d19b1d3ecc..9983ba1570e2 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
47 struct ocfs2_cached_dealloc_ctxt *dealloc, 47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete); 48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh, 50 u64 refcount_loc,
51 u64 phys_blkno, 51 u64 phys_blkno,
52 u32 clusters, 52 u32 clusters,
53 int *credits, 53 int *credits,
54 struct ocfs2_alloc_context **meta_ac); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, 55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos); 56 u32 cpos, u32 write_len, u32 max_cpos);
57 57
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
new file mode 100644
index 000000000000..40650021fc24
--- /dev/null
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,847 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.c
5 *
6 * Allocation reservations implementation
7 *
8 * Some code borrowed from fs/ext3/balloc.c and is:
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * The rest is copyright (C) 2010 Novell. All rights reserved.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public
19 * License version 2 as published by the Free Software Foundation.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31#include <linux/bitops.h>
32#include <linux/list.h>
33
34#define MLOG_MASK_PREFIX ML_RESERVATIONS
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#ifdef CONFIG_OCFS2_DEBUG_FS
40#define OCFS2_CHECK_RESERVATIONS
41#endif
42
43DEFINE_SPINLOCK(resv_lock);
44
45#define OCFS2_MIN_RESV_WINDOW_BITS 8
46#define OCFS2_MAX_RESV_WINDOW_BITS 1024
47
48int ocfs2_dir_resv_allowed(struct ocfs2_super *osb)
49{
50 return (osb->osb_resv_level && osb->osb_dir_resv_level);
51}
52
53static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap,
54 struct ocfs2_alloc_reservation *resv)
55{
56 struct ocfs2_super *osb = resmap->m_osb;
57 unsigned int bits;
58
59 if (!(resv->r_flags & OCFS2_RESV_FLAG_DIR)) {
60 /* 8, 16, 32, 64, 128, 256, 512, 1024 */
61 bits = 4 << osb->osb_resv_level;
62 } else {
63 bits = 4 << osb->osb_dir_resv_level;
64 }
65 return bits;
66}
67
68static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
69{
70 if (resv->r_len)
71 return resv->r_start + resv->r_len - 1;
72 return resv->r_start;
73}
74
75static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
76{
77 return !!(resv->r_len == 0);
78}
79
80static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
81{
82 if (resmap->m_osb->osb_resv_level == 0)
83 return 1;
84 return 0;
85}
86
87static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
88{
89 struct ocfs2_super *osb = resmap->m_osb;
90 struct rb_node *node;
91 struct ocfs2_alloc_reservation *resv;
92 int i = 0;
93
94 mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
95 osb->dev_str, resmap->m_bitmap_len);
96
97 node = rb_first(&resmap->m_reservations);
98 while (node) {
99 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
100
101 mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
102 "\tlast_len: %u\n", resv->r_start,
103 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
104 resv->r_last_len);
105
106 node = rb_next(node);
107 i++;
108 }
109
110 mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i);
111
112 i = 0;
113 list_for_each_entry(resv, &resmap->m_lru, r_lru) {
114 mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t"
115 "last_start: %u\tlast_len: %u\n", i, resv->r_start,
116 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
117 resv->r_last_len);
118
119 i++;
120 }
121}
122
123#ifdef OCFS2_CHECK_RESERVATIONS
124static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap,
125 int i,
126 struct ocfs2_alloc_reservation *resv)
127{
128 char *disk_bitmap = resmap->m_disk_bitmap;
129 unsigned int start = resv->r_start;
130 unsigned int end = ocfs2_resv_end(resv);
131
132 while (start <= end) {
133 if (ocfs2_test_bit(start, disk_bitmap)) {
134 mlog(ML_ERROR,
135 "reservation %d covers an allocated area "
136 "starting at bit %u!\n", i, start);
137 return 1;
138 }
139
140 start++;
141 }
142 return 0;
143}
144
145static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
146{
147 unsigned int off = 0;
148 int i = 0;
149 struct rb_node *node;
150 struct ocfs2_alloc_reservation *resv;
151
152 node = rb_first(&resmap->m_reservations);
153 while (node) {
154 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
155
156 if (i > 0 && resv->r_start <= off) {
157 mlog(ML_ERROR, "reservation %d has bad start off!\n",
158 i);
159 goto bad;
160 }
161
162 if (resv->r_len == 0) {
163 mlog(ML_ERROR, "reservation %d has no length!\n",
164 i);
165 goto bad;
166 }
167
168 if (resv->r_start > ocfs2_resv_end(resv)) {
169 mlog(ML_ERROR, "reservation %d has invalid range!\n",
170 i);
171 goto bad;
172 }
173
174 if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) {
175 mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
176 i);
177 goto bad;
178 }
179
180 if (ocfs2_validate_resmap_bits(resmap, i, resv))
181 goto bad;
182
183 off = ocfs2_resv_end(resv);
184 node = rb_next(node);
185
186 i++;
187 }
188 return;
189
190bad:
191 ocfs2_dump_resv(resmap);
192 BUG();
193}
194#else
195static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
196{
197
198}
199#endif
200
201void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
202{
203 memset(resv, 0, sizeof(*resv));
204 INIT_LIST_HEAD(&resv->r_lru);
205}
206
207void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
208 unsigned int flags)
209{
210 BUG_ON(flags & ~OCFS2_RESV_TYPES);
211
212 resv->r_flags |= flags;
213}
214
215int ocfs2_resmap_init(struct ocfs2_super *osb,
216 struct ocfs2_reservation_map *resmap)
217{
218 memset(resmap, 0, sizeof(*resmap));
219
220 resmap->m_osb = osb;
221 resmap->m_reservations = RB_ROOT;
222 /* m_bitmap_len is initialized to zero by the above memset. */
223 INIT_LIST_HEAD(&resmap->m_lru);
224
225 return 0;
226}
227
228static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
229 struct ocfs2_alloc_reservation *resv)
230{
231 assert_spin_locked(&resv_lock);
232
233 if (!list_empty(&resv->r_lru))
234 list_del_init(&resv->r_lru);
235
236 list_add_tail(&resv->r_lru, &resmap->m_lru);
237}
238
239static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
240{
241 resv->r_len = 0;
242 resv->r_start = 0;
243}
244
245static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
246 struct ocfs2_alloc_reservation *resv)
247{
248 if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) {
249 list_del_init(&resv->r_lru);
250 rb_erase(&resv->r_node, &resmap->m_reservations);
251 resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE;
252 }
253}
254
255static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
256 struct ocfs2_alloc_reservation *resv)
257{
258 assert_spin_locked(&resv_lock);
259
260 __ocfs2_resv_trunc(resv);
261 /*
262 * last_len and last_start no longer make sense if
263 * we're changing the range of our allocations.
264 */
265 resv->r_last_len = resv->r_last_start = 0;
266
267 ocfs2_resv_remove(resmap, resv);
268}
269
270/* does nothing if 'resv' is null */
271void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
272 struct ocfs2_alloc_reservation *resv)
273{
274 if (resv) {
275 spin_lock(&resv_lock);
276 __ocfs2_resv_discard(resmap, resv);
277 spin_unlock(&resv_lock);
278 }
279}
280
281static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
282{
283 struct rb_node *node;
284 struct ocfs2_alloc_reservation *resv;
285
286 assert_spin_locked(&resv_lock);
287
288 while ((node = rb_last(&resmap->m_reservations)) != NULL) {
289 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
290
291 __ocfs2_resv_discard(resmap, resv);
292 }
293}
294
295void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
296 unsigned int clen, char *disk_bitmap)
297{
298 if (ocfs2_resmap_disabled(resmap))
299 return;
300
301 spin_lock(&resv_lock);
302
303 ocfs2_resmap_clear_all_resv(resmap);
304 resmap->m_bitmap_len = clen;
305 resmap->m_disk_bitmap = disk_bitmap;
306
307 spin_unlock(&resv_lock);
308}
309
310void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
311{
312 /* Does nothing for now. Keep this around for API symmetry */
313}
314
315static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
316 struct ocfs2_alloc_reservation *new)
317{
318 struct rb_root *root = &resmap->m_reservations;
319 struct rb_node *parent = NULL;
320 struct rb_node **p = &root->rb_node;
321 struct ocfs2_alloc_reservation *tmp;
322
323 assert_spin_locked(&resv_lock);
324
325 mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
326 new->r_len);
327
328 while (*p) {
329 parent = *p;
330
331 tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
332
333 if (new->r_start < tmp->r_start) {
334 p = &(*p)->rb_left;
335
336 /*
337 * This is a good place to check for
338 * overlapping reservations.
339 */
340 BUG_ON(ocfs2_resv_end(new) >= tmp->r_start);
341 } else if (new->r_start > ocfs2_resv_end(tmp)) {
342 p = &(*p)->rb_right;
343 } else {
344 /* This should never happen! */
345 mlog(ML_ERROR, "Duplicate reservation window!\n");
346 BUG();
347 }
348 }
349
350 rb_link_node(&new->r_node, parent, p);
351 rb_insert_color(&new->r_node, root);
352 new->r_flags |= OCFS2_RESV_FLAG_INUSE;
353
354 ocfs2_resv_mark_lru(resmap, new);
355
356 ocfs2_check_resmap(resmap);
357}
358
359/**
360 * ocfs2_find_resv_lhs() - find the window which contains goal
361 * @resmap: reservation map to search
362 * @goal: which bit to search for
363 *
364 * If a window containing that goal is not found, we return the window
365 * which comes before goal. Returns NULL on empty rbtree or no window
366 * before goal.
367 */
368static struct ocfs2_alloc_reservation *
369ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal)
370{
371 struct ocfs2_alloc_reservation *resv = NULL;
372 struct ocfs2_alloc_reservation *prev_resv = NULL;
373 struct rb_node *node = resmap->m_reservations.rb_node;
374
375 assert_spin_locked(&resv_lock);
376
377 if (!node)
378 return NULL;
379
380 node = rb_first(&resmap->m_reservations);
381 while (node) {
382 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
383
384 if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal)
385 break;
386
387 /* Check if we overshot the reservation just before goal? */
388 if (resv->r_start > goal) {
389 resv = prev_resv;
390 break;
391 }
392
393 prev_resv = resv;
394 node = rb_next(node);
395 }
396
397 return resv;
398}
399
400/*
401 * We are given a range within the bitmap, which corresponds to a gap
402 * inside the reservations tree (search_start, search_len). The range
403 * can be anything from the whole bitmap, to a gap between
404 * reservations.
405 *
406 * The start value of *rstart is insignificant.
407 *
408 * This function searches the bitmap range starting at search_start
409 * with length search_len for a set of contiguous free bits. We try
410 * to find up to 'wanted' bits, but can sometimes return less.
411 *
412 * Returns the length of allocation, 0 if no free bits are found.
413 *
414 * *cstart and *clen will also be populated with the result.
415 */
416static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
417 unsigned int wanted,
418 unsigned int search_start,
419 unsigned int search_len,
420 unsigned int *rstart,
421 unsigned int *rlen)
422{
423 void *bitmap = resmap->m_disk_bitmap;
424 unsigned int best_start, best_len = 0;
425 int offset, start, found;
426
427 mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
428 wanted, search_start, search_len, resmap->m_bitmap_len);
429
430 found = best_start = best_len = 0;
431
432 start = search_start;
433 while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
434 start)) != -1) {
435 /* Search reached end of the region */
436 if (offset >= (search_start + search_len))
437 break;
438
439 if (offset == start) {
440 /* we found a zero */
441 found++;
442 /* move start to the next bit to test */
443 start++;
444 } else {
445 /* got a zero after some ones */
446 found = 1;
447 start = offset + 1;
448 }
449 if (found > best_len) {
450 best_len = found;
451 best_start = start - found;
452 }
453
454 if (found >= wanted)
455 break;
456 }
457
458 if (best_len == 0)
459 return 0;
460
461 if (best_len >= wanted)
462 best_len = wanted;
463
464 *rlen = best_len;
465 *rstart = best_start;
466
467 mlog(0, "Found start: %u len: %u\n", best_start, best_len);
468
469 return *rlen;
470}
471
472static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
473 struct ocfs2_alloc_reservation *resv,
474 unsigned int goal, unsigned int wanted)
475{
476 struct rb_root *root = &resmap->m_reservations;
477 unsigned int gap_start, gap_end, gap_len;
478 struct ocfs2_alloc_reservation *prev_resv, *next_resv;
479 struct rb_node *prev, *next;
480 unsigned int cstart, clen;
481 unsigned int best_start = 0, best_len = 0;
482
483 /*
484 * Nasty cases to consider:
485 *
486 * - rbtree is empty
487 * - our window should be first in all reservations
488 * - our window should be last in all reservations
489 * - need to make sure we don't go past end of bitmap
490 */
491
492 mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
493 resv->r_start, ocfs2_resv_end(resv), goal, wanted);
494
495 assert_spin_locked(&resv_lock);
496
497 if (RB_EMPTY_ROOT(root)) {
498 /*
499 * Easiest case - empty tree. We can just take
500 * whatever window of free bits we want.
501 */
502
503 mlog(0, "Empty root\n");
504
505 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
506 resmap->m_bitmap_len - goal,
507 &cstart, &clen);
508
509 /*
510 * This should never happen - the local alloc window
511 * will always have free bits when we're called.
512 */
513 BUG_ON(goal == 0 && clen == 0);
514
515 if (clen == 0)
516 return;
517
518 resv->r_start = cstart;
519 resv->r_len = clen;
520
521 ocfs2_resv_insert(resmap, resv);
522 return;
523 }
524
525 prev_resv = ocfs2_find_resv_lhs(resmap, goal);
526
527 if (prev_resv == NULL) {
528 mlog(0, "Goal on LHS of leftmost window\n");
529
530 /*
531 * A NULL here means that the search code couldn't
532 * find a window that starts before goal.
533 *
534 * However, we can take the first window after goal,
535 * which is also by definition, the leftmost window in
536 * the entire tree. If we can find free bits in the
537 * gap between goal and the LHS window, then the
538 * reservation can safely be placed there.
539 *
540 * Otherwise we fall back to a linear search, checking
541 * the gaps in between windows for a place to
542 * allocate.
543 */
544
545 next = rb_first(root);
546 next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
547 r_node);
548
549 /*
550 * The search should never return such a window. (see
551 * comment above
552 */
553 if (next_resv->r_start <= goal) {
554 mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n",
555 goal, next_resv->r_start, next_resv->r_len);
556 ocfs2_dump_resv(resmap);
557 BUG();
558 }
559
560 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
561 next_resv->r_start - goal,
562 &cstart, &clen);
563 if (clen) {
564 best_len = clen;
565 best_start = cstart;
566 if (best_len == wanted)
567 goto out_insert;
568 }
569
570 prev_resv = next_resv;
571 next_resv = NULL;
572 }
573
574 prev = &prev_resv->r_node;
575
576 /* Now we do a linear search for a window, starting at 'prev_rsv' */
577 while (1) {
578 next = rb_next(prev);
579 if (next) {
580 mlog(0, "One more resv found in linear search\n");
581 next_resv = rb_entry(next,
582 struct ocfs2_alloc_reservation,
583 r_node);
584
585 gap_start = ocfs2_resv_end(prev_resv) + 1;
586 gap_end = next_resv->r_start - 1;
587 gap_len = gap_end - gap_start + 1;
588 } else {
589 mlog(0, "No next node\n");
590 /*
591 * We're at the rightmost edge of the
592 * tree. See if a reservation between this
593 * window and the end of the bitmap will work.
594 */
595 gap_start = ocfs2_resv_end(prev_resv) + 1;
596 gap_len = resmap->m_bitmap_len - gap_start;
597 gap_end = resmap->m_bitmap_len - 1;
598 }
599
600 /*
601 * No need to check this gap if we have already found
602 * a larger region of free bits.
603 */
604 if (gap_len <= best_len)
605 goto next_resv;
606
607 clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start,
608 gap_len, &cstart, &clen);
609 if (clen == wanted) {
610 best_len = clen;
611 best_start = cstart;
612 goto out_insert;
613 } else if (clen > best_len) {
614 best_len = clen;
615 best_start = cstart;
616 }
617
618next_resv:
619 if (!next)
620 break;
621
622 prev = next;
623 prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
624 r_node);
625 }
626
627out_insert:
628 if (best_len) {
629 resv->r_start = best_start;
630 resv->r_len = best_len;
631 ocfs2_resv_insert(resmap, resv);
632 }
633}
634
635static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
636 struct ocfs2_alloc_reservation *resv,
637 unsigned int wanted)
638{
639 struct ocfs2_alloc_reservation *lru_resv;
640 int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP);
641 unsigned int min_bits;
642
643 if (!tmpwindow)
644 min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1;
645 else
646 min_bits = wanted; /* We at know the temp window will use all
647 * of these bits */
648
649 /*
650 * Take the first reservation off the LRU as our 'target'. We
651 * don't try to be smart about it. There might be a case for
652 * searching based on size but I don't have enough data to be
653 * sure. --Mark (3/16/2010)
654 */
655 lru_resv = list_first_entry(&resmap->m_lru,
656 struct ocfs2_alloc_reservation, r_lru);
657
658 mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
659 lru_resv->r_len, ocfs2_resv_end(lru_resv));
660
661 /*
662 * Cannibalize (some or all) of the target reservation and
663 * feed it to the current window.
664 */
665 if (lru_resv->r_len <= min_bits) {
666 /*
667 * Discard completely if size is less than or equal to a
668 * reasonable threshold - 50% of window bits for non temporary
669 * windows.
670 */
671 resv->r_start = lru_resv->r_start;
672 resv->r_len = lru_resv->r_len;
673
674 __ocfs2_resv_discard(resmap, lru_resv);
675 } else {
676 unsigned int shrink;
677 if (tmpwindow)
678 shrink = min_bits;
679 else
680 shrink = lru_resv->r_len / 2;
681
682 lru_resv->r_len -= shrink;
683
684 resv->r_start = ocfs2_resv_end(lru_resv) + 1;
685 resv->r_len = shrink;
686 }
687
688 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
689 "r_len: %u r_last_start: %u r_last_len: %u\n",
690 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
691 resv->r_last_start, resv->r_last_len);
692
693 ocfs2_resv_insert(resmap, resv);
694}
695
696static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
697 struct ocfs2_alloc_reservation *resv,
698 unsigned int wanted)
699{
700 unsigned int goal = 0;
701
702 BUG_ON(!ocfs2_resv_empty(resv));
703
704 /*
705 * Begin by trying to get a window as close to the previous
706 * one as possible. Using the most recent allocation as a
707 * start goal makes sense.
708 */
709 if (resv->r_last_len) {
710 goal = resv->r_last_start + resv->r_last_len;
711 if (goal >= resmap->m_bitmap_len)
712 goal = 0;
713 }
714
715 __ocfs2_resv_find_window(resmap, resv, goal, wanted);
716
717 /* Search from last alloc didn't work, try once more from beginning. */
718 if (ocfs2_resv_empty(resv) && goal != 0)
719 __ocfs2_resv_find_window(resmap, resv, 0, wanted);
720
721 if (ocfs2_resv_empty(resv)) {
722 /*
723 * Still empty? Pull oldest one off the LRU, remove it from
724 * tree, put this one in it's place.
725 */
726 ocfs2_cannibalize_resv(resmap, resv, wanted);
727 }
728
729 BUG_ON(ocfs2_resv_empty(resv));
730}
731
732int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
733 struct ocfs2_alloc_reservation *resv,
734 int *cstart, int *clen)
735{
736 unsigned int wanted = *clen;
737
738 if (resv == NULL || ocfs2_resmap_disabled(resmap))
739 return -ENOSPC;
740
741 spin_lock(&resv_lock);
742
743 /*
744 * We don't want to over-allocate for temporary
745 * windows. Otherwise, we run the risk of fragmenting the
746 * allocation space.
747 */
748 wanted = ocfs2_resv_window_bits(resmap, resv);
749 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
750 wanted = *clen;
751
752 if (ocfs2_resv_empty(resv)) {
753 mlog(0, "empty reservation, find new window\n");
754
755 /*
756 * Try to get a window here. If it works, we must fall
757 * through and test the bitmap . This avoids some
758 * ping-ponging of windows due to non-reserved space
759 * being allocation before we initialize a window for
760 * that inode.
761 */
762 ocfs2_resv_find_window(resmap, resv, wanted);
763 }
764
765 BUG_ON(ocfs2_resv_empty(resv));
766
767 *cstart = resv->r_start;
768 *clen = resv->r_len;
769
770 spin_unlock(&resv_lock);
771 return 0;
772}
773
774static void
775 ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap,
776 struct ocfs2_alloc_reservation *resv,
777 unsigned int start, unsigned int end)
778{
779 unsigned int rhs = 0;
780 unsigned int old_end = ocfs2_resv_end(resv);
781
782 BUG_ON(start != resv->r_start || old_end < end);
783
784 /*
785 * Completely used? We can remove it then.
786 */
787 if (old_end == end) {
788 __ocfs2_resv_discard(resmap, resv);
789 return;
790 }
791
792 rhs = old_end - end;
793
794 /*
795 * This should have been trapped above.
796 */
797 BUG_ON(rhs == 0);
798
799 resv->r_start = end + 1;
800 resv->r_len = old_end - resv->r_start + 1;
801}
802
803void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
804 struct ocfs2_alloc_reservation *resv,
805 u32 cstart, u32 clen)
806{
807 unsigned int cend = cstart + clen - 1;
808
809 if (resmap == NULL || ocfs2_resmap_disabled(resmap))
810 return;
811
812 if (resv == NULL)
813 return;
814
815 BUG_ON(cstart != resv->r_start);
816
817 spin_lock(&resv_lock);
818
819 mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
820 "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
821 cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
822 resv->r_len, resv->r_last_start, resv->r_last_len);
823
824 BUG_ON(cstart < resv->r_start);
825 BUG_ON(cstart > ocfs2_resv_end(resv));
826 BUG_ON(cend > ocfs2_resv_end(resv));
827
828 ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend);
829 resv->r_last_start = cstart;
830 resv->r_last_len = clen;
831
832 /*
833 * May have been discarded above from
834 * ocfs2_adjust_resv_from_alloc().
835 */
836 if (!ocfs2_resv_empty(resv))
837 ocfs2_resv_mark_lru(resmap, resv);
838
839 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
840 "r_len: %u r_last_start: %u r_last_len: %u\n",
841 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
842 resv->r_last_start, resv->r_last_len);
843
844 ocfs2_check_resmap(resmap);
845
846 spin_unlock(&resv_lock);
847}
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
new file mode 100644
index 000000000000..1e49cc29d06c
--- /dev/null
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,159 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.h
5 *
6 * Allocation reservations function prototypes and structures.
7 *
8 * Copyright (C) 2010 Novell. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_RESERVATIONS_H
21#define OCFS2_RESERVATIONS_H
22
23#include <linux/rbtree.h>
24
25#define OCFS2_DEFAULT_RESV_LEVEL 2
26#define OCFS2_MAX_RESV_LEVEL 9
27#define OCFS2_MIN_RESV_LEVEL 0
28
/*
 * One allocation reservation (a "window" of bits). Instances are linked
 * into a struct ocfs2_reservation_map through r_node and r_lru.
 */
29struct ocfs2_alloc_reservation {
30 struct rb_node r_node; /* Linkage in the map's m_reservations tree */
31
32 unsigned int r_start; /* Beginning of current window */
33 unsigned int r_len; /* Length of the window */
34
35 unsigned int r_last_len; /* Length of most recent alloc */
36 unsigned int r_last_start; /* Start of most recent alloc */
37 struct list_head r_lru; /* LRU list head */
38
39 unsigned int r_flags; /* OCFS2_RESV_FLAG_* bits */
40};
41
/* Bit values for ocfs2_alloc_reservation.r_flags. */
42#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of an rbtree */
43#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
44 * destroyed immediately after use */
45#define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed
46 * directory btree */
47
/*
 * Per-bitmap reservation state: the set of reservation windows plus the
 * bitmap that is searched for free (zero) bits when placing a window.
 */
48struct ocfs2_reservation_map {
49 struct rb_root m_reservations; /* Windows, linked via r_node */
50 char *m_disk_bitmap; /* Bitmap searched for zero bits */
51
52 struct ocfs2_super *m_osb;
53
54 /* The following are not initialized to meaningful values until a disk
55 * bitmap is provided. */
56 u32 m_bitmap_len; /* Number of valid
57 * bits available */
58
59 struct list_head m_lru; /* LRU of reservations
60 * structures. */
61
62};
63
64void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
65
66#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP|OCFS2_RESV_FLAG_DIR)
67void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
68 unsigned int flags);
69
70int ocfs2_dir_resv_allowed(struct ocfs2_super *osb);
71
72/**
73 * ocfs2_resv_discard() - truncate a reservation
74 * @resmap:
75 * @resv: the reservation to truncate.
76 *
77 * After this function is called, the reservation will be empty, and
78 * unlinked from the rbtree.
79 */
80void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
81 struct ocfs2_alloc_reservation *resv);
82
83
84/**
85 * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
86 * @osb: ocfs2 super block the reservation map is being initialized for
87 * @resmap: struct ocfs2_reservation_map to initialize
90 *
91 * Only possible return value other than '0' is -ENOMEM for failure to
92 * allocation mirror bitmap.
93 */
94int ocfs2_resmap_init(struct ocfs2_super *osb,
95 struct ocfs2_reservation_map *resmap);
96
97/**
98 * ocfs2_resmap_restart() - "restart" a reservation bitmap
99 * @resmap: reservations bitmap
100 * @clen: Number of valid bits in the bitmap
101 * @disk_bitmap: the disk bitmap this resmap should refer to.
102 *
103 * Re-initialize the parameters of a reservation bitmap. This is
104 * useful for local alloc window slides.
105 *
106 * This function will call ocfs2_trunc_resv against all existing
107 * reservations. A future version will recalculate existing
108 * reservations based on the new bitmap.
109 */
110void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
111 unsigned int clen, char *disk_bitmap);
112
113/**
114 * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
115 * @resmap: the struct ocfs2_reservation_map to uninitialize
116 */
117void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
118
119/**
120 * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
121 * @resmap: reservations bitmap
122 * @resv: reservation to base search from
123 * @cstart: start of proposed allocation
124 * @clen: length (in clusters) of proposed allocation
125 *
126 * Using the reservation data from resv, this function will compare
127 * resmap and resmap->m_disk_bitmap to determine what part (if any) of
128 * the reservation window is still clear to use. If resv is empty,
129 * this function will try to allocate a window for it.
130 *
131 * On success, zero is returned and the valid allocation area is set in cstart
132 * and clen.
133 *
134 * Returns -ENOSPC if reservations are disabled.
135 */
136int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
137 struct ocfs2_alloc_reservation *resv,
138 int *cstart, int *clen);
139
140/**
141 * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
142 * @resmap: reservations bitmap
143 * @resv: optional reservation to recalculate based on new bitmap
144 * @cstart: start of allocation in clusters
145 * @clen: length of allocation in clusters.
146 *
147 * Tell the reservation code that bits were used to fulfill allocation in
148 * resmap. The bits don't have to have been part of any existing
149 * reservation. But we must always call this function when bits are claimed.
150 * Internally, the reservations code will use this information to mark the
151 * reservations bitmap. If resv is passed, its next allocation window will be
152 * calculated. It also expects that 'cstart' is the same as we passed back
153 * from ocfs2_resmap_resv_bits().
154 */
155void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
156 struct ocfs2_alloc_reservation *resv,
157 u32 cstart, u32 clen);
158
159#endif /* OCFS2_RESERVATIONS_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 3c3d673a4d20..dacd553d8617 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -134,11 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups); 134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
135 } 135 }
136 136
137 ret = ocfs2_journal_dirty(handle, group_bh); 137 ocfs2_journal_dirty(handle, group_bh);
138 if (ret < 0) {
139 mlog_errno(ret);
140 goto out_rollback;
141 }
142 138
143 /* update the inode accordingly. */ 139 /* update the inode accordingly. */
144 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh, 140 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
@@ -319,7 +315,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
319 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 315 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
320 316
321 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 317 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
322 ocfs2_group_bitmap_size(osb->sb) * 8) { 318 ocfs2_group_bitmap_size(osb->sb, 0,
319 osb->s_feature_incompat) * 8) {
323 mlog(ML_ERROR, "The disk is too old and small. " 320 mlog(ML_ERROR, "The disk is too old and small. "
324 "Force to do offline resize."); 321 "Force to do offline resize.");
325 ret = -EINVAL; 322 ret = -EINVAL;
@@ -500,7 +497,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
500 fe = (struct ocfs2_dinode *)main_bm_bh->b_data; 497 fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
501 498
502 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 499 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
503 ocfs2_group_bitmap_size(osb->sb) * 8) { 500 ocfs2_group_bitmap_size(osb->sb, 0,
501 osb->s_feature_incompat) * 8) {
504 mlog(ML_ERROR, "The disk is too old and small." 502 mlog(ML_ERROR, "The disk is too old and small."
505 " Force to do offline resize."); 503 " Force to do offline resize.");
506 ret = -EINVAL; 504 ret = -EINVAL;
@@ -545,12 +543,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
545 543
546 group = (struct ocfs2_group_desc *)group_bh->b_data; 544 group = (struct ocfs2_group_desc *)group_bh->b_data;
547 group->bg_next_group = cr->c_blkno; 545 group->bg_next_group = cr->c_blkno;
548 546 ocfs2_journal_dirty(handle, group_bh);
549 ret = ocfs2_journal_dirty(handle, group_bh);
550 if (ret < 0) {
551 mlog_errno(ret);
552 goto out_commit;
553 }
554 547
555 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), 548 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
556 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); 549 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 19ba00f28547..f4c2a9eb8c4d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -53,6 +53,15 @@
53 53
54#define OCFS2_MAX_TO_STEAL 1024 54#define OCFS2_MAX_TO_STEAL 1024
55 55
/*
 * Outcome of a suballocator bit claim. Passed as the 'res' argument to
 * ocfs2_cluster_group_search(), ocfs2_block_group_search() and
 * ocfs2_claim_suballoc_bits().
 */
56struct ocfs2_suballoc_result {
57 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is
59 contiguous. */
60 u64 sr_blkno; /* The first allocated block */
61 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */
63};
64
56static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 65static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
57static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
58static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -60,6 +69,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
60 struct inode *alloc_inode, 69 struct inode *alloc_inode,
61 struct buffer_head *bg_bh, 70 struct buffer_head *bg_bh,
62 u64 group_blkno, 71 u64 group_blkno,
72 unsigned int group_clusters,
63 u16 my_chain, 73 u16 my_chain,
64 struct ocfs2_chain_list *cl); 74 struct ocfs2_chain_list *cl);
65static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 75static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
@@ -73,20 +83,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
73 struct buffer_head *group_bh, 83 struct buffer_head *group_bh,
74 u32 bits_wanted, u32 min_bits, 84 u32 bits_wanted, u32 min_bits,
75 u64 max_block, 85 u64 max_block,
76 u16 *bit_off, u16 *bits_found); 86 struct ocfs2_suballoc_result *res);
77static int ocfs2_block_group_search(struct inode *inode, 87static int ocfs2_block_group_search(struct inode *inode,
78 struct buffer_head *group_bh, 88 struct buffer_head *group_bh,
79 u32 bits_wanted, u32 min_bits, 89 u32 bits_wanted, u32 min_bits,
80 u64 max_block, 90 u64 max_block,
81 u16 *bit_off, u16 *bits_found); 91 struct ocfs2_suballoc_result *res);
82static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 92static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
83 struct ocfs2_alloc_context *ac,
84 handle_t *handle, 93 handle_t *handle,
85 u32 bits_wanted, 94 u32 bits_wanted,
86 u32 min_bits, 95 u32 min_bits,
87 u16 *bit_off, 96 struct ocfs2_suballoc_result *res);
88 unsigned int *num_bits,
89 u64 *bg_blkno);
90static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 97static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
91 int nr); 98 int nr);
92static inline int ocfs2_block_group_set_bits(handle_t *handle, 99static inline int ocfs2_block_group_set_bits(handle_t *handle,
@@ -130,6 +137,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
130 } 137 }
131 brelse(ac->ac_bh); 138 brelse(ac->ac_bh);
132 ac->ac_bh = NULL; 139 ac->ac_bh = NULL;
140 ac->ac_resv = NULL;
133} 141}
134 142
135void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 143void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -325,14 +333,38 @@ out:
325 return rc; 333 return rc;
326} 334}
327 335
336static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
337 struct ocfs2_group_desc *bg,
338 struct ocfs2_chain_list *cl,
339 u64 p_blkno, u32 clusters)
340{
341 struct ocfs2_extent_list *el = &bg->bg_list;
342 struct ocfs2_extent_rec *rec;
343
344 BUG_ON(!ocfs2_supports_discontig_bg(osb));
345 if (!el->l_next_free_rec)
346 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));
347 rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
348 rec->e_blkno = cpu_to_le64(p_blkno);
349 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
350 le16_to_cpu(cl->cl_bpc));
351 rec->e_leaf_clusters = cpu_to_le32(clusters);
352 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
353 le16_add_cpu(&bg->bg_free_bits_count,
354 clusters * le16_to_cpu(cl->cl_bpc));
355 le16_add_cpu(&el->l_next_free_rec, 1);
356}
357
328static int ocfs2_block_group_fill(handle_t *handle, 358static int ocfs2_block_group_fill(handle_t *handle,
329 struct inode *alloc_inode, 359 struct inode *alloc_inode,
330 struct buffer_head *bg_bh, 360 struct buffer_head *bg_bh,
331 u64 group_blkno, 361 u64 group_blkno,
362 unsigned int group_clusters,
332 u16 my_chain, 363 u16 my_chain,
333 struct ocfs2_chain_list *cl) 364 struct ocfs2_chain_list *cl)
334{ 365{
335 int status = 0; 366 int status = 0;
367 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
336 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 368 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
337 struct super_block * sb = alloc_inode->i_sb; 369 struct super_block * sb = alloc_inode->i_sb;
338 370
@@ -359,19 +391,23 @@ static int ocfs2_block_group_fill(handle_t *handle,
359 memset(bg, 0, sb->s_blocksize); 391 memset(bg, 0, sb->s_blocksize);
360 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 392 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
361 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 393 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
362 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 394 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
363 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 395 osb->s_feature_incompat));
364 bg->bg_chain = cpu_to_le16(my_chain); 396 bg->bg_chain = cpu_to_le16(my_chain);
365 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 397 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
366 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 398 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
367 bg->bg_blkno = cpu_to_le64(group_blkno); 399 bg->bg_blkno = cpu_to_le64(group_blkno);
400 if (group_clusters == le16_to_cpu(cl->cl_cpg))
401 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
402 else
403 ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno,
404 group_clusters);
405
368 /* set the 1st bit in the bitmap to account for the descriptor block */ 406 /* set the 1st bit in the bitmap to account for the descriptor block */
369 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 407 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
370 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 408 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
371 409
372 status = ocfs2_journal_dirty(handle, bg_bh); 410 ocfs2_journal_dirty(handle, bg_bh);
373 if (status < 0)
374 mlog_errno(status);
375 411
376 /* There is no need to zero out or otherwise initialize the 412 /* There is no need to zero out or otherwise initialize the
377 * other blocks in a group - All valid FS metadata in a block 413 * other blocks in a group - All valid FS metadata in a block
@@ -397,6 +433,238 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
397 return best; 433 return best;
398} 434}
399 435
436static struct buffer_head *
437ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
438 struct inode *alloc_inode,
439 struct ocfs2_alloc_context *ac,
440 struct ocfs2_chain_list *cl)
441{
442 int status;
443 u32 bit_off, num_bits;
444 u64 bg_blkno;
445 struct buffer_head *bg_bh;
446 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
447
448 status = ocfs2_claim_clusters(handle, ac,
449 le16_to_cpu(cl->cl_cpg), &bit_off,
450 &num_bits);
451 if (status < 0) {
452 if (status != -ENOSPC)
453 mlog_errno(status);
454 goto bail;
455 }
456
457 /* setup the group */
458 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
459 mlog(0, "new descriptor, record %u, at block %llu\n",
460 alloc_rec, (unsigned long long)bg_blkno);
461
462 bg_bh = sb_getblk(osb->sb, bg_blkno);
463 if (!bg_bh) {
464 status = -EIO;
465 mlog_errno(status);
466 goto bail;
467 }
468 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
469
470 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
471 bg_blkno, num_bits, alloc_rec, cl);
472 if (status < 0) {
473 brelse(bg_bh);
474 mlog_errno(status);
475 }
476
477bail:
478 return status ? ERR_PTR(status) : bg_bh;
479}
480
481static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
482 handle_t *handle,
483 struct ocfs2_alloc_context *ac,
484 unsigned int min_bits,
485 u32 *bit_off, u32 *num_bits)
486{
487 int status = 0;
488
489 while (min_bits) {
490 status = ocfs2_claim_clusters(handle, ac, min_bits,
491 bit_off, num_bits);
492 if (status != -ENOSPC)
493 break;
494
495 min_bits >>= 1;
496 }
497
498 return status;
499}
500
501static int ocfs2_block_group_grow_discontig(handle_t *handle,
502 struct inode *alloc_inode,
503 struct buffer_head *bg_bh,
504 struct ocfs2_alloc_context *ac,
505 struct ocfs2_chain_list *cl,
506 unsigned int min_bits)
507{
508 int status;
509 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
510 struct ocfs2_group_desc *bg =
511 (struct ocfs2_group_desc *)bg_bh->b_data;
512 unsigned int needed = le16_to_cpu(cl->cl_cpg) -
513 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
514 u32 p_cpos, clusters;
515 u64 p_blkno;
516 struct ocfs2_extent_list *el = &bg->bg_list;
517
518 status = ocfs2_journal_access_gd(handle,
519 INODE_CACHE(alloc_inode),
520 bg_bh,
521 OCFS2_JOURNAL_ACCESS_CREATE);
522 if (status < 0) {
523 mlog_errno(status);
524 goto bail;
525 }
526
527 while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
528 le16_to_cpu(el->l_count))) {
529 if (min_bits > needed)
530 min_bits = needed;
531 status = ocfs2_block_group_claim_bits(osb, handle, ac,
532 min_bits, &p_cpos,
533 &clusters);
534 if (status < 0) {
535 if (status != -ENOSPC)
536 mlog_errno(status);
537 goto bail;
538 }
539 p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
540 ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
541 clusters);
542
543 min_bits = clusters;
544 needed = le16_to_cpu(cl->cl_cpg) -
545 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
546 }
547
548 if (needed > 0) {
549 /*
550 * We have used up all the extent rec but can't fill up
551 * the cpg. So bail out.
552 */
553 status = -ENOSPC;
554 goto bail;
555 }
556
557 ocfs2_journal_dirty(handle, bg_bh);
558
559bail:
560 return status;
561}
562
563static void ocfs2_bg_alloc_cleanup(handle_t *handle,
564 struct ocfs2_alloc_context *cluster_ac,
565 struct inode *alloc_inode,
566 struct buffer_head *bg_bh)
567{
568 int i, ret;
569 struct ocfs2_group_desc *bg;
570 struct ocfs2_extent_list *el;
571 struct ocfs2_extent_rec *rec;
572
573 if (!bg_bh)
574 return;
575
576 bg = (struct ocfs2_group_desc *)bg_bh->b_data;
577 el = &bg->bg_list;
578 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
579 rec = &el->l_recs[i];
580 ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
581 cluster_ac->ac_bh,
582 le64_to_cpu(rec->e_blkno),
583 le32_to_cpu(rec->e_leaf_clusters));
584 if (ret)
585 mlog_errno(ret);
586 /* Try all the clusters to free */
587 }
588
589 ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
590 brelse(bg_bh);
591}
592
593static struct buffer_head *
594ocfs2_block_group_alloc_discontig(handle_t *handle,
595 struct inode *alloc_inode,
596 struct ocfs2_alloc_context *ac,
597 struct ocfs2_chain_list *cl)
598{
599 int status;
600 u32 bit_off, num_bits;
601 u64 bg_blkno;
602 unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
603 struct buffer_head *bg_bh = NULL;
604 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
605 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
606
607 if (!ocfs2_supports_discontig_bg(osb)) {
608 status = -ENOSPC;
609 goto bail;
610 }
611
612 status = ocfs2_extend_trans(handle,
613 ocfs2_calc_bg_discontig_credits(osb->sb));
614 if (status) {
615 mlog_errno(status);
616 goto bail;
617 }
618
619 /*
620 * We're going to be grabbing from multiple cluster groups.
621 * We don't have enough credits to relink them all, and the
622 * cluster groups will be staying in cache for the duration of
623 * this operation.
624 */
625 ac->ac_allow_chain_relink = 0;
626
627 /* Claim the first region */
628 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
629 &bit_off, &num_bits);
630 if (status < 0) {
631 if (status != -ENOSPC)
632 mlog_errno(status);
633 goto bail;
634 }
635 min_bits = num_bits;
636
637 /* setup the group */
638 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
639 mlog(0, "new descriptor, record %u, at block %llu\n",
640 alloc_rec, (unsigned long long)bg_blkno);
641
642 bg_bh = sb_getblk(osb->sb, bg_blkno);
643 if (!bg_bh) {
644 status = -EIO;
645 mlog_errno(status);
646 goto bail;
647 }
648 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
649
650 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
651 bg_blkno, num_bits, alloc_rec, cl);
652 if (status < 0) {
653 mlog_errno(status);
654 goto bail;
655 }
656
657 status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
658 bg_bh, ac, cl, min_bits);
659 if (status)
660 mlog_errno(status);
661
662bail:
663 if (status)
664 ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
665 return status ? ERR_PTR(status) : bg_bh;
666}
667
400/* 668/*
401 * We expect the block group allocator to already be locked. 669 * We expect the block group allocator to already be locked.
402 */ 670 */
@@ -412,9 +680,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
412 struct ocfs2_chain_list *cl; 680 struct ocfs2_chain_list *cl;
413 struct ocfs2_alloc_context *ac = NULL; 681 struct ocfs2_alloc_context *ac = NULL;
414 handle_t *handle = NULL; 682 handle_t *handle = NULL;
415 u32 bit_off, num_bits;
416 u16 alloc_rec; 683 u16 alloc_rec;
417 u64 bg_blkno;
418 struct buffer_head *bg_bh = NULL; 684 struct buffer_head *bg_bh = NULL;
419 struct ocfs2_group_desc *bg; 685 struct ocfs2_group_desc *bg;
420 686
@@ -447,44 +713,20 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
447 (unsigned long long)*last_alloc_group); 713 (unsigned long long)*last_alloc_group);
448 ac->ac_last_group = *last_alloc_group; 714 ac->ac_last_group = *last_alloc_group;
449 } 715 }
450 status = ocfs2_claim_clusters(osb, 716
451 handle, 717 bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
452 ac, 718 ac, cl);
453 le16_to_cpu(cl->cl_cpg), 719 if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC))
454 &bit_off, 720 bg_bh = ocfs2_block_group_alloc_discontig(handle,
455 &num_bits); 721 alloc_inode,
456 if (status < 0) { 722 ac, cl);
723 if (IS_ERR(bg_bh)) {
724 status = PTR_ERR(bg_bh);
725 bg_bh = NULL;
457 if (status != -ENOSPC) 726 if (status != -ENOSPC)
458 mlog_errno(status); 727 mlog_errno(status);
459 goto bail; 728 goto bail;
460 } 729 }
461
462 alloc_rec = ocfs2_find_smallest_chain(cl);
463
464 /* setup the group */
465 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
466 mlog(0, "new descriptor, record %u, at block %llu\n",
467 alloc_rec, (unsigned long long)bg_blkno);
468
469 bg_bh = sb_getblk(osb->sb, bg_blkno);
470 if (!bg_bh) {
471 status = -EIO;
472 mlog_errno(status);
473 goto bail;
474 }
475 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
476
477 status = ocfs2_block_group_fill(handle,
478 alloc_inode,
479 bg_bh,
480 bg_blkno,
481 alloc_rec,
482 cl);
483 if (status < 0) {
484 mlog_errno(status);
485 goto bail;
486 }
487
488 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 730 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
489 731
490 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 732 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
@@ -494,10 +736,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
494 goto bail; 736 goto bail;
495 } 737 }
496 738
739 alloc_rec = le16_to_cpu(bg->bg_chain);
497 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 740 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
498 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
499 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
500 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
501 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
502 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
503 747
@@ -506,11 +750,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
506 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 750 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
507 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 751 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
508 752
509 status = ocfs2_journal_dirty(handle, bh); 753 ocfs2_journal_dirty(handle, bh);
510 if (status < 0) {
511 mlog_errno(status);
512 goto bail;
513 }
514 754
515 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 755 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
516 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 756 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -760,7 +1000,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
760 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 1000 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
761 EXTENT_ALLOC_SYSTEM_INODE, 1001 EXTENT_ALLOC_SYSTEM_INODE,
762 (u32)osb->slot_num, NULL, 1002 (u32)osb->slot_num, NULL,
763 ALLOC_NEW_GROUP); 1003 ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
764 1004
765 1005
766 if (status >= 0) { 1006 if (status >= 0) {
@@ -946,11 +1186,7 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
946 status = ocfs2_reserve_local_alloc_bits(osb, 1186 status = ocfs2_reserve_local_alloc_bits(osb,
947 bits_wanted, 1187 bits_wanted,
948 *ac); 1188 *ac);
949 if (status == -EFBIG) { 1189 if ((status < 0) && (status != -ENOSPC)) {
950 /* The local alloc window is outside ac_max_block.
951 * use the main bitmap. */
952 status = -ENOSPC;
953 } else if ((status < 0) && (status != -ENOSPC)) {
954 mlog_errno(status); 1190 mlog_errno(status);
955 goto bail; 1191 goto bail;
956 } 1192 }
@@ -1033,8 +1269,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1033 struct buffer_head *bg_bh, 1269 struct buffer_head *bg_bh,
1034 unsigned int bits_wanted, 1270 unsigned int bits_wanted,
1035 unsigned int total_bits, 1271 unsigned int total_bits,
1036 u16 *bit_off, 1272 struct ocfs2_suballoc_result *res)
1037 u16 *bits_found)
1038{ 1273{
1039 void *bitmap; 1274 void *bitmap;
1040 u16 best_offset, best_size; 1275 u16 best_offset, best_size;
@@ -1078,14 +1313,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1078 } 1313 }
1079 } 1314 }
1080 1315
1081 /* XXX: I think the first clause is equivalent to the second 1316 if (best_size) {
1082 * - jlbec */ 1317 res->sr_bit_offset = best_offset;
1083 if (found == bits_wanted) { 1318 res->sr_bits = best_size;
1084 *bit_off = start - found;
1085 *bits_found = found;
1086 } else if (best_size) {
1087 *bit_off = best_offset;
1088 *bits_found = best_size;
1089 } else { 1319 } else {
1090 status = -ENOSPC; 1320 status = -ENOSPC;
1091 /* No error log here -- see the comment above 1321 /* No error log here -- see the comment above
@@ -1129,16 +1359,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1129 } 1359 }
1130 1360
1131 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 1361 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
1132
1133 while(num_bits--) 1362 while(num_bits--)
1134 ocfs2_set_bit(bit_off++, bitmap); 1363 ocfs2_set_bit(bit_off++, bitmap);
1135 1364
1136 status = ocfs2_journal_dirty(handle, 1365 ocfs2_journal_dirty(handle, group_bh);
1137 group_bh);
1138 if (status < 0) {
1139 mlog_errno(status);
1140 goto bail;
1141 }
1142 1366
1143bail: 1367bail:
1144 mlog_exit(status); 1368 mlog_exit(status);
@@ -1202,12 +1426,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1202 } 1426 }
1203 1427
1204 prev_bg->bg_next_group = bg->bg_next_group; 1428 prev_bg->bg_next_group = bg->bg_next_group;
1205 1429 ocfs2_journal_dirty(handle, prev_bg_bh);
1206 status = ocfs2_journal_dirty(handle, prev_bg_bh);
1207 if (status < 0) {
1208 mlog_errno(status);
1209 goto out_rollback;
1210 }
1211 1430
1212 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 1431 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1213 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1432 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1217,12 +1436,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1217 } 1436 }
1218 1437
1219 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 1438 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
1220 1439 ocfs2_journal_dirty(handle, bg_bh);
1221 status = ocfs2_journal_dirty(handle, bg_bh);
1222 if (status < 0) {
1223 mlog_errno(status);
1224 goto out_rollback;
1225 }
1226 1440
1227 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 1441 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1228 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1442 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1232,14 +1446,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1232 } 1446 }
1233 1447
1234 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 1448 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1449 ocfs2_journal_dirty(handle, fe_bh);
1235 1450
1236 status = ocfs2_journal_dirty(handle, fe_bh);
1237 if (status < 0) {
1238 mlog_errno(status);
1239 goto out_rollback;
1240 }
1241
1242 status = 0;
1243out_rollback: 1451out_rollback:
1244 if (status < 0) { 1452 if (status < 0) {
1245 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 1453 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
@@ -1263,14 +1471,13 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1263 struct buffer_head *group_bh, 1471 struct buffer_head *group_bh,
1264 u32 bits_wanted, u32 min_bits, 1472 u32 bits_wanted, u32 min_bits,
1265 u64 max_block, 1473 u64 max_block,
1266 u16 *bit_off, u16 *bits_found) 1474 struct ocfs2_suballoc_result *res)
1267{ 1475{
1268 int search = -ENOSPC; 1476 int search = -ENOSPC;
1269 int ret; 1477 int ret;
1270 u64 blkoff; 1478 u64 blkoff;
1271 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1479 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1480 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1273 u16 tmp_off, tmp_found;
1274 unsigned int max_bits, gd_cluster_off; 1481 unsigned int max_bits, gd_cluster_off;
1275 1482
1276 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1483 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
@@ -1297,15 +1504,15 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1297 1504
1298 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1505 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1299 group_bh, bits_wanted, 1506 group_bh, bits_wanted,
1300 max_bits, 1507 max_bits, res);
1301 &tmp_off, &tmp_found);
1302 if (ret) 1508 if (ret)
1303 return ret; 1509 return ret;
1304 1510
1305 if (max_block) { 1511 if (max_block) {
1306 blkoff = ocfs2_clusters_to_blocks(inode->i_sb, 1512 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1307 gd_cluster_off + 1513 gd_cluster_off +
1308 tmp_off + tmp_found); 1514 res->sr_bit_offset +
1515 res->sr_bits);
1309 mlog(0, "Checking %llu against %llu\n", 1516 mlog(0, "Checking %llu against %llu\n",
1310 (unsigned long long)blkoff, 1517 (unsigned long long)blkoff,
1311 (unsigned long long)max_block); 1518 (unsigned long long)max_block);
@@ -1317,16 +1524,14 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1317 * return success, but we still want to return 1524 * return success, but we still want to return
1318 * -ENOSPC unless it found the minimum number 1525 * -ENOSPC unless it found the minimum number
1319 * of bits. */ 1526 * of bits. */
1320 if (min_bits <= tmp_found) { 1527 if (min_bits <= res->sr_bits)
1321 *bit_off = tmp_off;
1322 *bits_found = tmp_found;
1323 search = 0; /* success */ 1528 search = 0; /* success */
1324 } else if (tmp_found) { 1529 else if (res->sr_bits) {
1325 /* 1530 /*
1326 * Don't show bits which we'll be returning 1531 * Don't show bits which we'll be returning
1327 * for allocation to the local alloc bitmap. 1532 * for allocation to the local alloc bitmap.
1328 */ 1533 */
1329 ocfs2_local_alloc_seen_free_bits(osb, tmp_found); 1534 ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
1330 } 1535 }
1331 } 1536 }
1332 1537
@@ -1337,7 +1542,7 @@ static int ocfs2_block_group_search(struct inode *inode,
1337 struct buffer_head *group_bh, 1542 struct buffer_head *group_bh,
1338 u32 bits_wanted, u32 min_bits, 1543 u32 bits_wanted, u32 min_bits,
1339 u64 max_block, 1544 u64 max_block,
1340 u16 *bit_off, u16 *bits_found) 1545 struct ocfs2_suballoc_result *res)
1341{ 1546{
1342 int ret = -ENOSPC; 1547 int ret = -ENOSPC;
1343 u64 blkoff; 1548 u64 blkoff;
@@ -1350,10 +1555,10 @@ static int ocfs2_block_group_search(struct inode *inode,
1350 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1555 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1351 group_bh, bits_wanted, 1556 group_bh, bits_wanted,
1352 le16_to_cpu(bg->bg_bits), 1557 le16_to_cpu(bg->bg_bits),
1353 bit_off, bits_found); 1558 res);
1354 if (!ret && max_block) { 1559 if (!ret && max_block) {
1355 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + 1560 blkoff = le64_to_cpu(bg->bg_blkno) +
1356 *bits_found; 1561 res->sr_bit_offset + res->sr_bits;
1357 mlog(0, "Checking %llu against %llu\n", 1562 mlog(0, "Checking %llu against %llu\n",
1358 (unsigned long long)blkoff, 1563 (unsigned long long)blkoff,
1359 (unsigned long long)max_block); 1564 (unsigned long long)max_block);
@@ -1386,33 +1591,76 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1386 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); 1591 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1387 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); 1592 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1388 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); 1593 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1389 1594 ocfs2_journal_dirty(handle, di_bh);
1390 ret = ocfs2_journal_dirty(handle, di_bh);
1391 if (ret < 0)
1392 mlog_errno(ret);
1393 1595
1394out: 1596out:
1395 return ret; 1597 return ret;
1396} 1598}
1397 1599
1600static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1601 struct ocfs2_extent_rec *rec,
1602 struct ocfs2_chain_list *cl)
1603{
1604 unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1605 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1606 unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
1607
1608 if (res->sr_bit_offset < bitoff)
1609 return 0;
1610 if (res->sr_bit_offset >= (bitoff + bitcount))
1611 return 0;
1612 res->sr_blkno = le64_to_cpu(rec->e_blkno) +
1613 (res->sr_bit_offset - bitoff);
1614 if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
1615 res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
1616 return 1;
1617}
1618
1619static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
1620 struct ocfs2_group_desc *bg,
1621 struct ocfs2_suballoc_result *res)
1622{
1623 int i;
1624 u64 bg_blkno = res->sr_bg_blkno; /* Save off */
1625 struct ocfs2_extent_rec *rec;
1626 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1627 struct ocfs2_chain_list *cl = &di->id2.i_chain;
1628
1629 if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
1630 res->sr_blkno = 0;
1631 return;
1632 }
1633
1634 res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
1635 res->sr_bg_blkno = 0; /* Clear it for contig block groups */
1636 if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) ||
1637 !bg->bg_list.l_next_free_rec)
1638 return;
1639
1640 for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
1641 rec = &bg->bg_list.l_recs[i];
1642 if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
1643 res->sr_bg_blkno = bg_blkno; /* Restore */
1644 break;
1645 }
1646 }
1647}
1648
1398static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, 1649static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1399 handle_t *handle, 1650 handle_t *handle,
1400 u32 bits_wanted, 1651 u32 bits_wanted,
1401 u32 min_bits, 1652 u32 min_bits,
1402 u16 *bit_off, 1653 struct ocfs2_suballoc_result *res,
1403 unsigned int *num_bits,
1404 u64 gd_blkno,
1405 u16 *bits_left) 1654 u16 *bits_left)
1406{ 1655{
1407 int ret; 1656 int ret;
1408 u16 found;
1409 struct buffer_head *group_bh = NULL; 1657 struct buffer_head *group_bh = NULL;
1410 struct ocfs2_group_desc *gd; 1658 struct ocfs2_group_desc *gd;
1411 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; 1659 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1412 struct inode *alloc_inode = ac->ac_inode; 1660 struct inode *alloc_inode = ac->ac_inode;
1413 1661
1414 ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, 1662 ret = ocfs2_read_group_descriptor(alloc_inode, di,
1415 &group_bh); 1663 res->sr_bg_blkno, &group_bh);
1416 if (ret < 0) { 1664 if (ret < 0) {
1417 mlog_errno(ret); 1665 mlog_errno(ret);
1418 return ret; 1666 return ret;
@@ -1420,17 +1668,18 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1420 1668
1421 gd = (struct ocfs2_group_desc *) group_bh->b_data; 1669 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1422 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1670 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1423 ac->ac_max_block, bit_off, &found); 1671 ac->ac_max_block, res);
1424 if (ret < 0) { 1672 if (ret < 0) {
1425 if (ret != -ENOSPC) 1673 if (ret != -ENOSPC)
1426 mlog_errno(ret); 1674 mlog_errno(ret);
1427 goto out; 1675 goto out;
1428 } 1676 }
1429 1677
1430 *num_bits = found; 1678 if (!ret)
1679 ocfs2_bg_discontig_fix_result(ac, gd, res);
1431 1680
1432 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1681 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1433 *num_bits, 1682 res->sr_bits,
1434 le16_to_cpu(gd->bg_chain)); 1683 le16_to_cpu(gd->bg_chain));
1435 if (ret < 0) { 1684 if (ret < 0) {
1436 mlog_errno(ret); 1685 mlog_errno(ret);
@@ -1438,7 +1687,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1438 } 1687 }
1439 1688
1440 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, 1689 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1441 *bit_off, *num_bits); 1690 res->sr_bit_offset, res->sr_bits);
1442 if (ret < 0) 1691 if (ret < 0)
1443 mlog_errno(ret); 1692 mlog_errno(ret);
1444 1693
@@ -1454,13 +1703,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1454 handle_t *handle, 1703 handle_t *handle,
1455 u32 bits_wanted, 1704 u32 bits_wanted,
1456 u32 min_bits, 1705 u32 min_bits,
1457 u16 *bit_off, 1706 struct ocfs2_suballoc_result *res,
1458 unsigned int *num_bits,
1459 u64 *bg_blkno,
1460 u16 *bits_left) 1707 u16 *bits_left)
1461{ 1708{
1462 int status; 1709 int status;
1463 u16 chain, tmp_bits; 1710 u16 chain;
1464 u32 tmp_used; 1711 u32 tmp_used;
1465 u64 next_group; 1712 u64 next_group;
1466 struct inode *alloc_inode = ac->ac_inode; 1713 struct inode *alloc_inode = ac->ac_inode;
@@ -1489,8 +1736,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1489 * the 1st group with any empty bits. */ 1736 * the 1st group with any empty bits. */
1490 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1737 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1491 bits_wanted, min_bits, 1738 bits_wanted, min_bits,
1492 ac->ac_max_block, bit_off, 1739 ac->ac_max_block,
1493 &tmp_bits)) == -ENOSPC) { 1740 res)) == -ENOSPC) {
1494 if (!bg->bg_next_group) 1741 if (!bg->bg_next_group)
1495 break; 1742 break;
1496 1743
@@ -1515,11 +1762,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1515 } 1762 }
1516 1763
1517 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1764 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1518 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1765 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1519 1766
1520 *num_bits = tmp_bits; 1767 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
1768
1769 BUG_ON(res->sr_bits == 0);
1770 if (!status)
1771 ocfs2_bg_discontig_fix_result(ac, bg, res);
1521 1772
1522 BUG_ON(*num_bits == 0);
1523 1773
1524 /* 1774 /*
1525 * Keep track of previous block descriptor read. When 1775 * Keep track of previous block descriptor read. When
@@ -1536,7 +1786,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1536 */ 1786 */
1537 if (ac->ac_allow_chain_relink && 1787 if (ac->ac_allow_chain_relink &&
1538 (prev_group_bh) && 1788 (prev_group_bh) &&
1539 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1789 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
1540 status = ocfs2_relink_block_group(handle, alloc_inode, 1790 status = ocfs2_relink_block_group(handle, alloc_inode,
1541 ac->ac_bh, group_bh, 1791 ac->ac_bh, group_bh,
1542 prev_group_bh, chain); 1792 prev_group_bh, chain);
@@ -1558,31 +1808,24 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1558 } 1808 }
1559 1809
1560 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1810 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1561 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1811 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1562 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1812 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
1563 1813 ocfs2_journal_dirty(handle, ac->ac_bh);
1564 status = ocfs2_journal_dirty(handle,
1565 ac->ac_bh);
1566 if (status < 0) {
1567 mlog_errno(status);
1568 goto bail;
1569 }
1570 1814
1571 status = ocfs2_block_group_set_bits(handle, 1815 status = ocfs2_block_group_set_bits(handle,
1572 alloc_inode, 1816 alloc_inode,
1573 bg, 1817 bg,
1574 group_bh, 1818 group_bh,
1575 *bit_off, 1819 res->sr_bit_offset,
1576 *num_bits); 1820 res->sr_bits);
1577 if (status < 0) { 1821 if (status < 0) {
1578 mlog_errno(status); 1822 mlog_errno(status);
1579 goto bail; 1823 goto bail;
1580 } 1824 }
1581 1825
1582 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1826 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1583 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1827 (unsigned long long)le64_to_cpu(fe->i_blkno));
1584 1828
1585 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1586 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1829 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1587bail: 1830bail:
1588 brelse(group_bh); 1831 brelse(group_bh);
@@ -1593,19 +1836,15 @@ bail:
1593} 1836}
1594 1837
1595/* will give out up to bits_wanted contiguous bits. */ 1838/* will give out up to bits_wanted contiguous bits. */
1596static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1839static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1597 struct ocfs2_alloc_context *ac,
1598 handle_t *handle, 1840 handle_t *handle,
1599 u32 bits_wanted, 1841 u32 bits_wanted,
1600 u32 min_bits, 1842 u32 min_bits,
1601 u16 *bit_off, 1843 struct ocfs2_suballoc_result *res)
1602 unsigned int *num_bits,
1603 u64 *bg_blkno)
1604{ 1844{
1605 int status; 1845 int status;
1606 u16 victim, i; 1846 u16 victim, i;
1607 u16 bits_left = 0; 1847 u16 bits_left = 0;
1608 u64 hint_blkno = ac->ac_last_group;
1609 struct ocfs2_chain_list *cl; 1848 struct ocfs2_chain_list *cl;
1610 struct ocfs2_dinode *fe; 1849 struct ocfs2_dinode *fe;
1611 1850
@@ -1623,7 +1862,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1623 1862
1624 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1863 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1625 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1864 le32_to_cpu(fe->id1.bitmap1.i_total)) {
1626 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1865 ocfs2_error(ac->ac_inode->i_sb,
1866 "Chain allocator dinode %llu has %u used "
1627 "bits but only %u total.", 1867 "bits but only %u total.",
1628 (unsigned long long)le64_to_cpu(fe->i_blkno), 1868 (unsigned long long)le64_to_cpu(fe->i_blkno),
1629 le32_to_cpu(fe->id1.bitmap1.i_used), 1869 le32_to_cpu(fe->id1.bitmap1.i_used),
@@ -1632,22 +1872,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1632 goto bail; 1872 goto bail;
1633 } 1873 }
1634 1874
1635 if (hint_blkno) { 1875 res->sr_bg_blkno = ac->ac_last_group;
1876 if (res->sr_bg_blkno) {
1636 /* Attempt to short-circuit the usual search mechanism 1877 /* Attempt to short-circuit the usual search mechanism
1637 * by jumping straight to the most recently used 1878 * by jumping straight to the most recently used
1638 * allocation group. This helps us mantain some 1879 * allocation group. This helps us mantain some
1639 * contiguousness across allocations. */ 1880 * contiguousness across allocations. */
1640 status = ocfs2_search_one_group(ac, handle, bits_wanted, 1881 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1641 min_bits, bit_off, num_bits, 1882 min_bits, res, &bits_left);
1642 hint_blkno, &bits_left); 1883 if (!status)
1643 if (!status) {
1644 /* Be careful to update *bg_blkno here as the
1645 * caller is expecting it to be filled in, and
1646 * ocfs2_search_one_group() won't do that for
1647 * us. */
1648 *bg_blkno = hint_blkno;
1649 goto set_hint; 1884 goto set_hint;
1650 }
1651 if (status < 0 && status != -ENOSPC) { 1885 if (status < 0 && status != -ENOSPC) {
1652 mlog_errno(status); 1886 mlog_errno(status);
1653 goto bail; 1887 goto bail;
@@ -1660,8 +1894,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1660 ac->ac_chain = victim; 1894 ac->ac_chain = victim;
1661 ac->ac_allow_chain_relink = 1; 1895 ac->ac_allow_chain_relink = 1;
1662 1896
1663 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, 1897 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1664 num_bits, bg_blkno, &bits_left); 1898 res, &bits_left);
1665 if (!status) 1899 if (!status)
1666 goto set_hint; 1900 goto set_hint;
1667 if (status < 0 && status != -ENOSPC) { 1901 if (status < 0 && status != -ENOSPC) {
@@ -1685,8 +1919,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1685 1919
1686 ac->ac_chain = i; 1920 ac->ac_chain = i;
1687 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1921 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1688 bit_off, num_bits, bg_blkno, 1922 res, &bits_left);
1689 &bits_left);
1690 if (!status) 1923 if (!status)
1691 break; 1924 break;
1692 if (status < 0 && status != -ENOSPC) { 1925 if (status < 0 && status != -ENOSPC) {
@@ -1703,7 +1936,7 @@ set_hint:
1703 if (bits_left < min_bits) 1936 if (bits_left < min_bits)
1704 ac->ac_last_group = 0; 1937 ac->ac_last_group = 0;
1705 else 1938 else
1706 ac->ac_last_group = *bg_blkno; 1939 ac->ac_last_group = res->sr_bg_blkno;
1707 } 1940 }
1708 1941
1709bail: 1942bail:
@@ -1711,37 +1944,37 @@ bail:
1711 return status; 1944 return status;
1712} 1945}
1713 1946
1714int ocfs2_claim_metadata(struct ocfs2_super *osb, 1947int ocfs2_claim_metadata(handle_t *handle,
1715 handle_t *handle,
1716 struct ocfs2_alloc_context *ac, 1948 struct ocfs2_alloc_context *ac,
1717 u32 bits_wanted, 1949 u32 bits_wanted,
1950 u64 *suballoc_loc,
1718 u16 *suballoc_bit_start, 1951 u16 *suballoc_bit_start,
1719 unsigned int *num_bits, 1952 unsigned int *num_bits,
1720 u64 *blkno_start) 1953 u64 *blkno_start)
1721{ 1954{
1722 int status; 1955 int status;
1723 u64 bg_blkno; 1956 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1724 1957
1725 BUG_ON(!ac); 1958 BUG_ON(!ac);
1726 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1959 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1727 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1960 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1728 1961
1729 status = ocfs2_claim_suballoc_bits(osb, 1962 status = ocfs2_claim_suballoc_bits(ac,
1730 ac,
1731 handle, 1963 handle,
1732 bits_wanted, 1964 bits_wanted,
1733 1, 1965 1,
1734 suballoc_bit_start, 1966 &res);
1735 num_bits,
1736 &bg_blkno);
1737 if (status < 0) { 1967 if (status < 0) {
1738 mlog_errno(status); 1968 mlog_errno(status);
1739 goto bail; 1969 goto bail;
1740 } 1970 }
1741 atomic_inc(&osb->alloc_stats.bg_allocs); 1971 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1742 1972
1743 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1973 *suballoc_loc = res.sr_bg_blkno;
1744 ac->ac_bits_given += (*num_bits); 1974 *suballoc_bit_start = res.sr_bit_offset;
1975 *blkno_start = res.sr_blkno;
1976 ac->ac_bits_given += res.sr_bits;
1977 *num_bits = res.sr_bits;
1745 status = 0; 1978 status = 0;
1746bail: 1979bail:
1747 mlog_exit(status); 1980 mlog_exit(status);
@@ -1749,10 +1982,10 @@ bail:
1749} 1982}
1750 1983
1751static void ocfs2_init_inode_ac_group(struct inode *dir, 1984static void ocfs2_init_inode_ac_group(struct inode *dir,
1752 struct buffer_head *parent_fe_bh, 1985 struct buffer_head *parent_di_bh,
1753 struct ocfs2_alloc_context *ac) 1986 struct ocfs2_alloc_context *ac)
1754{ 1987{
1755 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data; 1988 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data;
1756 /* 1989 /*
1757 * Try to allocate inodes from some specific group. 1990 * Try to allocate inodes from some specific group.
1758 * 1991 *
@@ -1766,10 +1999,14 @@ static void ocfs2_init_inode_ac_group(struct inode *dir,
1766 if (OCFS2_I(dir)->ip_last_used_group && 1999 if (OCFS2_I(dir)->ip_last_used_group &&
1767 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot) 2000 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1768 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group; 2001 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1769 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot) 2002 else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) {
1770 ac->ac_last_group = ocfs2_which_suballoc_group( 2003 if (di->i_suballoc_loc)
1771 le64_to_cpu(fe->i_blkno), 2004 ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc);
1772 le16_to_cpu(fe->i_suballoc_bit)); 2005 else
2006 ac->ac_last_group = ocfs2_which_suballoc_group(
2007 le64_to_cpu(di->i_blkno),
2008 le16_to_cpu(di->i_suballoc_bit));
2009 }
1773} 2010}
1774 2011
1775static inline void ocfs2_save_inode_ac_group(struct inode *dir, 2012static inline void ocfs2_save_inode_ac_group(struct inode *dir,
@@ -1779,17 +2016,16 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1779 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2016 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1780} 2017}
1781 2018
1782int ocfs2_claim_new_inode(struct ocfs2_super *osb, 2019int ocfs2_claim_new_inode(handle_t *handle,
1783 handle_t *handle,
1784 struct inode *dir, 2020 struct inode *dir,
1785 struct buffer_head *parent_fe_bh, 2021 struct buffer_head *parent_fe_bh,
1786 struct ocfs2_alloc_context *ac, 2022 struct ocfs2_alloc_context *ac,
2023 u64 *suballoc_loc,
1787 u16 *suballoc_bit, 2024 u16 *suballoc_bit,
1788 u64 *fe_blkno) 2025 u64 *fe_blkno)
1789{ 2026{
1790 int status; 2027 int status;
1791 unsigned int num_bits; 2028 struct ocfs2_suballoc_result res;
1792 u64 bg_blkno;
1793 2029
1794 mlog_entry_void(); 2030 mlog_entry_void();
1795 2031
@@ -1800,23 +2036,22 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1800 2036
1801 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); 2037 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
1802 2038
1803 status = ocfs2_claim_suballoc_bits(osb, 2039 status = ocfs2_claim_suballoc_bits(ac,
1804 ac,
1805 handle, 2040 handle,
1806 1, 2041 1,
1807 1, 2042 1,
1808 suballoc_bit, 2043 &res);
1809 &num_bits,
1810 &bg_blkno);
1811 if (status < 0) { 2044 if (status < 0) {
1812 mlog_errno(status); 2045 mlog_errno(status);
1813 goto bail; 2046 goto bail;
1814 } 2047 }
1815 atomic_inc(&osb->alloc_stats.bg_allocs); 2048 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1816 2049
1817 BUG_ON(num_bits != 1); 2050 BUG_ON(res.sr_bits != 1);
1818 2051
1819 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 2052 *suballoc_loc = res.sr_bg_blkno;
2053 *suballoc_bit = res.sr_bit_offset;
2054 *fe_blkno = res.sr_blkno;
1820 ac->ac_bits_given++; 2055 ac->ac_bits_given++;
1821 ocfs2_save_inode_ac_group(dir, ac); 2056 ocfs2_save_inode_ac_group(dir, ac);
1822 status = 0; 2057 status = 0;
@@ -1886,8 +2121,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1886 * contig. allocation, set to '1' to indicate we can deal with extents 2121 * contig. allocation, set to '1' to indicate we can deal with extents
1887 * of any size. 2122 * of any size.
1888 */ 2123 */
1889int __ocfs2_claim_clusters(struct ocfs2_super *osb, 2124int __ocfs2_claim_clusters(handle_t *handle,
1890 handle_t *handle,
1891 struct ocfs2_alloc_context *ac, 2125 struct ocfs2_alloc_context *ac,
1892 u32 min_clusters, 2126 u32 min_clusters,
1893 u32 max_clusters, 2127 u32 max_clusters,
@@ -1896,8 +2130,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1896{ 2130{
1897 int status; 2131 int status;
1898 unsigned int bits_wanted = max_clusters; 2132 unsigned int bits_wanted = max_clusters;
1899 u64 bg_blkno = 0; 2133 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1900 u16 bg_bit_off; 2134 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
1901 2135
1902 mlog_entry_void(); 2136 mlog_entry_void();
1903 2137
@@ -1907,6 +2141,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1907 && ac->ac_which != OCFS2_AC_USE_MAIN); 2141 && ac->ac_which != OCFS2_AC_USE_MAIN);
1908 2142
1909 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 2143 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
2144 WARN_ON(min_clusters > 1);
2145
1910 status = ocfs2_claim_local_alloc_bits(osb, 2146 status = ocfs2_claim_local_alloc_bits(osb,
1911 handle, 2147 handle,
1912 ac, 2148 ac,
@@ -1929,20 +2165,19 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1929 if (bits_wanted > (osb->bitmap_cpg - 1)) 2165 if (bits_wanted > (osb->bitmap_cpg - 1))
1930 bits_wanted = osb->bitmap_cpg - 1; 2166 bits_wanted = osb->bitmap_cpg - 1;
1931 2167
1932 status = ocfs2_claim_suballoc_bits(osb, 2168 status = ocfs2_claim_suballoc_bits(ac,
1933 ac,
1934 handle, 2169 handle,
1935 bits_wanted, 2170 bits_wanted,
1936 min_clusters, 2171 min_clusters,
1937 &bg_bit_off, 2172 &res);
1938 num_clusters,
1939 &bg_blkno);
1940 if (!status) { 2173 if (!status) {
2174 BUG_ON(res.sr_blkno); /* cluster alloc can't set */
1941 *cluster_start = 2175 *cluster_start =
1942 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 2176 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1943 bg_blkno, 2177 res.sr_bg_blkno,
1944 bg_bit_off); 2178 res.sr_bit_offset);
1945 atomic_inc(&osb->alloc_stats.bitmap_data); 2179 atomic_inc(&osb->alloc_stats.bitmap_data);
2180 *num_clusters = res.sr_bits;
1946 } 2181 }
1947 } 2182 }
1948 if (status < 0) { 2183 if (status < 0) {
@@ -1958,8 +2193,7 @@ bail:
1958 return status; 2193 return status;
1959} 2194}
1960 2195
1961int ocfs2_claim_clusters(struct ocfs2_super *osb, 2196int ocfs2_claim_clusters(handle_t *handle,
1962 handle_t *handle,
1963 struct ocfs2_alloc_context *ac, 2197 struct ocfs2_alloc_context *ac,
1964 u32 min_clusters, 2198 u32 min_clusters,
1965 u32 *cluster_start, 2199 u32 *cluster_start,
@@ -1967,7 +2201,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1967{ 2201{
1968 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 2202 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1969 2203
1970 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, 2204 return __ocfs2_claim_clusters(handle, ac, min_clusters,
1971 bits_wanted, cluster_start, num_clusters); 2205 bits_wanted, cluster_start, num_clusters);
1972} 2206}
1973 2207
@@ -2023,9 +2257,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2023 if (undo_fn) 2257 if (undo_fn)
2024 jbd_unlock_bh_state(group_bh); 2258 jbd_unlock_bh_state(group_bh);
2025 2259
2026 status = ocfs2_journal_dirty(handle, group_bh); 2260 ocfs2_journal_dirty(handle, group_bh);
2027 if (status < 0)
2028 mlog_errno(status);
2029bail: 2261bail:
2030 return status; 2262 return status;
2031} 2263}
@@ -2092,12 +2324,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2092 count); 2324 count);
2093 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 2325 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
2094 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 2326 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
2095 2327 ocfs2_journal_dirty(handle, alloc_bh);
2096 status = ocfs2_journal_dirty(handle, alloc_bh);
2097 if (status < 0) {
2098 mlog_errno(status);
2099 goto bail;
2100 }
2101 2328
2102bail: 2329bail:
2103 brelse(group_bh); 2330 brelse(group_bh);
@@ -2126,6 +2353,8 @@ int ocfs2_free_dinode(handle_t *handle,
2126 u16 bit = le16_to_cpu(di->i_suballoc_bit); 2353 u16 bit = le16_to_cpu(di->i_suballoc_bit);
2127 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2354 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2128 2355
2356 if (di->i_suballoc_loc)
2357 bg_blkno = le64_to_cpu(di->i_suballoc_loc);
2129 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 2358 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
2130 inode_alloc_bh, bit, bg_blkno, 1); 2359 inode_alloc_bh, bit, bg_blkno, 1);
2131} 2360}
@@ -2395,7 +2624,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2395 struct buffer_head *alloc_bh, u64 blkno, 2624 struct buffer_head *alloc_bh, u64 blkno,
2396 u16 bit, int *res) 2625 u16 bit, int *res)
2397{ 2626{
2398 struct ocfs2_dinode *alloc_fe; 2627 struct ocfs2_dinode *alloc_di;
2399 struct ocfs2_group_desc *group; 2628 struct ocfs2_group_desc *group;
2400 struct buffer_head *group_bh = NULL; 2629 struct buffer_head *group_bh = NULL;
2401 u64 bg_blkno; 2630 u64 bg_blkno;
@@ -2404,17 +2633,20 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2404 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, 2633 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2405 (unsigned int)bit); 2634 (unsigned int)bit);
2406 2635
2407 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; 2636 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2408 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { 2637 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
2409 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n", 2638 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2410 (unsigned int)bit, 2639 (unsigned int)bit,
2411 ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); 2640 ocfs2_bits_per_group(&alloc_di->id2.i_chain));
2412 status = -EINVAL; 2641 status = -EINVAL;
2413 goto bail; 2642 goto bail;
2414 } 2643 }
2415 2644
2416 bg_blkno = ocfs2_which_suballoc_group(blkno, bit); 2645 if (alloc_di->i_suballoc_loc)
2417 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, 2646 bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
2647 else
2648 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2649 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2418 &group_bh); 2650 &group_bh);
2419 if (status < 0) { 2651 if (status < 0) {
2420 mlog(ML_ERROR, "read group %llu failed %d\n", 2652 mlog(ML_ERROR, "read group %llu failed %d\n",
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e0f46df357e6..a017dd3ee7d9 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -26,13 +26,14 @@
26#ifndef _CHAINALLOC_H_ 26#ifndef _CHAINALLOC_H_
27#define _CHAINALLOC_H_ 27#define _CHAINALLOC_H_
28 28
29struct ocfs2_suballoc_result;
29typedef int (group_search_t)(struct inode *, 30typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 31 struct buffer_head *,
31 u32, /* bits_wanted */ 32 u32, /* bits_wanted */
32 u32, /* min_bits */ 33 u32, /* min_bits */
33 u64, /* max_block */ 34 u64, /* max_block */
34 u16 *, /* *bit_off */ 35 struct ocfs2_suballoc_result *);
35 u16 *); /* *bits_found */ 36 /* found bits */
36 37
37struct ocfs2_alloc_context { 38struct ocfs2_alloc_context {
38 struct inode *ac_inode; /* which bitmap are we allocating from? */ 39 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -54,6 +55,8 @@ struct ocfs2_alloc_context {
54 u64 ac_last_group; 55 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 is the same as ~0 - unlimited */ 57 is the same as ~0 - unlimited */
58
59 struct ocfs2_alloc_reservation *ac_resv;
57}; 60};
58 61
59void ocfs2_init_steal_slots(struct ocfs2_super *osb); 62void ocfs2_init_steal_slots(struct ocfs2_super *osb);
@@ -80,22 +83,21 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
80 u32 bits_wanted, 83 u32 bits_wanted,
81 struct ocfs2_alloc_context **ac); 84 struct ocfs2_alloc_context **ac);
82 85
83int ocfs2_claim_metadata(struct ocfs2_super *osb, 86int ocfs2_claim_metadata(handle_t *handle,
84 handle_t *handle,
85 struct ocfs2_alloc_context *ac, 87 struct ocfs2_alloc_context *ac,
86 u32 bits_wanted, 88 u32 bits_wanted,
89 u64 *suballoc_loc,
87 u16 *suballoc_bit_start, 90 u16 *suballoc_bit_start,
88 u32 *num_bits, 91 u32 *num_bits,
89 u64 *blkno_start); 92 u64 *blkno_start);
90int ocfs2_claim_new_inode(struct ocfs2_super *osb, 93int ocfs2_claim_new_inode(handle_t *handle,
91 handle_t *handle,
92 struct inode *dir, 94 struct inode *dir,
93 struct buffer_head *parent_fe_bh, 95 struct buffer_head *parent_fe_bh,
94 struct ocfs2_alloc_context *ac, 96 struct ocfs2_alloc_context *ac,
97 u64 *suballoc_loc,
95 u16 *suballoc_bit, 98 u16 *suballoc_bit,
96 u64 *fe_blkno); 99 u64 *fe_blkno);
97int ocfs2_claim_clusters(struct ocfs2_super *osb, 100int ocfs2_claim_clusters(handle_t *handle,
98 handle_t *handle,
99 struct ocfs2_alloc_context *ac, 101 struct ocfs2_alloc_context *ac,
100 u32 min_clusters, 102 u32 min_clusters,
101 u32 *cluster_start, 103 u32 *cluster_start,
@@ -104,8 +106,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
104 * Use this variant of ocfs2_claim_clusters to specify a maxiumum 106 * Use this variant of ocfs2_claim_clusters to specify a maxiumum
105 * number of clusters smaller than the allocation reserved. 107 * number of clusters smaller than the allocation reserved.
106 */ 108 */
107int __ocfs2_claim_clusters(struct ocfs2_super *osb, 109int __ocfs2_claim_clusters(handle_t *handle,
108 handle_t *handle,
109 struct ocfs2_alloc_context *ac, 110 struct ocfs2_alloc_context *ac,
110 u32 min_clusters, 111 u32 min_clusters,
111 u32 max_clusters, 112 u32 max_clusters,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index dee03197a494..2c26ce251cb3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -94,7 +94,9 @@ struct mount_options
94 unsigned long mount_opt; 94 unsigned long mount_opt;
95 unsigned int atime_quantum; 95 unsigned int atime_quantum;
96 signed short slot; 96 signed short slot;
97 unsigned int localalloc_opt; 97 int localalloc_opt;
98 unsigned int resv_level;
99 int dir_resv_level;
98 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 100 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
99}; 101};
100 102
@@ -176,6 +178,8 @@ enum {
176 Opt_noacl, 178 Opt_noacl,
177 Opt_usrquota, 179 Opt_usrquota,
178 Opt_grpquota, 180 Opt_grpquota,
181 Opt_resv_level,
182 Opt_dir_resv_level,
179 Opt_err, 183 Opt_err,
180}; 184};
181 185
@@ -202,6 +206,8 @@ static const match_table_t tokens = {
202 {Opt_noacl, "noacl"}, 206 {Opt_noacl, "noacl"},
203 {Opt_usrquota, "usrquota"}, 207 {Opt_usrquota, "usrquota"},
204 {Opt_grpquota, "grpquota"}, 208 {Opt_grpquota, "grpquota"},
209 {Opt_resv_level, "resv_level=%u"},
210 {Opt_dir_resv_level, "dir_resv_level=%u"},
205 {Opt_err, NULL} 211 {Opt_err, NULL}
206}; 212};
207 213
@@ -932,12 +938,16 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
932 int type; 938 int type;
933 struct inode *inode; 939 struct inode *inode;
934 struct super_block *sb = osb->sb; 940 struct super_block *sb = osb->sb;
941 struct ocfs2_mem_dqinfo *oinfo;
935 942
936 /* We mostly ignore errors in this function because there's not much 943 /* We mostly ignore errors in this function because there's not much
937 * we can do when we see them */ 944 * we can do when we see them */
938 for (type = 0; type < MAXQUOTAS; type++) { 945 for (type = 0; type < MAXQUOTAS; type++) {
939 if (!sb_has_quota_loaded(sb, type)) 946 if (!sb_has_quota_loaded(sb, type))
940 continue; 947 continue;
948 /* Cancel periodic syncing before we grab dqonoff_mutex */
949 oinfo = sb_dqinfo(sb, type)->dqi_priv;
950 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
941 inode = igrab(sb->s_dquot.files[type]); 951 inode = igrab(sb->s_dquot.files[type]);
942 /* Turn off quotas. This will remove all dquot structures from 952 /* Turn off quotas. This will remove all dquot structures from
943 * memory and so they will be automatically synced to global 953 * memory and so they will be automatically synced to global
@@ -1028,8 +1038,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1028 osb->s_atime_quantum = parsed_options.atime_quantum; 1038 osb->s_atime_quantum = parsed_options.atime_quantum;
1029 osb->preferred_slot = parsed_options.slot; 1039 osb->preferred_slot = parsed_options.slot;
1030 osb->osb_commit_interval = parsed_options.commit_interval; 1040 osb->osb_commit_interval = parsed_options.commit_interval;
1031 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1041
1032 osb->local_alloc_bits = osb->local_alloc_default_bits; 1042 ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
1043 osb->osb_resv_level = parsed_options.resv_level;
1044 osb->osb_dir_resv_level = parsed_options.resv_level;
1045 if (parsed_options.dir_resv_level == -1)
1046 osb->osb_dir_resv_level = parsed_options.resv_level;
1047 else
1048 osb->osb_dir_resv_level = parsed_options.dir_resv_level;
1033 1049
1034 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1050 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1035 if (status) 1051 if (status)
@@ -1285,11 +1301,13 @@ static int ocfs2_parse_options(struct super_block *sb,
1285 options ? options : "(none)"); 1301 options ? options : "(none)");
1286 1302
1287 mopt->commit_interval = 0; 1303 mopt->commit_interval = 0;
1288 mopt->mount_opt = 0; 1304 mopt->mount_opt = OCFS2_MOUNT_NOINTR;
1289 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1305 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
1290 mopt->slot = OCFS2_INVALID_SLOT; 1306 mopt->slot = OCFS2_INVALID_SLOT;
1291 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 1307 mopt->localalloc_opt = -1;
1292 mopt->cluster_stack[0] = '\0'; 1308 mopt->cluster_stack[0] = '\0';
1309 mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
1310 mopt->dir_resv_level = -1;
1293 1311
1294 if (!options) { 1312 if (!options) {
1295 status = 1; 1313 status = 1;
@@ -1380,7 +1398,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1380 status = 0; 1398 status = 0;
1381 goto bail; 1399 goto bail;
1382 } 1400 }
1383 if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) 1401 if (option >= 0)
1384 mopt->localalloc_opt = option; 1402 mopt->localalloc_opt = option;
1385 break; 1403 break;
1386 case Opt_localflocks: 1404 case Opt_localflocks:
@@ -1433,6 +1451,28 @@ static int ocfs2_parse_options(struct super_block *sb,
1433 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1451 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1434 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1452 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1435 break; 1453 break;
1454 case Opt_resv_level:
1455 if (is_remount)
1456 break;
1457 if (match_int(&args[0], &option)) {
1458 status = 0;
1459 goto bail;
1460 }
1461 if (option >= OCFS2_MIN_RESV_LEVEL &&
1462 option < OCFS2_MAX_RESV_LEVEL)
1463 mopt->resv_level = option;
1464 break;
1465 case Opt_dir_resv_level:
1466 if (is_remount)
1467 break;
1468 if (match_int(&args[0], &option)) {
1469 status = 0;
1470 goto bail;
1471 }
1472 if (option >= OCFS2_MIN_RESV_LEVEL &&
1473 option < OCFS2_MAX_RESV_LEVEL)
1474 mopt->dir_resv_level = option;
1475 break;
1436 default: 1476 default:
1437 mlog(ML_ERROR, 1477 mlog(ML_ERROR,
1438 "Unrecognized mount option \"%s\" " 1478 "Unrecognized mount option \"%s\" "
@@ -1487,7 +1527,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1487 (unsigned) (osb->osb_commit_interval / HZ)); 1527 (unsigned) (osb->osb_commit_interval / HZ));
1488 1528
1489 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); 1529 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
1490 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1530 if (local_alloc_megs != ocfs2_la_default_mb(osb))
1491 seq_printf(s, ",localalloc=%d", local_alloc_megs); 1531 seq_printf(s, ",localalloc=%d", local_alloc_megs);
1492 1532
1493 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1533 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
@@ -1514,6 +1554,12 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1514 else 1554 else
1515 seq_printf(s, ",noacl"); 1555 seq_printf(s, ",noacl");
1516 1556
1557 if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
1558 seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
1559
1560 if (osb->osb_dir_resv_level != osb->osb_resv_level)
1561 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
1562
1517 return 0; 1563 return 0;
1518} 1564}
1519 1565
@@ -1688,6 +1734,8 @@ static void ocfs2_inode_init_once(void *data)
1688 oi->ip_blkno = 0ULL; 1734 oi->ip_blkno = 0ULL;
1689 oi->ip_clusters = 0; 1735 oi->ip_clusters = 0;
1690 1736
1737 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1738
1691 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1739 ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
1692 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1740 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1693 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1741 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
@@ -2042,6 +2090,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
2042 2090
2043 init_waitqueue_head(&osb->osb_mount_event); 2091 init_waitqueue_head(&osb->osb_mount_event);
2044 2092
2093 status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
2094 if (status) {
2095 mlog_errno(status);
2096 goto bail;
2097 }
2098
2045 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 2099 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
2046 if (!osb->vol_label) { 2100 if (!osb->vol_label) {
2047 mlog(ML_ERROR, "unable to alloc vol label\n"); 2101 mlog(ML_ERROR, "unable to alloc vol label\n");
@@ -2224,9 +2278,11 @@ static int ocfs2_initialize_super(struct super_block *sb,
2224 } 2278 }
2225 2279
2226 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 2280 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
2281 osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
2227 iput(inode); 2282 iput(inode);
2228 2283
2229 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 2284 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0,
2285 osb->s_feature_incompat) * 8;
2230 2286
2231 status = ocfs2_init_slot_info(osb); 2287 status = ocfs2_init_slot_info(osb);
2232 if (status < 0) { 2288 if (status < 0) {
@@ -2509,5 +2565,25 @@ void __ocfs2_abort(struct super_block* sb,
2509 ocfs2_handle_error(sb); 2565 ocfs2_handle_error(sb);
2510} 2566}
2511 2567
2568/*
2569 * Void signal blockers, because in-kernel sigprocmask() only fails
2570 * when SIG_* is wrong.
2571 */
2572void ocfs2_block_signals(sigset_t *oldset)
2573{
2574 int rc;
2575 sigset_t blocked;
2576
2577 sigfillset(&blocked);
2578 rc = sigprocmask(SIG_BLOCK, &blocked, oldset);
2579 BUG_ON(rc);
2580}
2581
2582void ocfs2_unblock_signals(sigset_t *oldset)
2583{
2584 int rc = sigprocmask(SIG_SETMASK, oldset, NULL);
2585 BUG_ON(rc);
2586}
2587
2512module_init(ocfs2_init); 2588module_init(ocfs2_init);
2513module_exit(ocfs2_exit); 2589module_exit(ocfs2_exit);
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 783f5270f2a1..40c7de084c10 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,4 +45,11 @@ void __ocfs2_abort(struct super_block *sb,
45 45
46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) 46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
47 47
48/*
49 * Void signal blockers, because in-kernel sigprocmask() only fails
50 * when SIG_* is wrong.
51 */
52void ocfs2_block_signals(sigset_t *oldset);
53void ocfs2_unblock_signals(sigset_t *oldset);
54
48#endif /* OCFS2_SUPER_H */ 55#endif /* OCFS2_SUPER_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3e7773089b96..e97b34842cfe 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -79,6 +79,7 @@ struct ocfs2_xattr_set_ctxt {
79 struct ocfs2_alloc_context *meta_ac; 79 struct ocfs2_alloc_context *meta_ac;
80 struct ocfs2_alloc_context *data_ac; 80 struct ocfs2_alloc_context *data_ac;
81 struct ocfs2_cached_dealloc_ctxt dealloc; 81 struct ocfs2_cached_dealloc_ctxt dealloc;
82 int set_abort;
82}; 83};
83 84
84#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 85#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
@@ -96,7 +97,7 @@ static struct ocfs2_xattr_def_value_root def_xv = {
96 .xv.xr_list.l_count = cpu_to_le16(1), 97 .xv.xr_list.l_count = cpu_to_le16(1),
97}; 98};
98 99
99struct xattr_handler *ocfs2_xattr_handlers[] = { 100const struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler, 101 &ocfs2_xattr_user_handler,
101 &ocfs2_xattr_acl_access_handler, 102 &ocfs2_xattr_acl_access_handler,
102 &ocfs2_xattr_acl_default_handler, 103 &ocfs2_xattr_acl_default_handler,
@@ -105,7 +106,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
105 NULL 106 NULL
106}; 107};
107 108
108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 109static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 110 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
110 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111 = &ocfs2_xattr_acl_access_handler, 112 = &ocfs2_xattr_acl_access_handler,
@@ -539,7 +540,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
539 540
540static inline const char *ocfs2_xattr_prefix(int name_index) 541static inline const char *ocfs2_xattr_prefix(int name_index)
541{ 542{
542 struct xattr_handler *handler = NULL; 543 const struct xattr_handler *handler = NULL;
543 544
544 if (name_index > 0 && name_index < OCFS2_XATTR_MAX) 545 if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
545 handler = ocfs2_xattr_handler_map[name_index]; 546 handler = ocfs2_xattr_handler_map[name_index];
@@ -739,11 +740,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
739 goto leave; 740 goto leave;
740 } 741 }
741 742
742 status = ocfs2_journal_dirty(handle, vb->vb_bh); 743 ocfs2_journal_dirty(handle, vb->vb_bh);
743 if (status < 0) {
744 mlog_errno(status);
745 goto leave;
746 }
747 744
748 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
749 746
@@ -786,12 +783,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
786 } 783 }
787 784
788 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 785 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
789 786 ocfs2_journal_dirty(handle, vb->vb_bh);
790 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
791 if (ret) {
792 mlog_errno(ret);
793 goto out;
794 }
795 787
796 if (ext_flags & OCFS2_EXT_REFCOUNTED) 788 if (ext_flags & OCFS2_EXT_REFCOUNTED)
797 ret = ocfs2_decrease_refcount(inode, handle, 789 ret = ocfs2_decrease_refcount(inode, handle,
@@ -1374,11 +1366,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1374 memset(bh->b_data + cp_len, 0, 1366 memset(bh->b_data + cp_len, 0,
1375 blocksize - cp_len); 1367 blocksize - cp_len);
1376 1368
1377 ret = ocfs2_journal_dirty(handle, bh); 1369 ocfs2_journal_dirty(handle, bh);
1378 if (ret < 0) {
1379 mlog_errno(ret);
1380 goto out;
1381 }
1382 brelse(bh); 1370 brelse(bh);
1383 bh = NULL; 1371 bh = NULL;
1384 1372
@@ -2148,15 +2136,19 @@ alloc_value:
2148 orig_clusters = ocfs2_xa_value_clusters(loc); 2136 orig_clusters = ocfs2_xa_value_clusters(loc);
2149 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2137 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2150 if (rc < 0) { 2138 if (rc < 0) {
2151 /* 2139 ctxt->set_abort = 1;
2152 * If we tried to grow an existing external value,
2153 * ocfs2_xa_cleanuP-value_truncate() is going to
2154 * let it stand. We have to restore its original
2155 * value size.
2156 */
2157 loc->xl_entry->xe_value_size = orig_value_size;
2158 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2140 ocfs2_xa_cleanup_value_truncate(loc, "growing",
2159 orig_clusters); 2141 orig_clusters);
2142 /*
2143 * If we were growing an existing value,
2144 * ocfs2_xa_cleanup_value_truncate() won't remove
2145 * the entry. We need to restore the original value
2146 * size.
2147 */
2148 if (loc->xl_entry) {
2149 BUG_ON(!orig_value_size);
2150 loc->xl_entry->xe_value_size = orig_value_size;
2151 }
2160 mlog_errno(rc); 2152 mlog_errno(rc);
2161 } 2153 }
2162 } 2154 }
@@ -2479,7 +2471,10 @@ static int ocfs2_xattr_free_block(struct inode *inode,
2479 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2471 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2480 blk = le64_to_cpu(xb->xb_blkno); 2472 blk = le64_to_cpu(xb->xb_blkno);
2481 bit = le16_to_cpu(xb->xb_suballoc_bit); 2473 bit = le16_to_cpu(xb->xb_suballoc_bit);
2482 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2474 if (xb->xb_suballoc_loc)
2475 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2476 else
2477 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2483 2478
2484 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2479 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2485 EXTENT_ALLOC_SYSTEM_INODE, 2480 EXTENT_ALLOC_SYSTEM_INODE,
@@ -2594,9 +2589,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2594 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2589 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2595 spin_unlock(&oi->ip_lock); 2590 spin_unlock(&oi->ip_lock);
2596 2591
2597 ret = ocfs2_journal_dirty(handle, di_bh); 2592 ocfs2_journal_dirty(handle, di_bh);
2598 if (ret < 0)
2599 mlog_errno(ret);
2600out_commit: 2593out_commit:
2601 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2594 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2602out: 2595out:
@@ -2724,9 +2717,7 @@ static int ocfs2_xattr_ibody_init(struct inode *inode,
2724 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2717 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2725 spin_unlock(&oi->ip_lock); 2718 spin_unlock(&oi->ip_lock);
2726 2719
2727 ret = ocfs2_journal_dirty(ctxt->handle, di_bh); 2720 ocfs2_journal_dirty(ctxt->handle, di_bh);
2728 if (ret < 0)
2729 mlog_errno(ret);
2730 2721
2731out: 2722out:
2732 return ret; 2723 return ret;
@@ -2846,9 +2837,8 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2846 int ret; 2837 int ret;
2847 u16 suballoc_bit_start; 2838 u16 suballoc_bit_start;
2848 u32 num_got; 2839 u32 num_got;
2849 u64 first_blkno; 2840 u64 suballoc_loc, first_blkno;
2850 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2841 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2851 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2852 struct buffer_head *new_bh = NULL; 2842 struct buffer_head *new_bh = NULL;
2853 struct ocfs2_xattr_block *xblk; 2843 struct ocfs2_xattr_block *xblk;
2854 2844
@@ -2859,9 +2849,9 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2859 goto end; 2849 goto end;
2860 } 2850 }
2861 2851
2862 ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1, 2852 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2863 &suballoc_bit_start, &num_got, 2853 &suballoc_loc, &suballoc_bit_start,
2864 &first_blkno); 2854 &num_got, &first_blkno);
2865 if (ret < 0) { 2855 if (ret < 0) {
2866 mlog_errno(ret); 2856 mlog_errno(ret);
2867 goto end; 2857 goto end;
@@ -2883,8 +2873,10 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2883 memset(xblk, 0, inode->i_sb->s_blocksize); 2873 memset(xblk, 0, inode->i_sb->s_blocksize);
2884 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2874 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2885 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2875 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2876 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2886 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2877 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2887 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); 2878 xblk->xb_fs_generation =
2879 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2888 xblk->xb_blkno = cpu_to_le64(first_blkno); 2880 xblk->xb_blkno = cpu_to_le64(first_blkno);
2889 if (indexed) { 2881 if (indexed) {
2890 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2882 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
@@ -2956,7 +2948,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
2956 ret = ocfs2_xa_set(&loc, xi, ctxt); 2948 ret = ocfs2_xa_set(&loc, xi, ctxt);
2957 if (!ret) 2949 if (!ret)
2958 xs->here = loc.xl_entry; 2950 xs->here = loc.xl_entry;
2959 else if (ret != -ENOSPC) 2951 else if ((ret != -ENOSPC) || ctxt->set_abort)
2960 goto end; 2952 goto end;
2961 else { 2953 else {
2962 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2954 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
@@ -3312,14 +3304,13 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3312 goto out; 3304 goto out;
3313 } 3305 }
3314 3306
3315 ret = ocfs2_extend_trans(ctxt->handle, credits + 3307 ret = ocfs2_extend_trans(ctxt->handle, credits);
3316 ctxt->handle->h_buffer_credits);
3317 if (ret) { 3308 if (ret) {
3318 mlog_errno(ret); 3309 mlog_errno(ret);
3319 goto out; 3310 goto out;
3320 } 3311 }
3321 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3312 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3322 } else if (ret == -ENOSPC) { 3313 } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3323 if (di->i_xattr_loc && !xbs->xattr_bh) { 3314 if (di->i_xattr_loc && !xbs->xattr_bh) {
3324 ret = ocfs2_xattr_block_find(inode, 3315 ret = ocfs2_xattr_block_find(inode,
3325 xi->xi_name_index, 3316 xi->xi_name_index,
@@ -3343,8 +3334,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3343 goto out; 3334 goto out;
3344 } 3335 }
3345 3336
3346 ret = ocfs2_extend_trans(ctxt->handle, credits + 3337 ret = ocfs2_extend_trans(ctxt->handle, credits);
3347 ctxt->handle->h_buffer_credits);
3348 if (ret) { 3338 if (ret) {
3349 mlog_errno(ret); 3339 mlog_errno(ret);
3350 goto out; 3340 goto out;
@@ -3378,8 +3368,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3378 goto out; 3368 goto out;
3379 } 3369 }
3380 3370
3381 ret = ocfs2_extend_trans(ctxt->handle, credits + 3371 ret = ocfs2_extend_trans(ctxt->handle, credits);
3382 ctxt->handle->h_buffer_credits);
3383 if (ret) { 3372 if (ret) {
3384 mlog_errno(ret); 3373 mlog_errno(ret);
3385 goto out; 3374 goto out;
@@ -4249,7 +4238,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4249 u32 bit_off, len; 4238 u32 bit_off, len;
4250 u64 blkno; 4239 u64 blkno;
4251 handle_t *handle = ctxt->handle; 4240 handle_t *handle = ctxt->handle;
4252 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4253 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4241 struct ocfs2_inode_info *oi = OCFS2_I(inode);
4254 struct buffer_head *xb_bh = xs->xattr_bh; 4242 struct buffer_head *xb_bh = xs->xattr_bh;
4255 struct ocfs2_xattr_block *xb = 4243 struct ocfs2_xattr_block *xb =
@@ -4277,7 +4265,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4277 goto out; 4265 goto out;
4278 } 4266 }
4279 4267
4280 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 4268 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4281 1, 1, &bit_off, &len); 4269 1, 1, &bit_off, &len);
4282 if (ret) { 4270 if (ret) {
4283 mlog_errno(ret); 4271 mlog_errno(ret);
@@ -4887,8 +4875,7 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4887 * We need to update the first bucket of the old extent and all 4875 * We need to update the first bucket of the old extent and all
4888 * the buckets going to the new extent. 4876 * the buckets going to the new extent.
4889 */ 4877 */
4890 credits = ((num_buckets + 1) * blks_per_bucket) + 4878 credits = ((num_buckets + 1) * blks_per_bucket);
4891 handle->h_buffer_credits;
4892 ret = ocfs2_extend_trans(handle, credits); 4879 ret = ocfs2_extend_trans(handle, credits);
4893 if (ret) { 4880 if (ret) {
4894 mlog_errno(ret); 4881 mlog_errno(ret);
@@ -4958,7 +4945,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
4958 u32 *first_hash) 4945 u32 *first_hash)
4959{ 4946{
4960 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4947 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4961 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits; 4948 int ret, credits = 2 * blk_per_bucket;
4962 4949
4963 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4950 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4964 4951
@@ -5099,7 +5086,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5099 goto leave; 5086 goto leave;
5100 } 5087 }
5101 5088
5102 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, 5089 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5103 clusters_to_add, &bit_off, &num_bits); 5090 clusters_to_add, &bit_off, &num_bits);
5104 if (ret < 0) { 5091 if (ret < 0) {
5105 if (ret != -ENOSPC) 5092 if (ret != -ENOSPC)
@@ -5153,9 +5140,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5153 goto leave; 5140 goto leave;
5154 } 5141 }
5155 5142
5156 ret = ocfs2_journal_dirty(handle, root_bh); 5143 ocfs2_journal_dirty(handle, root_bh);
5157 if (ret < 0)
5158 mlog_errno(ret);
5159 5144
5160leave: 5145leave:
5161 return ret; 5146 return ret;
@@ -5200,8 +5185,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
5200 * existing bucket. Then we add the last existing bucket, the 5185 * existing bucket. Then we add the last existing bucket, the
5201 * new bucket, and the first bucket (3 * blk_per_bucket). 5186 * new bucket, and the first bucket (3 * blk_per_bucket).
5202 */ 5187 */
5203 credits = (end_blk - target_blk) + (3 * blk_per_bucket) + 5188 credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5204 handle->h_buffer_credits;
5205 ret = ocfs2_extend_trans(handle, credits); 5189 ret = ocfs2_extend_trans(handle, credits);
5206 if (ret) { 5190 if (ret) {
5207 mlog_errno(ret); 5191 mlog_errno(ret);
@@ -5477,12 +5461,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5477 } 5461 }
5478 5462
5479 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5463 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 5464 ocfs2_journal_dirty(handle, root_bh);
5481 ret = ocfs2_journal_dirty(handle, root_bh);
5482 if (ret) {
5483 mlog_errno(ret);
5484 goto out_commit;
5485 }
5486 5465
5487 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5466 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5488 if (ret) 5467 if (ret)
@@ -6935,7 +6914,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6935 goto out; 6914 goto out;
6936 } 6915 }
6937 6916
6938 ret = ocfs2_claim_clusters(osb, handle, data_ac, 6917 ret = ocfs2_claim_clusters(handle, data_ac,
6939 len, &p_cluster, &num_clusters); 6918 len, &p_cluster, &num_clusters);
6940 if (ret) { 6919 if (ret) {
6941 mlog_errno(ret); 6920 mlog_errno(ret);
@@ -7234,7 +7213,7 @@ int ocfs2_init_security_set(handle_t *handle,
7234 xattr_ac, data_ac); 7213 xattr_ac, data_ac);
7235} 7214}
7236 7215
7237struct xattr_handler ocfs2_xattr_security_handler = { 7216const struct xattr_handler ocfs2_xattr_security_handler = {
7238 .prefix = XATTR_SECURITY_PREFIX, 7217 .prefix = XATTR_SECURITY_PREFIX,
7239 .list = ocfs2_xattr_security_list, 7218 .list = ocfs2_xattr_security_list,
7240 .get = ocfs2_xattr_security_get, 7219 .get = ocfs2_xattr_security_get,
@@ -7278,7 +7257,7 @@ static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7278 name, value, size, flags); 7257 name, value, size, flags);
7279} 7258}
7280 7259
7281struct xattr_handler ocfs2_xattr_trusted_handler = { 7260const struct xattr_handler ocfs2_xattr_trusted_handler = {
7282 .prefix = XATTR_TRUSTED_PREFIX, 7261 .prefix = XATTR_TRUSTED_PREFIX,
7283 .list = ocfs2_xattr_trusted_list, 7262 .list = ocfs2_xattr_trusted_list,
7284 .get = ocfs2_xattr_trusted_get, 7263 .get = ocfs2_xattr_trusted_get,
@@ -7334,7 +7313,7 @@ static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7334 name, value, size, flags); 7313 name, value, size, flags);
7335} 7314}
7336 7315
7337struct xattr_handler ocfs2_xattr_user_handler = { 7316const struct xattr_handler ocfs2_xattr_user_handler = {
7338 .prefix = XATTR_USER_PREFIX, 7317 .prefix = XATTR_USER_PREFIX,
7339 .list = ocfs2_xattr_user_list, 7318 .list = ocfs2_xattr_user_list,
7340 .get = ocfs2_xattr_user_get, 7319 .get = ocfs2_xattr_user_get,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index abd72a47f520..aa64bb37a65b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -37,12 +37,12 @@ struct ocfs2_security_xattr_info {
37 size_t value_len; 37 size_t value_len;
38}; 38};
39 39
40extern struct xattr_handler ocfs2_xattr_user_handler; 40extern const struct xattr_handler ocfs2_xattr_user_handler;
41extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern const struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler; 42extern const struct xattr_handler ocfs2_xattr_security_handler;
43extern struct xattr_handler ocfs2_xattr_acl_access_handler; 43extern const struct xattr_handler ocfs2_xattr_acl_access_handler;
44extern struct xattr_handler ocfs2_xattr_acl_default_handler; 44extern const struct xattr_handler ocfs2_xattr_acl_default_handler;
45extern struct xattr_handler *ocfs2_xattr_handlers[]; 45extern const struct xattr_handler *ocfs2_xattr_handlers[];
46 46
47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
48int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, 48int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index c82af6acc2e7..089839a6cc64 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com> 3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2. 4 * Released under GPL v2.
5 */ 5 */
6#include <linux/version.h>
7#include <linux/module.h> 6#include <linux/module.h>
8#include <linux/sched.h> 7#include <linux/sched.h>
9#include <linux/slab.h> 8#include <linux/slab.h>
@@ -38,9 +37,7 @@ struct inode *omfs_new_inode(struct inode *dir, int mode)
38 goto fail; 37 goto fail;
39 38
40 inode->i_ino = new_block; 39 inode->i_ino = new_block;
41 inode->i_mode = mode; 40 inode_init_owner(inode, NULL, mode);
42 inode->i_uid = current_fsuid();
43 inode->i_gid = current_fsgid();
44 inode->i_mapping->a_ops = &omfs_aops; 41 inode->i_mapping->a_ops = &omfs_aops;
45 42
46 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 43 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/open.c b/fs/open.c
index 74e5cd9f718e..5463266db9e6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -17,7 +17,6 @@
17#include <linux/securebits.h> 17#include <linux/securebits.h>
18#include <linux/security.h> 18#include <linux/security.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/vfs.h>
21#include <linux/fcntl.h> 20#include <linux/fcntl.h>
22#include <linux/slab.h> 21#include <linux/slab.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
@@ -33,171 +32,6 @@
33 32
34#include "internal.h" 33#include "internal.h"
35 34
36int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
37{
38 int retval = -ENODEV;
39
40 if (dentry) {
41 retval = -ENOSYS;
42 if (dentry->d_sb->s_op->statfs) {
43 memset(buf, 0, sizeof(*buf));
44 retval = security_sb_statfs(dentry);
45 if (retval)
46 return retval;
47 retval = dentry->d_sb->s_op->statfs(dentry, buf);
48 if (retval == 0 && buf->f_frsize == 0)
49 buf->f_frsize = buf->f_bsize;
50 }
51 }
52 return retval;
53}
54
55EXPORT_SYMBOL(vfs_statfs);
56
57static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
58{
59 struct kstatfs st;
60 int retval;
61
62 retval = vfs_statfs(dentry, &st);
63 if (retval)
64 return retval;
65
66 if (sizeof(*buf) == sizeof(st))
67 memcpy(buf, &st, sizeof(st));
68 else {
69 if (sizeof buf->f_blocks == 4) {
70 if ((st.f_blocks | st.f_bfree | st.f_bavail |
71 st.f_bsize | st.f_frsize) &
72 0xffffffff00000000ULL)
73 return -EOVERFLOW;
74 /*
75 * f_files and f_ffree may be -1; it's okay to stuff
76 * that into 32 bits
77 */
78 if (st.f_files != -1 &&
79 (st.f_files & 0xffffffff00000000ULL))
80 return -EOVERFLOW;
81 if (st.f_ffree != -1 &&
82 (st.f_ffree & 0xffffffff00000000ULL))
83 return -EOVERFLOW;
84 }
85
86 buf->f_type = st.f_type;
87 buf->f_bsize = st.f_bsize;
88 buf->f_blocks = st.f_blocks;
89 buf->f_bfree = st.f_bfree;
90 buf->f_bavail = st.f_bavail;
91 buf->f_files = st.f_files;
92 buf->f_ffree = st.f_ffree;
93 buf->f_fsid = st.f_fsid;
94 buf->f_namelen = st.f_namelen;
95 buf->f_frsize = st.f_frsize;
96 memset(buf->f_spare, 0, sizeof(buf->f_spare));
97 }
98 return 0;
99}
100
101static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
102{
103 struct kstatfs st;
104 int retval;
105
106 retval = vfs_statfs(dentry, &st);
107 if (retval)
108 return retval;
109
110 if (sizeof(*buf) == sizeof(st))
111 memcpy(buf, &st, sizeof(st));
112 else {
113 buf->f_type = st.f_type;
114 buf->f_bsize = st.f_bsize;
115 buf->f_blocks = st.f_blocks;
116 buf->f_bfree = st.f_bfree;
117 buf->f_bavail = st.f_bavail;
118 buf->f_files = st.f_files;
119 buf->f_ffree = st.f_ffree;
120 buf->f_fsid = st.f_fsid;
121 buf->f_namelen = st.f_namelen;
122 buf->f_frsize = st.f_frsize;
123 memset(buf->f_spare, 0, sizeof(buf->f_spare));
124 }
125 return 0;
126}
127
128SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
129{
130 struct path path;
131 int error;
132
133 error = user_path(pathname, &path);
134 if (!error) {
135 struct statfs tmp;
136 error = vfs_statfs_native(path.dentry, &tmp);
137 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
138 error = -EFAULT;
139 path_put(&path);
140 }
141 return error;
142}
143
144SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
145{
146 struct path path;
147 long error;
148
149 if (sz != sizeof(*buf))
150 return -EINVAL;
151 error = user_path(pathname, &path);
152 if (!error) {
153 struct statfs64 tmp;
154 error = vfs_statfs64(path.dentry, &tmp);
155 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
156 error = -EFAULT;
157 path_put(&path);
158 }
159 return error;
160}
161
162SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
163{
164 struct file * file;
165 struct statfs tmp;
166 int error;
167
168 error = -EBADF;
169 file = fget(fd);
170 if (!file)
171 goto out;
172 error = vfs_statfs_native(file->f_path.dentry, &tmp);
173 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
174 error = -EFAULT;
175 fput(file);
176out:
177 return error;
178}
179
180SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
181{
182 struct file * file;
183 struct statfs64 tmp;
184 int error;
185
186 if (sz != sizeof(*buf))
187 return -EINVAL;
188
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = vfs_statfs64(file->f_path.dentry, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error;
199}
200
201int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 35int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
202 struct file *filp) 36 struct file *filp)
203{ 37{
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index a97b477ac0fc..6921e7890be6 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -70,14 +70,14 @@ struct riscix_record {
70 70
71#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ 71#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
72 defined(CONFIG_ACORN_PARTITION_ADFS) 72 defined(CONFIG_ACORN_PARTITION_ADFS)
73static int 73static int riscix_partition(struct parsed_partitions *state,
74riscix_partition(struct parsed_partitions *state, struct block_device *bdev, 74 unsigned long first_sect, int slot,
75 unsigned long first_sect, int slot, unsigned long nr_sects) 75 unsigned long nr_sects)
76{ 76{
77 Sector sect; 77 Sector sect;
78 struct riscix_record *rr; 78 struct riscix_record *rr;
79 79
80 rr = (struct riscix_record *)read_dev_sector(bdev, first_sect, &sect); 80 rr = read_part_sector(state, first_sect, &sect);
81 if (!rr) 81 if (!rr)
82 return -1; 82 return -1;
83 83
@@ -123,9 +123,9 @@ struct linux_part {
123 123
124#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ 124#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
125 defined(CONFIG_ACORN_PARTITION_ADFS) 125 defined(CONFIG_ACORN_PARTITION_ADFS)
126static int 126static int linux_partition(struct parsed_partitions *state,
127linux_partition(struct parsed_partitions *state, struct block_device *bdev, 127 unsigned long first_sect, int slot,
128 unsigned long first_sect, int slot, unsigned long nr_sects) 128 unsigned long nr_sects)
129{ 129{
130 Sector sect; 130 Sector sect;
131 struct linux_part *linuxp; 131 struct linux_part *linuxp;
@@ -135,7 +135,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
135 135
136 put_partition(state, slot++, first_sect, size); 136 put_partition(state, slot++, first_sect, size);
137 137
138 linuxp = (struct linux_part *)read_dev_sector(bdev, first_sect, &sect); 138 linuxp = read_part_sector(state, first_sect, &sect);
139 if (!linuxp) 139 if (!linuxp)
140 return -1; 140 return -1;
141 141
@@ -157,8 +157,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
157#endif 157#endif
158 158
159#ifdef CONFIG_ACORN_PARTITION_CUMANA 159#ifdef CONFIG_ACORN_PARTITION_CUMANA
160int 160int adfspart_check_CUMANA(struct parsed_partitions *state)
161adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev)
162{ 161{
163 unsigned long first_sector = 0; 162 unsigned long first_sector = 0;
164 unsigned int start_blk = 0; 163 unsigned int start_blk = 0;
@@ -185,7 +184,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
185 struct adfs_discrecord *dr; 184 struct adfs_discrecord *dr;
186 unsigned int nr_sects; 185 unsigned int nr_sects;
187 186
188 data = read_dev_sector(bdev, start_blk * 2 + 6, &sect); 187 data = read_part_sector(state, start_blk * 2 + 6, &sect);
189 if (!data) 188 if (!data)
190 return -1; 189 return -1;
191 190
@@ -217,14 +216,14 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
217#ifdef CONFIG_ACORN_PARTITION_RISCIX 216#ifdef CONFIG_ACORN_PARTITION_RISCIX
218 case PARTITION_RISCIX_SCSI: 217 case PARTITION_RISCIX_SCSI:
219 /* RISCiX - we don't know how to find the next one. */ 218 /* RISCiX - we don't know how to find the next one. */
220 slot = riscix_partition(state, bdev, first_sector, 219 slot = riscix_partition(state, first_sector, slot,
221 slot, nr_sects); 220 nr_sects);
222 break; 221 break;
223#endif 222#endif
224 223
225 case PARTITION_LINUX: 224 case PARTITION_LINUX:
226 slot = linux_partition(state, bdev, first_sector, 225 slot = linux_partition(state, first_sector, slot,
227 slot, nr_sects); 226 nr_sects);
228 break; 227 break;
229 } 228 }
230 put_dev_sector(sect); 229 put_dev_sector(sect);
@@ -249,8 +248,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
249 * hda1 = ADFS partition on first drive. 248 * hda1 = ADFS partition on first drive.
250 * hda2 = non-ADFS partition. 249 * hda2 = non-ADFS partition.
251 */ 250 */
252int 251int adfspart_check_ADFS(struct parsed_partitions *state)
253adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
254{ 252{
255 unsigned long start_sect, nr_sects, sectscyl, heads; 253 unsigned long start_sect, nr_sects, sectscyl, heads;
256 Sector sect; 254 Sector sect;
@@ -259,7 +257,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
259 unsigned char id; 257 unsigned char id;
260 int slot = 1; 258 int slot = 1;
261 259
262 data = read_dev_sector(bdev, 6, &sect); 260 data = read_part_sector(state, 6, &sect);
263 if (!data) 261 if (!data)
264 return -1; 262 return -1;
265 263
@@ -278,21 +276,21 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
278 /* 276 /*
279 * Work out start of non-adfs partition. 277 * Work out start of non-adfs partition.
280 */ 278 */
281 nr_sects = (bdev->bd_inode->i_size >> 9) - start_sect; 279 nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
282 280
283 if (start_sect) { 281 if (start_sect) {
284 switch (id) { 282 switch (id) {
285#ifdef CONFIG_ACORN_PARTITION_RISCIX 283#ifdef CONFIG_ACORN_PARTITION_RISCIX
286 case PARTITION_RISCIX_SCSI: 284 case PARTITION_RISCIX_SCSI:
287 case PARTITION_RISCIX_MFM: 285 case PARTITION_RISCIX_MFM:
288 slot = riscix_partition(state, bdev, start_sect, 286 slot = riscix_partition(state, start_sect, slot,
289 slot, nr_sects); 287 nr_sects);
290 break; 288 break;
291#endif 289#endif
292 290
293 case PARTITION_LINUX: 291 case PARTITION_LINUX:
294 slot = linux_partition(state, bdev, start_sect, 292 slot = linux_partition(state, start_sect, slot,
295 slot, nr_sects); 293 nr_sects);
296 break; 294 break;
297 } 295 }
298 } 296 }
@@ -308,10 +306,11 @@ struct ics_part {
308 __le32 size; 306 __le32 size;
309}; 307};
310 308
311static int adfspart_check_ICSLinux(struct block_device *bdev, unsigned long block) 309static int adfspart_check_ICSLinux(struct parsed_partitions *state,
310 unsigned long block)
312{ 311{
313 Sector sect; 312 Sector sect;
314 unsigned char *data = read_dev_sector(bdev, block, &sect); 313 unsigned char *data = read_part_sector(state, block, &sect);
315 int result = 0; 314 int result = 0;
316 315
317 if (data) { 316 if (data) {
@@ -349,8 +348,7 @@ static inline int valid_ics_sector(const unsigned char *data)
349 * hda2 = ADFS partition 1 on first drive. 348 * hda2 = ADFS partition 1 on first drive.
350 * ..etc.. 349 * ..etc..
351 */ 350 */
352int 351int adfspart_check_ICS(struct parsed_partitions *state)
353adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
354{ 352{
355 const unsigned char *data; 353 const unsigned char *data;
356 const struct ics_part *p; 354 const struct ics_part *p;
@@ -360,7 +358,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
360 /* 358 /*
361 * Try ICS style partitions - sector 0 contains partition info. 359 * Try ICS style partitions - sector 0 contains partition info.
362 */ 360 */
363 data = read_dev_sector(bdev, 0, &sect); 361 data = read_part_sector(state, 0, &sect);
364 if (!data) 362 if (!data)
365 return -1; 363 return -1;
366 364
@@ -392,7 +390,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
392 * partition is. We must not make this visible 390 * partition is. We must not make this visible
393 * to the filesystem. 391 * to the filesystem.
394 */ 392 */
395 if (size > 1 && adfspart_check_ICSLinux(bdev, start)) { 393 if (size > 1 && adfspart_check_ICSLinux(state, start)) {
396 start += 1; 394 start += 1;
397 size -= 1; 395 size -= 1;
398 } 396 }
@@ -446,8 +444,7 @@ static inline int valid_ptec_sector(const unsigned char *data)
446 * hda2 = ADFS partition 1 on first drive. 444 * hda2 = ADFS partition 1 on first drive.
447 * ..etc.. 445 * ..etc..
448 */ 446 */
449int 447int adfspart_check_POWERTEC(struct parsed_partitions *state)
450adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev)
451{ 448{
452 Sector sect; 449 Sector sect;
453 const unsigned char *data; 450 const unsigned char *data;
@@ -455,7 +452,7 @@ adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bd
455 int slot = 1; 452 int slot = 1;
456 int i; 453 int i;
457 454
458 data = read_dev_sector(bdev, 0, &sect); 455 data = read_part_sector(state, 0, &sect);
459 if (!data) 456 if (!data)
460 return -1; 457 return -1;
461 458
@@ -508,8 +505,7 @@ static const char eesox_name[] = {
508 * 1. The individual ADFS boot block entries that are placed on the disk. 505 * 1. The individual ADFS boot block entries that are placed on the disk.
509 * 2. The start address of the next entry. 506 * 2. The start address of the next entry.
510 */ 507 */
511int 508int adfspart_check_EESOX(struct parsed_partitions *state)
512adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
513{ 509{
514 Sector sect; 510 Sector sect;
515 const unsigned char *data; 511 const unsigned char *data;
@@ -518,7 +514,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
518 sector_t start = 0; 514 sector_t start = 0;
519 int i, slot = 1; 515 int i, slot = 1;
520 516
521 data = read_dev_sector(bdev, 7, &sect); 517 data = read_part_sector(state, 7, &sect);
522 if (!data) 518 if (!data)
523 return -1; 519 return -1;
524 520
@@ -545,7 +541,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
545 if (i != 0) { 541 if (i != 0) {
546 sector_t size; 542 sector_t size;
547 543
548 size = get_capacity(bdev->bd_disk); 544 size = get_capacity(state->bdev->bd_disk);
549 put_partition(state, slot++, start, size - start); 545 put_partition(state, slot++, start, size - start);
550 printk("\n"); 546 printk("\n");
551 } 547 }
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h
index 81fd50ecc080..ede828529692 100644
--- a/fs/partitions/acorn.h
+++ b/fs/partitions/acorn.h
@@ -7,8 +7,8 @@
7 * format, and everyone stick to it? 7 * format, and everyone stick to it?
8 */ 8 */
9 9
10int adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev); 10int adfspart_check_CUMANA(struct parsed_partitions *state);
11int adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev); 11int adfspart_check_ADFS(struct parsed_partitions *state);
12int adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev); 12int adfspart_check_ICS(struct parsed_partitions *state);
13int adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev); 13int adfspart_check_POWERTEC(struct parsed_partitions *state);
14int adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev); 14int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
index 9917a8c360f2..ba443d4229f8 100644
--- a/fs/partitions/amiga.c
+++ b/fs/partitions/amiga.c
@@ -23,8 +23,7 @@ checksum_block(__be32 *m, int size)
23 return sum; 23 return sum;
24} 24}
25 25
26int 26int amiga_partition(struct parsed_partitions *state)
27amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
28{ 27{
29 Sector sect; 28 Sector sect;
30 unsigned char *data; 29 unsigned char *data;
@@ -38,11 +37,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
38 for (blk = 0; ; blk++, put_dev_sector(sect)) { 37 for (blk = 0; ; blk++, put_dev_sector(sect)) {
39 if (blk == RDB_ALLOCATION_LIMIT) 38 if (blk == RDB_ALLOCATION_LIMIT)
40 goto rdb_done; 39 goto rdb_done;
41 data = read_dev_sector(bdev, blk, &sect); 40 data = read_part_sector(state, blk, &sect);
42 if (!data) { 41 if (!data) {
43 if (warn_no_part) 42 if (warn_no_part)
44 printk("Dev %s: unable to read RDB block %d\n", 43 printk("Dev %s: unable to read RDB block %d\n",
45 bdevname(bdev, b), blk); 44 bdevname(state->bdev, b), blk);
46 res = -1; 45 res = -1;
47 goto rdb_done; 46 goto rdb_done;
48 } 47 }
@@ -64,7 +63,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
64 } 63 }
65 64
66 printk("Dev %s: RDB in block %d has bad checksum\n", 65 printk("Dev %s: RDB in block %d has bad checksum\n",
67 bdevname(bdev, b), blk); 66 bdevname(state->bdev, b), blk);
68 } 67 }
69 68
70 /* blksize is blocks per 512 byte standard block */ 69 /* blksize is blocks per 512 byte standard block */
@@ -75,11 +74,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
75 put_dev_sector(sect); 74 put_dev_sector(sect);
76 for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { 75 for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
77 blk *= blksize; /* Read in terms partition table understands */ 76 blk *= blksize; /* Read in terms partition table understands */
78 data = read_dev_sector(bdev, blk, &sect); 77 data = read_part_sector(state, blk, &sect);
79 if (!data) { 78 if (!data) {
80 if (warn_no_part) 79 if (warn_no_part)
81 printk("Dev %s: unable to read partition block %d\n", 80 printk("Dev %s: unable to read partition block %d\n",
82 bdevname(bdev, b), blk); 81 bdevname(state->bdev, b), blk);
83 res = -1; 82 res = -1;
84 goto rdb_done; 83 goto rdb_done;
85 } 84 }
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h
index 2f3e9ce22d53..d094585cadaa 100644
--- a/fs/partitions/amiga.h
+++ b/fs/partitions/amiga.h
@@ -2,5 +2,5 @@
2 * fs/partitions/amiga.h 2 * fs/partitions/amiga.h
3 */ 3 */
4 4
5int amiga_partition(struct parsed_partitions *state, struct block_device *bdev); 5int amiga_partition(struct parsed_partitions *state);
6 6
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
index 1f3572d5b755..4439ff1b6cec 100644
--- a/fs/partitions/atari.c
+++ b/fs/partitions/atari.c
@@ -30,7 +30,7 @@ static inline int OK_id(char *s)
30 memcmp (s, "RAW", 3) == 0 ; 30 memcmp (s, "RAW", 3) == 0 ;
31} 31}
32 32
33int atari_partition(struct parsed_partitions *state, struct block_device *bdev) 33int atari_partition(struct parsed_partitions *state)
34{ 34{
35 Sector sect; 35 Sector sect;
36 struct rootsector *rs; 36 struct rootsector *rs;
@@ -42,12 +42,12 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ 42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
43#endif 43#endif
44 44
45 rs = (struct rootsector *) read_dev_sector(bdev, 0, &sect); 45 rs = read_part_sector(state, 0, &sect);
46 if (!rs) 46 if (!rs)
47 return -1; 47 return -1;
48 48
49 /* Verify this is an Atari rootsector: */ 49 /* Verify this is an Atari rootsector: */
50 hd_size = bdev->bd_inode->i_size >> 9; 50 hd_size = state->bdev->bd_inode->i_size >> 9;
51 if (!VALID_PARTITION(&rs->part[0], hd_size) && 51 if (!VALID_PARTITION(&rs->part[0], hd_size) &&
52 !VALID_PARTITION(&rs->part[1], hd_size) && 52 !VALID_PARTITION(&rs->part[1], hd_size) &&
53 !VALID_PARTITION(&rs->part[2], hd_size) && 53 !VALID_PARTITION(&rs->part[2], hd_size) &&
@@ -84,7 +84,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
84 printk(" XGM<"); 84 printk(" XGM<");
85 partsect = extensect = be32_to_cpu(pi->st); 85 partsect = extensect = be32_to_cpu(pi->st);
86 while (1) { 86 while (1) {
87 xrs = (struct rootsector *)read_dev_sector(bdev, partsect, &sect2); 87 xrs = read_part_sector(state, partsect, &sect2);
88 if (!xrs) { 88 if (!xrs) {
89 printk (" block %ld read failed\n", partsect); 89 printk (" block %ld read failed\n", partsect);
90 put_dev_sector(sect); 90 put_dev_sector(sect);
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h
index 63186b00e135..fe2d32a89f36 100644
--- a/fs/partitions/atari.h
+++ b/fs/partitions/atari.h
@@ -31,4 +31,4 @@ struct rootsector
31 u16 checksum; /* checksum for bootable disks */ 31 u16 checksum; /* checksum for bootable disks */
32} __attribute__((__packed__)); 32} __attribute__((__packed__));
33 33
34int atari_partition(struct parsed_partitions *state, struct block_device *bdev); 34int atari_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e238ab23a9e7..5dcd4b0c5533 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -45,7 +45,7 @@ extern void md_autodetect_dev(dev_t dev);
45 45
46int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ 46int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
47 47
48static int (*check_part[])(struct parsed_partitions *, struct block_device *) = { 48static int (*check_part[])(struct parsed_partitions *) = {
49 /* 49 /*
50 * Probe partition formats with tables at disk address 0 50 * Probe partition formats with tables at disk address 0
51 * that also have an ADFS boot block at 0xdc0. 51 * that also have an ADFS boot block at 0xdc0.
@@ -161,10 +161,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
161 struct parsed_partitions *state; 161 struct parsed_partitions *state;
162 int i, res, err; 162 int i, res, err;
163 163
164 state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL); 164 state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
165 if (!state) 165 if (!state)
166 return NULL; 166 return NULL;
167 167
168 state->bdev = bdev;
168 disk_name(hd, 0, state->name); 169 disk_name(hd, 0, state->name);
169 printk(KERN_INFO " %s:", state->name); 170 printk(KERN_INFO " %s:", state->name);
170 if (isdigit(state->name[strlen(state->name)-1])) 171 if (isdigit(state->name[strlen(state->name)-1]))
@@ -174,7 +175,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
174 i = res = err = 0; 175 i = res = err = 0;
175 while (!res && check_part[i]) { 176 while (!res && check_part[i]) {
176 memset(&state->parts, 0, sizeof(state->parts)); 177 memset(&state->parts, 0, sizeof(state->parts));
177 res = check_part[i++](state, bdev); 178 res = check_part[i++](state);
178 if (res < 0) { 179 if (res < 0) {
179 /* We have hit an I/O error which we don't report now. 180 /* We have hit an I/O error which we don't report now.
180 * But record it, and let the others do their job. 181 * But record it, and let the others do their job.
@@ -186,6 +187,8 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
186 } 187 }
187 if (res > 0) 188 if (res > 0)
188 return state; 189 return state;
190 if (state->access_beyond_eod)
191 err = -ENOSPC;
189 if (err) 192 if (err)
190 /* The partition is unrecognized. So report I/O errors if there were any */ 193 /* The partition is unrecognized. So report I/O errors if there were any */
191 res = err; 194 res = err;
@@ -538,12 +541,33 @@ exit:
538 disk_part_iter_exit(&piter); 541 disk_part_iter_exit(&piter);
539} 542}
540 543
544static bool disk_unlock_native_capacity(struct gendisk *disk)
545{
546 const struct block_device_operations *bdops = disk->fops;
547
548 if (bdops->unlock_native_capacity &&
549 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
550 printk(KERN_CONT "enabling native capacity\n");
551 bdops->unlock_native_capacity(disk);
552 disk->flags |= GENHD_FL_NATIVE_CAPACITY;
553 return true;
554 } else {
555 printk(KERN_CONT "truncated\n");
556 return false;
557 }
558}
559
541int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 560int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
542{ 561{
562 struct parsed_partitions *state = NULL;
543 struct disk_part_iter piter; 563 struct disk_part_iter piter;
544 struct hd_struct *part; 564 struct hd_struct *part;
545 struct parsed_partitions *state;
546 int p, highest, res; 565 int p, highest, res;
566rescan:
567 if (state && !IS_ERR(state)) {
568 kfree(state);
569 state = NULL;
570 }
547 571
548 if (bdev->bd_part_count) 572 if (bdev->bd_part_count)
549 return -EBUSY; 573 return -EBUSY;
@@ -562,8 +586,32 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
562 bdev->bd_invalidated = 0; 586 bdev->bd_invalidated = 0;
563 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 587 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
564 return 0; 588 return 0;
565 if (IS_ERR(state)) /* I/O error reading the partition table */ 589 if (IS_ERR(state)) {
590 /*
591 * I/O error reading the partition table. If any
592 * partition code tried to read beyond EOD, retry
593 * after unlocking native capacity.
594 */
595 if (PTR_ERR(state) == -ENOSPC) {
596 printk(KERN_WARNING "%s: partition table beyond EOD, ",
597 disk->disk_name);
598 if (disk_unlock_native_capacity(disk))
599 goto rescan;
600 }
566 return -EIO; 601 return -EIO;
602 }
603 /*
604 * If any partition code tried to read beyond EOD, try
605 * unlocking native capacity even if partition table is
606 * successfully read as we could be missing some partitions.
607 */
608 if (state->access_beyond_eod) {
609 printk(KERN_WARNING
610 "%s: partition table partially beyond EOD, ",
611 disk->disk_name);
612 if (disk_unlock_native_capacity(disk))
613 goto rescan;
614 }
567 615
568 /* tell userspace that the media / partition table may have changed */ 616 /* tell userspace that the media / partition table may have changed */
569 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 617 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
@@ -581,7 +629,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
581 /* add partitions */ 629 /* add partitions */
582 for (p = 1; p < state->limit; p++) { 630 for (p = 1; p < state->limit; p++) {
583 sector_t size, from; 631 sector_t size, from;
584try_scan: 632
585 size = state->parts[p].size; 633 size = state->parts[p].size;
586 if (!size) 634 if (!size)
587 continue; 635 continue;
@@ -589,30 +637,21 @@ try_scan:
589 from = state->parts[p].from; 637 from = state->parts[p].from;
590 if (from >= get_capacity(disk)) { 638 if (from >= get_capacity(disk)) {
591 printk(KERN_WARNING 639 printk(KERN_WARNING
592 "%s: p%d ignored, start %llu is behind the end of the disk\n", 640 "%s: p%d start %llu is beyond EOD, ",
593 disk->disk_name, p, (unsigned long long) from); 641 disk->disk_name, p, (unsigned long long) from);
642 if (disk_unlock_native_capacity(disk))
643 goto rescan;
594 continue; 644 continue;
595 } 645 }
596 646
597 if (from + size > get_capacity(disk)) { 647 if (from + size > get_capacity(disk)) {
598 const struct block_device_operations *bdops = disk->fops;
599 unsigned long long capacity;
600
601 printk(KERN_WARNING 648 printk(KERN_WARNING
602 "%s: p%d size %llu exceeds device capacity, ", 649 "%s: p%d size %llu extends beyond EOD, ",
603 disk->disk_name, p, (unsigned long long) size); 650 disk->disk_name, p, (unsigned long long) size);
604 651
605 if (bdops->set_capacity && 652 if (disk_unlock_native_capacity(disk)) {
606 (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { 653 /* free state and restart */
607 printk(KERN_CONT "enabling native capacity\n"); 654 goto rescan;
608 capacity = bdops->set_capacity(disk, ~0ULL);
609 disk->flags |= GENHD_FL_NATIVE_CAPACITY;
610 if (capacity > get_capacity(disk)) {
611 set_capacity(disk, capacity);
612 check_disk_size_change(disk, bdev);
613 bdev->bd_invalidated = 0;
614 }
615 goto try_scan;
616 } else { 655 } else {
617 /* 656 /*
618 * we can not ignore partitions of broken tables 657 * we can not ignore partitions of broken tables
@@ -620,7 +659,6 @@ try_scan:
620 * we limit them to the end of the disk to avoid 659 * we limit them to the end of the disk to avoid
621 * creating invalid block devices 660 * creating invalid block devices
622 */ 661 */
623 printk(KERN_CONT "limited to end of disk\n");
624 size = get_capacity(disk) - from; 662 size = get_capacity(disk) - from;
625 } 663 }
626 } 664 }
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 98dbe1a84528..52f8bd399396 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -6,6 +6,7 @@
6 * description. 6 * description.
7 */ 7 */
8struct parsed_partitions { 8struct parsed_partitions {
9 struct block_device *bdev;
9 char name[BDEVNAME_SIZE]; 10 char name[BDEVNAME_SIZE];
10 struct { 11 struct {
11 sector_t from; 12 sector_t from;
@@ -14,8 +15,19 @@ struct parsed_partitions {
14 } parts[DISK_MAX_PARTS]; 15 } parts[DISK_MAX_PARTS];
15 int next; 16 int next;
16 int limit; 17 int limit;
18 bool access_beyond_eod;
17}; 19};
18 20
21static inline void *read_part_sector(struct parsed_partitions *state,
22 sector_t n, Sector *p)
23{
24 if (n >= get_capacity(state->bdev->bd_disk)) {
25 state->access_beyond_eod = true;
26 return NULL;
27 }
28 return read_dev_sector(state->bdev, n, p);
29}
30
19static inline void 31static inline void
20put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) 32put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
21{ 33{
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 91babdae7587..9efb2cfe2410 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -140,8 +140,7 @@ efi_crc32(const void *buf, unsigned long len)
140 * the part[0] entry for this disk, and is the number of 140 * the part[0] entry for this disk, and is the number of
141 * physical sectors available on the disk. 141 * physical sectors available on the disk.
142 */ 142 */
143static u64 143static u64 last_lba(struct block_device *bdev)
144last_lba(struct block_device *bdev)
145{ 144{
146 if (!bdev || !bdev->bd_inode) 145 if (!bdev || !bdev->bd_inode)
147 return 0; 146 return 0;
@@ -181,27 +180,28 @@ is_pmbr_valid(legacy_mbr *mbr)
181 180
182/** 181/**
183 * read_lba(): Read bytes from disk, starting at given LBA 182 * read_lba(): Read bytes from disk, starting at given LBA
184 * @bdev 183 * @state
185 * @lba 184 * @lba
186 * @buffer 185 * @buffer
187 * @size_t 186 * @size_t
188 * 187 *
189 * Description: Reads @count bytes from @bdev into @buffer. 188 * Description: Reads @count bytes from @state->bdev into @buffer.
190 * Returns number of bytes read on success, 0 on error. 189 * Returns number of bytes read on success, 0 on error.
191 */ 190 */
192static size_t 191static size_t read_lba(struct parsed_partitions *state,
193read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) 192 u64 lba, u8 *buffer, size_t count)
194{ 193{
195 size_t totalreadcount = 0; 194 size_t totalreadcount = 0;
195 struct block_device *bdev = state->bdev;
196 sector_t n = lba * (bdev_logical_block_size(bdev) / 512); 196 sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
197 197
198 if (!bdev || !buffer || lba > last_lba(bdev)) 198 if (!buffer || lba > last_lba(bdev))
199 return 0; 199 return 0;
200 200
201 while (count) { 201 while (count) {
202 int copied = 512; 202 int copied = 512;
203 Sector sect; 203 Sector sect;
204 unsigned char *data = read_dev_sector(bdev, n++, &sect); 204 unsigned char *data = read_part_sector(state, n++, &sect);
205 if (!data) 205 if (!data)
206 break; 206 break;
207 if (copied > count) 207 if (copied > count)
@@ -217,19 +217,20 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
217 217
218/** 218/**
219 * alloc_read_gpt_entries(): reads partition entries from disk 219 * alloc_read_gpt_entries(): reads partition entries from disk
220 * @bdev 220 * @state
221 * @gpt - GPT header 221 * @gpt - GPT header
222 * 222 *
223 * Description: Returns ptes on success, NULL on error. 223 * Description: Returns ptes on success, NULL on error.
224 * Allocates space for PTEs based on information found in @gpt. 224 * Allocates space for PTEs based on information found in @gpt.
225 * Notes: remember to free pte when you're done! 225 * Notes: remember to free pte when you're done!
226 */ 226 */
227static gpt_entry * 227static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
228alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) 228 gpt_header *gpt)
229{ 229{
230 size_t count; 230 size_t count;
231 gpt_entry *pte; 231 gpt_entry *pte;
232 if (!bdev || !gpt) 232
233 if (!gpt)
233 return NULL; 234 return NULL;
234 235
235 count = le32_to_cpu(gpt->num_partition_entries) * 236 count = le32_to_cpu(gpt->num_partition_entries) *
@@ -240,7 +241,7 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
240 if (!pte) 241 if (!pte)
241 return NULL; 242 return NULL;
242 243
243 if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba), 244 if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
244 (u8 *) pte, 245 (u8 *) pte,
245 count) < count) { 246 count) < count) {
246 kfree(pte); 247 kfree(pte);
@@ -252,27 +253,24 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
252 253
253/** 254/**
254 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk 255 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
255 * @bdev 256 * @state
256 * @lba is the Logical Block Address of the partition table 257 * @lba is the Logical Block Address of the partition table
257 * 258 *
258 * Description: returns GPT header on success, NULL on error. Allocates 259 * Description: returns GPT header on success, NULL on error. Allocates
259 * and fills a GPT header starting at @ from @bdev. 260 * and fills a GPT header starting at @ from @state->bdev.
260 * Note: remember to free gpt when finished with it. 261 * Note: remember to free gpt when finished with it.
261 */ 262 */
262static gpt_header * 263static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
263alloc_read_gpt_header(struct block_device *bdev, u64 lba) 264 u64 lba)
264{ 265{
265 gpt_header *gpt; 266 gpt_header *gpt;
266 unsigned ssz = bdev_logical_block_size(bdev); 267 unsigned ssz = bdev_logical_block_size(state->bdev);
267
268 if (!bdev)
269 return NULL;
270 268
271 gpt = kzalloc(ssz, GFP_KERNEL); 269 gpt = kzalloc(ssz, GFP_KERNEL);
272 if (!gpt) 270 if (!gpt)
273 return NULL; 271 return NULL;
274 272
275 if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) { 273 if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) {
276 kfree(gpt); 274 kfree(gpt);
277 gpt=NULL; 275 gpt=NULL;
278 return NULL; 276 return NULL;
@@ -283,7 +281,7 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
283 281
284/** 282/**
285 * is_gpt_valid() - tests one GPT header and PTEs for validity 283 * is_gpt_valid() - tests one GPT header and PTEs for validity
286 * @bdev 284 * @state
287 * @lba is the logical block address of the GPT header to test 285 * @lba is the logical block address of the GPT header to test
288 * @gpt is a GPT header ptr, filled on return. 286 * @gpt is a GPT header ptr, filled on return.
289 * @ptes is a PTEs ptr, filled on return. 287 * @ptes is a PTEs ptr, filled on return.
@@ -291,16 +289,15 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
291 * Description: returns 1 if valid, 0 on error. 289 * Description: returns 1 if valid, 0 on error.
292 * If valid, returns pointers to newly allocated GPT header and PTEs. 290 * If valid, returns pointers to newly allocated GPT header and PTEs.
293 */ 291 */
294static int 292static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
295is_gpt_valid(struct block_device *bdev, u64 lba, 293 gpt_header **gpt, gpt_entry **ptes)
296 gpt_header **gpt, gpt_entry **ptes)
297{ 294{
298 u32 crc, origcrc; 295 u32 crc, origcrc;
299 u64 lastlba; 296 u64 lastlba;
300 297
301 if (!bdev || !gpt || !ptes) 298 if (!ptes)
302 return 0; 299 return 0;
303 if (!(*gpt = alloc_read_gpt_header(bdev, lba))) 300 if (!(*gpt = alloc_read_gpt_header(state, lba)))
304 return 0; 301 return 0;
305 302
306 /* Check the GUID Partition Table signature */ 303 /* Check the GUID Partition Table signature */
@@ -336,7 +333,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
336 /* Check the first_usable_lba and last_usable_lba are 333 /* Check the first_usable_lba and last_usable_lba are
337 * within the disk. 334 * within the disk.
338 */ 335 */
339 lastlba = last_lba(bdev); 336 lastlba = last_lba(state->bdev);
340 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { 337 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
341 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", 338 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
342 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), 339 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
@@ -350,7 +347,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
350 goto fail; 347 goto fail;
351 } 348 }
352 349
353 if (!(*ptes = alloc_read_gpt_entries(bdev, *gpt))) 350 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
354 goto fail; 351 goto fail;
355 352
356 /* Check the GUID Partition Entry Array CRC */ 353 /* Check the GUID Partition Entry Array CRC */
@@ -495,7 +492,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
495 492
496/** 493/**
497 * find_valid_gpt() - Search disk for valid GPT headers and PTEs 494 * find_valid_gpt() - Search disk for valid GPT headers and PTEs
498 * @bdev 495 * @state
499 * @gpt is a GPT header ptr, filled on return. 496 * @gpt is a GPT header ptr, filled on return.
500 * @ptes is a PTEs ptr, filled on return. 497 * @ptes is a PTEs ptr, filled on return.
501 * Description: Returns 1 if valid, 0 on error. 498 * Description: Returns 1 if valid, 0 on error.
@@ -508,24 +505,25 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
508 * This protects against devices which misreport their size, and forces 505 * This protects against devices which misreport their size, and forces
509 * the user to decide to use the Alternate GPT. 506 * the user to decide to use the Alternate GPT.
510 */ 507 */
511static int 508static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
512find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) 509 gpt_entry **ptes)
513{ 510{
514 int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; 511 int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
515 gpt_header *pgpt = NULL, *agpt = NULL; 512 gpt_header *pgpt = NULL, *agpt = NULL;
516 gpt_entry *pptes = NULL, *aptes = NULL; 513 gpt_entry *pptes = NULL, *aptes = NULL;
517 legacy_mbr *legacymbr; 514 legacy_mbr *legacymbr;
518 u64 lastlba; 515 u64 lastlba;
519 if (!bdev || !gpt || !ptes) 516
517 if (!ptes)
520 return 0; 518 return 0;
521 519
522 lastlba = last_lba(bdev); 520 lastlba = last_lba(state->bdev);
523 if (!force_gpt) { 521 if (!force_gpt) {
524 /* This will be added to the EFI Spec. per Intel after v1.02. */ 522 /* This will be added to the EFI Spec. per Intel after v1.02. */
525 legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); 523 legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
526 if (legacymbr) { 524 if (legacymbr) {
527 read_lba(bdev, 0, (u8 *) legacymbr, 525 read_lba(state, 0, (u8 *) legacymbr,
528 sizeof (*legacymbr)); 526 sizeof (*legacymbr));
529 good_pmbr = is_pmbr_valid(legacymbr); 527 good_pmbr = is_pmbr_valid(legacymbr);
530 kfree(legacymbr); 528 kfree(legacymbr);
531 } 529 }
@@ -533,15 +531,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
533 goto fail; 531 goto fail;
534 } 532 }
535 533
536 good_pgpt = is_gpt_valid(bdev, GPT_PRIMARY_PARTITION_TABLE_LBA, 534 good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
537 &pgpt, &pptes); 535 &pgpt, &pptes);
538 if (good_pgpt) 536 if (good_pgpt)
539 good_agpt = is_gpt_valid(bdev, 537 good_agpt = is_gpt_valid(state,
540 le64_to_cpu(pgpt->alternate_lba), 538 le64_to_cpu(pgpt->alternate_lba),
541 &agpt, &aptes); 539 &agpt, &aptes);
542 if (!good_agpt && force_gpt) 540 if (!good_agpt && force_gpt)
543 good_agpt = is_gpt_valid(bdev, lastlba, 541 good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
544 &agpt, &aptes);
545 542
546 /* The obviously unsuccessful case */ 543 /* The obviously unsuccessful case */
547 if (!good_pgpt && !good_agpt) 544 if (!good_pgpt && !good_agpt)
@@ -583,9 +580,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
583} 580}
584 581
585/** 582/**
586 * efi_partition(struct parsed_partitions *state, struct block_device *bdev) 583 * efi_partition(struct parsed_partitions *state)
587 * @state 584 * @state
588 * @bdev
589 * 585 *
590 * Description: called from check.c, if the disk contains GPT 586 * Description: called from check.c, if the disk contains GPT
591 * partitions, sets up partition entries in the kernel. 587 * partitions, sets up partition entries in the kernel.
@@ -602,15 +598,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
602 * 1 if successful 598 * 1 if successful
603 * 599 *
604 */ 600 */
605int 601int efi_partition(struct parsed_partitions *state)
606efi_partition(struct parsed_partitions *state, struct block_device *bdev)
607{ 602{
608 gpt_header *gpt = NULL; 603 gpt_header *gpt = NULL;
609 gpt_entry *ptes = NULL; 604 gpt_entry *ptes = NULL;
610 u32 i; 605 u32 i;
611 unsigned ssz = bdev_logical_block_size(bdev) / 512; 606 unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
612 607
613 if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { 608 if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
614 kfree(gpt); 609 kfree(gpt);
615 kfree(ptes); 610 kfree(ptes);
616 return 0; 611 return 0;
@@ -623,7 +618,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
623 u64 size = le64_to_cpu(ptes[i].ending_lba) - 618 u64 size = le64_to_cpu(ptes[i].ending_lba) -
624 le64_to_cpu(ptes[i].starting_lba) + 1ULL; 619 le64_to_cpu(ptes[i].starting_lba) + 1ULL;
625 620
626 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 621 if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
627 continue; 622 continue;
628 623
629 put_partition(state, i+1, start * ssz, size * ssz); 624 put_partition(state, i+1, start * ssz, size * ssz);
@@ -631,7 +626,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
631 /* If this is a RAID volume, tell md */ 626 /* If this is a RAID volume, tell md */
632 if (!efi_guidcmp(ptes[i].partition_type_guid, 627 if (!efi_guidcmp(ptes[i].partition_type_guid,
633 PARTITION_LINUX_RAID_GUID)) 628 PARTITION_LINUX_RAID_GUID))
634 state->parts[i+1].flags = 1; 629 state->parts[i + 1].flags = ADDPART_FLAG_RAID;
635 } 630 }
636 kfree(ptes); 631 kfree(ptes);
637 kfree(gpt); 632 kfree(gpt);
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index 6998b589abf9..b69ab729558f 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -110,7 +110,7 @@ typedef struct _legacy_mbr {
110} __attribute__ ((packed)) legacy_mbr; 110} __attribute__ ((packed)) legacy_mbr;
111 111
112/* Functions */ 112/* Functions */
113extern int efi_partition(struct parsed_partitions *state, struct block_device *bdev); 113extern int efi_partition(struct parsed_partitions *state);
114 114
115#endif 115#endif
116 116
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index fc71aab08460..3e73de5967ff 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -58,9 +58,9 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
58 58
59/* 59/*
60 */ 60 */
61int 61int ibm_partition(struct parsed_partitions *state)
62ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
63{ 62{
63 struct block_device *bdev = state->bdev;
64 int blocksize, res; 64 int blocksize, res;
65 loff_t i_size, offset, size, fmt_size; 65 loff_t i_size, offset, size, fmt_size;
66 dasd_information2_t *info; 66 dasd_information2_t *info;
@@ -100,7 +100,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
100 /* 100 /*
101 * Get volume label, extract name and type. 101 * Get volume label, extract name and type.
102 */ 102 */
103 data = read_dev_sector(bdev, info->label_block*(blocksize/512), &sect); 103 data = read_part_sector(state, info->label_block*(blocksize/512),
104 &sect);
104 if (data == NULL) 105 if (data == NULL)
105 goto out_readerr; 106 goto out_readerr;
106 107
@@ -193,8 +194,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
193 */ 194 */
194 blk = cchhb2blk(&label->vol.vtoc, geo) + 1; 195 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
195 counter = 0; 196 counter = 0;
196 data = read_dev_sector(bdev, blk * (blocksize/512), 197 data = read_part_sector(state, blk * (blocksize/512),
197 &sect); 198 &sect);
198 while (data != NULL) { 199 while (data != NULL) {
199 struct vtoc_format1_label f1; 200 struct vtoc_format1_label f1;
200 201
@@ -208,9 +209,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
208 || f1.DS1FMTID == _ascebc['7'] 209 || f1.DS1FMTID == _ascebc['7']
209 || f1.DS1FMTID == _ascebc['9']) { 210 || f1.DS1FMTID == _ascebc['9']) {
210 blk++; 211 blk++;
211 data = read_dev_sector(bdev, blk * 212 data = read_part_sector(state,
212 (blocksize/512), 213 blk * (blocksize/512), &sect);
213 &sect);
214 continue; 214 continue;
215 } 215 }
216 216
@@ -230,9 +230,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
230 size * (blocksize >> 9)); 230 size * (blocksize >> 9));
231 counter++; 231 counter++;
232 blk++; 232 blk++;
233 data = read_dev_sector(bdev, 233 data = read_part_sector(state,
234 blk * (blocksize/512), 234 blk * (blocksize/512), &sect);
235 &sect);
236 } 235 }
237 236
238 if (!data) 237 if (!data)
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h
index 31f85a6ac459..08fb0804a812 100644
--- a/fs/partitions/ibm.h
+++ b/fs/partitions/ibm.h
@@ -1 +1 @@
int ibm_partition(struct parsed_partitions *, struct block_device *); int ibm_partition(struct parsed_partitions *);
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
index 176d89bcf123..1cc928bb762f 100644
--- a/fs/partitions/karma.c
+++ b/fs/partitions/karma.c
@@ -9,7 +9,7 @@
9#include "check.h" 9#include "check.h"
10#include "karma.h" 10#include "karma.h"
11 11
12int karma_partition(struct parsed_partitions *state, struct block_device *bdev) 12int karma_partition(struct parsed_partitions *state)
13{ 13{
14 int i; 14 int i;
15 int slot = 1; 15 int slot = 1;
@@ -29,7 +29,7 @@ int karma_partition(struct parsed_partitions *state, struct block_device *bdev)
29 } __attribute__((packed)) *label; 29 } __attribute__((packed)) *label;
30 struct d_partition *p; 30 struct d_partition *p;
31 31
32 data = read_dev_sector(bdev, 0, &sect); 32 data = read_part_sector(state, 0, &sect);
33 if (!data) 33 if (!data)
34 return -1; 34 return -1;
35 35
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
index ecf7d3f2a3d8..c764b2e9df21 100644
--- a/fs/partitions/karma.h
+++ b/fs/partitions/karma.h
@@ -4,5 +4,5 @@
4 4
5#define KARMA_LABEL_MAGIC 0xAB56 5#define KARMA_LABEL_MAGIC 0xAB56
6 6
7int karma_partition(struct parsed_partitions *state, struct block_device *bdev); 7int karma_partition(struct parsed_partitions *state);
8 8
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 8652fb99e962..3ceca05b668c 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -309,7 +309,7 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1,
309 309
310/** 310/**
311 * ldm_validate_privheads - Compare the primary privhead with its backups 311 * ldm_validate_privheads - Compare the primary privhead with its backups
312 * @bdev: Device holding the LDM Database 312 * @state: Partition check state including device holding the LDM Database
313 * @ph1: Memory struct to fill with ph contents 313 * @ph1: Memory struct to fill with ph contents
314 * 314 *
315 * Read and compare all three privheads from disk. 315 * Read and compare all three privheads from disk.
@@ -321,8 +321,8 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1,
321 * Return: 'true' Success 321 * Return: 'true' Success
322 * 'false' Error 322 * 'false' Error
323 */ 323 */
324static bool ldm_validate_privheads (struct block_device *bdev, 324static bool ldm_validate_privheads(struct parsed_partitions *state,
325 struct privhead *ph1) 325 struct privhead *ph1)
326{ 326{
327 static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; 327 static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 };
328 struct privhead *ph[3] = { ph1 }; 328 struct privhead *ph[3] = { ph1 };
@@ -332,7 +332,7 @@ static bool ldm_validate_privheads (struct block_device *bdev,
332 long num_sects; 332 long num_sects;
333 int i; 333 int i;
334 334
335 BUG_ON (!bdev || !ph1); 335 BUG_ON (!state || !ph1);
336 336
337 ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); 337 ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL);
338 ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); 338 ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL);
@@ -346,8 +346,8 @@ static bool ldm_validate_privheads (struct block_device *bdev,
346 346
347 /* Read and parse privheads */ 347 /* Read and parse privheads */
348 for (i = 0; i < 3; i++) { 348 for (i = 0; i < 3; i++) {
349 data = read_dev_sector (bdev, 349 data = read_part_sector(state, ph[0]->config_start + off[i],
350 ph[0]->config_start + off[i], &sect); 350 &sect);
351 if (!data) { 351 if (!data) {
352 ldm_crit ("Disk read failed."); 352 ldm_crit ("Disk read failed.");
353 goto out; 353 goto out;
@@ -363,7 +363,7 @@ static bool ldm_validate_privheads (struct block_device *bdev,
363 } 363 }
364 } 364 }
365 365
366 num_sects = bdev->bd_inode->i_size >> 9; 366 num_sects = state->bdev->bd_inode->i_size >> 9;
367 367
368 if ((ph[0]->config_start > num_sects) || 368 if ((ph[0]->config_start > num_sects) ||
369 ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { 369 ((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
@@ -397,20 +397,20 @@ out:
397 397
398/** 398/**
399 * ldm_validate_tocblocks - Validate the table of contents and its backups 399 * ldm_validate_tocblocks - Validate the table of contents and its backups
400 * @bdev: Device holding the LDM Database 400 * @state: Partition check state including device holding the LDM Database
401 * @base: Offset, into @bdev, of the database 401 * @base: Offset, into @state->bdev, of the database
402 * @ldb: Cache of the database structures 402 * @ldb: Cache of the database structures
403 * 403 *
404 * Find and compare the four tables of contents of the LDM Database stored on 404 * Find and compare the four tables of contents of the LDM Database stored on
405 * @bdev and return the parsed information into @toc1. 405 * @state->bdev and return the parsed information into @toc1.
406 * 406 *
407 * The offsets and sizes of the configs are range-checked against a privhead. 407 * The offsets and sizes of the configs are range-checked against a privhead.
408 * 408 *
409 * Return: 'true' @toc1 contains validated TOCBLOCK info 409 * Return: 'true' @toc1 contains validated TOCBLOCK info
410 * 'false' @toc1 contents are undefined 410 * 'false' @toc1 contents are undefined
411 */ 411 */
412static bool ldm_validate_tocblocks(struct block_device *bdev, 412static bool ldm_validate_tocblocks(struct parsed_partitions *state,
413 unsigned long base, struct ldmdb *ldb) 413 unsigned long base, struct ldmdb *ldb)
414{ 414{
415 static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; 415 static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
416 struct tocblock *tb[4]; 416 struct tocblock *tb[4];
@@ -420,7 +420,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev,
420 int i, nr_tbs; 420 int i, nr_tbs;
421 bool result = false; 421 bool result = false;
422 422
423 BUG_ON(!bdev || !ldb); 423 BUG_ON(!state || !ldb);
424 ph = &ldb->ph; 424 ph = &ldb->ph;
425 tb[0] = &ldb->toc; 425 tb[0] = &ldb->toc;
426 tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); 426 tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
@@ -437,7 +437,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev,
437 * skip any that fail as long as we get at least one valid TOCBLOCK. 437 * skip any that fail as long as we get at least one valid TOCBLOCK.
438 */ 438 */
439 for (nr_tbs = i = 0; i < 4; i++) { 439 for (nr_tbs = i = 0; i < 4; i++) {
440 data = read_dev_sector(bdev, base + off[i], &sect); 440 data = read_part_sector(state, base + off[i], &sect);
441 if (!data) { 441 if (!data) {
442 ldm_error("Disk read failed for TOCBLOCK %d.", i); 442 ldm_error("Disk read failed for TOCBLOCK %d.", i);
443 continue; 443 continue;
@@ -473,7 +473,7 @@ err:
473 473
474/** 474/**
475 * ldm_validate_vmdb - Read the VMDB and validate it 475 * ldm_validate_vmdb - Read the VMDB and validate it
476 * @bdev: Device holding the LDM Database 476 * @state: Partition check state including device holding the LDM Database
477 * @base: Offset, into @bdev, of the database 477 * @base: Offset, into @bdev, of the database
478 * @ldb: Cache of the database structures 478 * @ldb: Cache of the database structures
479 * 479 *
@@ -483,8 +483,8 @@ err:
483 * Return: 'true' @ldb contains validated VBDB info 483 * Return: 'true' @ldb contains validated VBDB info
484 * 'false' @ldb contents are undefined 484 * 'false' @ldb contents are undefined
485 */ 485 */
486static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, 486static bool ldm_validate_vmdb(struct parsed_partitions *state,
487 struct ldmdb *ldb) 487 unsigned long base, struct ldmdb *ldb)
488{ 488{
489 Sector sect; 489 Sector sect;
490 u8 *data; 490 u8 *data;
@@ -492,12 +492,12 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
492 struct vmdb *vm; 492 struct vmdb *vm;
493 struct tocblock *toc; 493 struct tocblock *toc;
494 494
495 BUG_ON (!bdev || !ldb); 495 BUG_ON (!state || !ldb);
496 496
497 vm = &ldb->vm; 497 vm = &ldb->vm;
498 toc = &ldb->toc; 498 toc = &ldb->toc;
499 499
500 data = read_dev_sector (bdev, base + OFF_VMDB, &sect); 500 data = read_part_sector(state, base + OFF_VMDB, &sect);
501 if (!data) { 501 if (!data) {
502 ldm_crit ("Disk read failed."); 502 ldm_crit ("Disk read failed.");
503 return false; 503 return false;
@@ -534,21 +534,21 @@ out:
534 534
535/** 535/**
536 * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk 536 * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk
537 * @bdev: Device holding the LDM Database 537 * @state: Partition check state including device holding the LDM Database
538 * 538 *
539 * This function provides a weak test to decide whether the device is a dynamic 539 * This function provides a weak test to decide whether the device is a dynamic
540 * disk or not. It looks for an MS-DOS-style partition table containing at 540 * disk or not. It looks for an MS-DOS-style partition table containing at
541 * least one partition of type 0x42 (formerly SFS, now used by Windows for 541 * least one partition of type 0x42 (formerly SFS, now used by Windows for
542 * dynamic disks). 542 * dynamic disks).
543 * 543 *
544 * N.B. The only possible error can come from the read_dev_sector and that is 544 * N.B. The only possible error can come from the read_part_sector and that is
545 * only likely to happen if the underlying device is strange. If that IS 545 * only likely to happen if the underlying device is strange. If that IS
546 * the case we should return zero to let someone else try. 546 * the case we should return zero to let someone else try.
547 * 547 *
548 * Return: 'true' @bdev is a dynamic disk 548 * Return: 'true' @state->bdev is a dynamic disk
549 * 'false' @bdev is not a dynamic disk, or an error occurred 549 * 'false' @state->bdev is not a dynamic disk, or an error occurred
550 */ 550 */
551static bool ldm_validate_partition_table (struct block_device *bdev) 551static bool ldm_validate_partition_table(struct parsed_partitions *state)
552{ 552{
553 Sector sect; 553 Sector sect;
554 u8 *data; 554 u8 *data;
@@ -556,9 +556,9 @@ static bool ldm_validate_partition_table (struct block_device *bdev)
556 int i; 556 int i;
557 bool result = false; 557 bool result = false;
558 558
559 BUG_ON (!bdev); 559 BUG_ON(!state);
560 560
561 data = read_dev_sector (bdev, 0, &sect); 561 data = read_part_sector(state, 0, &sect);
562 if (!data) { 562 if (!data) {
563 ldm_crit ("Disk read failed."); 563 ldm_crit ("Disk read failed.");
564 return false; 564 return false;
@@ -1391,8 +1391,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
1391 1391
1392/** 1392/**
1393 * ldm_get_vblks - Read the on-disk database of VBLKs into memory 1393 * ldm_get_vblks - Read the on-disk database of VBLKs into memory
1394 * @bdev: Device holding the LDM Database 1394 * @state: Partition check state including device holding the LDM Database
1395 * @base: Offset, into @bdev, of the database 1395 * @base: Offset, into @state->bdev, of the database
1396 * @ldb: Cache of the database structures 1396 * @ldb: Cache of the database structures
1397 * 1397 *
1398 * To use the information from the VBLKs, they need to be read from the disk, 1398 * To use the information from the VBLKs, they need to be read from the disk,
@@ -1401,8 +1401,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
1401 * Return: 'true' All the VBLKs were read successfully 1401 * Return: 'true' All the VBLKs were read successfully
1402 * 'false' An error occurred 1402 * 'false' An error occurred
1403 */ 1403 */
1404static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, 1404static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base,
1405 struct ldmdb *ldb) 1405 struct ldmdb *ldb)
1406{ 1406{
1407 int size, perbuf, skip, finish, s, v, recs; 1407 int size, perbuf, skip, finish, s, v, recs;
1408 u8 *data = NULL; 1408 u8 *data = NULL;
@@ -1410,7 +1410,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1410 bool result = false; 1410 bool result = false;
1411 LIST_HEAD (frags); 1411 LIST_HEAD (frags);
1412 1412
1413 BUG_ON (!bdev || !ldb); 1413 BUG_ON(!state || !ldb);
1414 1414
1415 size = ldb->vm.vblk_size; 1415 size = ldb->vm.vblk_size;
1416 perbuf = 512 / size; 1416 perbuf = 512 / size;
@@ -1418,7 +1418,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1418 finish = (size * ldb->vm.last_vblk_seq) >> 9; 1418 finish = (size * ldb->vm.last_vblk_seq) >> 9;
1419 1419
1420 for (s = skip; s < finish; s++) { /* For each sector */ 1420 for (s = skip; s < finish; s++) { /* For each sector */
1421 data = read_dev_sector (bdev, base + OFF_VMDB + s, &sect); 1421 data = read_part_sector(state, base + OFF_VMDB + s, &sect);
1422 if (!data) { 1422 if (!data) {
1423 ldm_crit ("Disk read failed."); 1423 ldm_crit ("Disk read failed.");
1424 goto out; 1424 goto out;
@@ -1474,8 +1474,7 @@ static void ldm_free_vblks (struct list_head *lh)
1474 1474
1475/** 1475/**
1476 * ldm_partition - Find out whether a device is a dynamic disk and handle it 1476 * ldm_partition - Find out whether a device is a dynamic disk and handle it
1477 * @pp: List of the partitions parsed so far 1477 * @state: Partition check state including device holding the LDM Database
1478 * @bdev: Device holding the LDM Database
1479 * 1478 *
1480 * This determines whether the device @bdev is a dynamic disk and if so creates 1479 * This determines whether the device @bdev is a dynamic disk and if so creates
1481 * the partitions necessary in the gendisk structure pointed to by @hd. 1480 * the partitions necessary in the gendisk structure pointed to by @hd.
@@ -1485,21 +1484,21 @@ static void ldm_free_vblks (struct list_head *lh)
1485 * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, 1484 * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
1486 * and so on: the actual data containing partitions. 1485 * and so on: the actual data containing partitions.
1487 * 1486 *
1488 * Return: 1 Success, @bdev is a dynamic disk and we handled it 1487 * Return: 1 Success, @state->bdev is a dynamic disk and we handled it
1489 * 0 Success, @bdev is not a dynamic disk 1488 * 0 Success, @state->bdev is not a dynamic disk
1490 * -1 An error occurred before enough information had been read 1489 * -1 An error occurred before enough information had been read
1491 * Or @bdev is a dynamic disk, but it may be corrupted 1490 * Or @state->bdev is a dynamic disk, but it may be corrupted
1492 */ 1491 */
1493int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) 1492int ldm_partition(struct parsed_partitions *state)
1494{ 1493{
1495 struct ldmdb *ldb; 1494 struct ldmdb *ldb;
1496 unsigned long base; 1495 unsigned long base;
1497 int result = -1; 1496 int result = -1;
1498 1497
1499 BUG_ON (!pp || !bdev); 1498 BUG_ON(!state);
1500 1499
1501 /* Look for signs of a Dynamic Disk */ 1500 /* Look for signs of a Dynamic Disk */
1502 if (!ldm_validate_partition_table (bdev)) 1501 if (!ldm_validate_partition_table(state))
1503 return 0; 1502 return 0;
1504 1503
1505 ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); 1504 ldb = kmalloc (sizeof (*ldb), GFP_KERNEL);
@@ -1509,15 +1508,15 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev)
1509 } 1508 }
1510 1509
1511 /* Parse and check privheads. */ 1510 /* Parse and check privheads. */
1512 if (!ldm_validate_privheads (bdev, &ldb->ph)) 1511 if (!ldm_validate_privheads(state, &ldb->ph))
1513 goto out; /* Already logged */ 1512 goto out; /* Already logged */
1514 1513
1515 /* All further references are relative to base (database start). */ 1514 /* All further references are relative to base (database start). */
1516 base = ldb->ph.config_start; 1515 base = ldb->ph.config_start;
1517 1516
1518 /* Parse and check tocs and vmdb. */ 1517 /* Parse and check tocs and vmdb. */
1519 if (!ldm_validate_tocblocks (bdev, base, ldb) || 1518 if (!ldm_validate_tocblocks(state, base, ldb) ||
1520 !ldm_validate_vmdb (bdev, base, ldb)) 1519 !ldm_validate_vmdb(state, base, ldb))
1521 goto out; /* Already logged */ 1520 goto out; /* Already logged */
1522 1521
1523 /* Initialize vblk lists in ldmdb struct */ 1522 /* Initialize vblk lists in ldmdb struct */
@@ -1527,13 +1526,13 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev)
1527 INIT_LIST_HEAD (&ldb->v_comp); 1526 INIT_LIST_HEAD (&ldb->v_comp);
1528 INIT_LIST_HEAD (&ldb->v_part); 1527 INIT_LIST_HEAD (&ldb->v_part);
1529 1528
1530 if (!ldm_get_vblks (bdev, base, ldb)) { 1529 if (!ldm_get_vblks(state, base, ldb)) {
1531 ldm_crit ("Failed to read the VBLKs from the database."); 1530 ldm_crit ("Failed to read the VBLKs from the database.");
1532 goto cleanup; 1531 goto cleanup;
1533 } 1532 }
1534 1533
1535 /* Finally, create the data partition devices. */ 1534 /* Finally, create the data partition devices. */
1536 if (ldm_create_data_partitions (pp, ldb)) { 1535 if (ldm_create_data_partitions(state, ldb)) {
1537 ldm_debug ("Parsed LDM database successfully."); 1536 ldm_debug ("Parsed LDM database successfully.");
1538 result = 1; 1537 result = 1;
1539 } 1538 }
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 30e08e809c1d..d1fb50b28d86 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -209,7 +209,7 @@ struct ldmdb { /* Cache of the database */
209 struct list_head v_part; 209 struct list_head v_part;
210}; 210};
211 211
212int ldm_partition (struct parsed_partitions *state, struct block_device *bdev); 212int ldm_partition(struct parsed_partitions *state);
213 213
214#endif /* _FS_PT_LDM_H_ */ 214#endif /* _FS_PT_LDM_H_ */
215 215
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index d4a0fad3563b..74465ff7c263 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -27,7 +27,7 @@ static inline void mac_fix_string(char *stg, int len)
27 stg[i] = 0; 27 stg[i] = 0;
28} 28}
29 29
30int mac_partition(struct parsed_partitions *state, struct block_device *bdev) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1; 32 int slot = 1;
33 Sector sect; 33 Sector sect;
@@ -42,7 +42,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
42 struct mac_driver_desc *md; 42 struct mac_driver_desc *md;
43 43
44 /* Get 0th block and look at the first partition map entry. */ 44 /* Get 0th block and look at the first partition map entry. */
45 md = (struct mac_driver_desc *) read_dev_sector(bdev, 0, &sect); 45 md = read_part_sector(state, 0, &sect);
46 if (!md) 46 if (!md)
47 return -1; 47 return -1;
48 if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { 48 if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) {
@@ -51,7 +51,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
51 } 51 }
52 secsize = be16_to_cpu(md->block_size); 52 secsize = be16_to_cpu(md->block_size);
53 put_dev_sector(sect); 53 put_dev_sector(sect);
54 data = read_dev_sector(bdev, secsize/512, &sect); 54 data = read_part_sector(state, secsize/512, &sect);
55 if (!data) 55 if (!data)
56 return -1; 56 return -1;
57 part = (struct mac_partition *) (data + secsize%512); 57 part = (struct mac_partition *) (data + secsize%512);
@@ -64,7 +64,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 64 for (blk = 1; blk <= blocks_in_map; ++blk) {
65 int pos = blk * secsize; 65 int pos = blk * secsize;
66 put_dev_sector(sect); 66 put_dev_sector(sect);
67 data = read_dev_sector(bdev, pos/512, &sect); 67 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 68 if (!data)
69 return -1; 69 return -1;
70 part = (struct mac_partition *) (data + pos%512); 70 part = (struct mac_partition *) (data + pos%512);
@@ -75,7 +75,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
75 be32_to_cpu(part->block_count) * (secsize/512)); 75 be32_to_cpu(part->block_count) * (secsize/512));
76 76
77 if (!strnicmp(part->type, "Linux_RAID", 10)) 77 if (!strnicmp(part->type, "Linux_RAID", 10))
78 state->parts[slot].flags = 1; 78 state->parts[slot].flags = ADDPART_FLAG_RAID;
79#ifdef CONFIG_PPC_PMAC 79#ifdef CONFIG_PPC_PMAC
80 /* 80 /*
81 * If this is the first bootable partition, tell the 81 * If this is the first bootable partition, tell the
@@ -123,7 +123,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
123 } 123 }
124#ifdef CONFIG_PPC_PMAC 124#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 125 if (found_root_goodness)
126 note_bootable_part(bdev->bd_dev, found_root, found_root_goodness); 126 note_bootable_part(state->bdev->bd_dev, found_root,
127 found_root_goodness);
127#endif 128#endif
128 129
129 put_dev_sector(sect); 130 put_dev_sector(sect);
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h
index bbf26e1386fa..3c7d98436380 100644
--- a/fs/partitions/mac.h
+++ b/fs/partitions/mac.h
@@ -41,4 +41,4 @@ struct mac_driver_desc {
41 /* ... more stuff */ 41 /* ... more stuff */
42}; 42};
43 43
44int mac_partition(struct parsed_partitions *state, struct block_device *bdev); 44int mac_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 90be97f1f5a8..15bfb7b1e044 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -64,7 +64,7 @@ msdos_magic_present(unsigned char *p)
64#define AIX_LABEL_MAGIC2 0xC2 64#define AIX_LABEL_MAGIC2 0xC2
65#define AIX_LABEL_MAGIC3 0xD4 65#define AIX_LABEL_MAGIC3 0xD4
66#define AIX_LABEL_MAGIC4 0xC1 66#define AIX_LABEL_MAGIC4 0xC1
67static int aix_magic_present(unsigned char *p, struct block_device *bdev) 67static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
68{ 68{
69 struct partition *pt = (struct partition *) (p + 0x1be); 69 struct partition *pt = (struct partition *) (p + 0x1be);
70 Sector sect; 70 Sector sect;
@@ -85,7 +85,7 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev)
85 is_extended_partition(pt)) 85 is_extended_partition(pt))
86 return 0; 86 return 0;
87 } 87 }
88 d = read_dev_sector(bdev, 7, &sect); 88 d = read_part_sector(state, 7, &sect);
89 if (d) { 89 if (d) {
90 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') 90 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
91 ret = 1; 91 ret = 1;
@@ -105,15 +105,14 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev)
105 * only for the actual data partitions. 105 * only for the actual data partitions.
106 */ 106 */
107 107
108static void 108static void parse_extended(struct parsed_partitions *state,
109parse_extended(struct parsed_partitions *state, struct block_device *bdev, 109 sector_t first_sector, sector_t first_size)
110 sector_t first_sector, sector_t first_size)
111{ 110{
112 struct partition *p; 111 struct partition *p;
113 Sector sect; 112 Sector sect;
114 unsigned char *data; 113 unsigned char *data;
115 sector_t this_sector, this_size; 114 sector_t this_sector, this_size;
116 sector_t sector_size = bdev_logical_block_size(bdev) / 512; 115 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
117 int loopct = 0; /* number of links followed 116 int loopct = 0; /* number of links followed
118 without finding a data partition */ 117 without finding a data partition */
119 int i; 118 int i;
@@ -126,7 +125,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
126 return; 125 return;
127 if (state->next == state->limit) 126 if (state->next == state->limit)
128 return; 127 return;
129 data = read_dev_sector(bdev, this_sector, &sect); 128 data = read_part_sector(state, this_sector, &sect);
130 if (!data) 129 if (!data)
131 return; 130 return;
132 131
@@ -198,9 +197,8 @@ done:
198/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also 197/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
199 indicates linux swap. Be careful before believing this is Solaris. */ 198 indicates linux swap. Be careful before believing this is Solaris. */
200 199
201static void 200static void parse_solaris_x86(struct parsed_partitions *state,
202parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, 201 sector_t offset, sector_t size, int origin)
203 sector_t offset, sector_t size, int origin)
204{ 202{
205#ifdef CONFIG_SOLARIS_X86_PARTITION 203#ifdef CONFIG_SOLARIS_X86_PARTITION
206 Sector sect; 204 Sector sect;
@@ -208,7 +206,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
208 int i; 206 int i;
209 short max_nparts; 207 short max_nparts;
210 208
211 v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect); 209 v = read_part_sector(state, offset + 1, &sect);
212 if (!v) 210 if (!v)
213 return; 211 return;
214 if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { 212 if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) {
@@ -245,16 +243,15 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
245 * Create devices for BSD partitions listed in a disklabel, under a 243 * Create devices for BSD partitions listed in a disklabel, under a
246 * dos-like partition. See parse_extended() for more information. 244 * dos-like partition. See parse_extended() for more information.
247 */ 245 */
248static void 246static void parse_bsd(struct parsed_partitions *state,
249parse_bsd(struct parsed_partitions *state, struct block_device *bdev, 247 sector_t offset, sector_t size, int origin, char *flavour,
250 sector_t offset, sector_t size, int origin, char *flavour, 248 int max_partitions)
251 int max_partitions)
252{ 249{
253 Sector sect; 250 Sector sect;
254 struct bsd_disklabel *l; 251 struct bsd_disklabel *l;
255 struct bsd_partition *p; 252 struct bsd_partition *p;
256 253
257 l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect); 254 l = read_part_sector(state, offset + 1, &sect);
258 if (!l) 255 if (!l)
259 return; 256 return;
260 if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { 257 if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
@@ -291,33 +288,28 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
291} 288}
292#endif 289#endif
293 290
294static void 291static void parse_freebsd(struct parsed_partitions *state,
295parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, 292 sector_t offset, sector_t size, int origin)
296 sector_t offset, sector_t size, int origin)
297{ 293{
298#ifdef CONFIG_BSD_DISKLABEL 294#ifdef CONFIG_BSD_DISKLABEL
299 parse_bsd(state, bdev, offset, size, origin, 295 parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS);
300 "bsd", BSD_MAXPARTITIONS);
301#endif 296#endif
302} 297}
303 298
304static void 299static void parse_netbsd(struct parsed_partitions *state,
305parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, 300 sector_t offset, sector_t size, int origin)
306 sector_t offset, sector_t size, int origin)
307{ 301{
308#ifdef CONFIG_BSD_DISKLABEL 302#ifdef CONFIG_BSD_DISKLABEL
309 parse_bsd(state, bdev, offset, size, origin, 303 parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS);
310 "netbsd", BSD_MAXPARTITIONS);
311#endif 304#endif
312} 305}
313 306
314static void 307static void parse_openbsd(struct parsed_partitions *state,
315parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, 308 sector_t offset, sector_t size, int origin)
316 sector_t offset, sector_t size, int origin)
317{ 309{
318#ifdef CONFIG_BSD_DISKLABEL 310#ifdef CONFIG_BSD_DISKLABEL
319 parse_bsd(state, bdev, offset, size, origin, 311 parse_bsd(state, offset, size, origin, "openbsd",
320 "openbsd", OPENBSD_MAXPARTITIONS); 312 OPENBSD_MAXPARTITIONS);
321#endif 313#endif
322} 314}
323 315
@@ -325,16 +317,15 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev,
325 * Create devices for Unixware partitions listed in a disklabel, under a 317 * Create devices for Unixware partitions listed in a disklabel, under a
326 * dos-like partition. See parse_extended() for more information. 318 * dos-like partition. See parse_extended() for more information.
327 */ 319 */
328static void 320static void parse_unixware(struct parsed_partitions *state,
329parse_unixware(struct parsed_partitions *state, struct block_device *bdev, 321 sector_t offset, sector_t size, int origin)
330 sector_t offset, sector_t size, int origin)
331{ 322{
332#ifdef CONFIG_UNIXWARE_DISKLABEL 323#ifdef CONFIG_UNIXWARE_DISKLABEL
333 Sector sect; 324 Sector sect;
334 struct unixware_disklabel *l; 325 struct unixware_disklabel *l;
335 struct unixware_slice *p; 326 struct unixware_slice *p;
336 327
337 l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect); 328 l = read_part_sector(state, offset + 29, &sect);
338 if (!l) 329 if (!l)
339 return; 330 return;
340 if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || 331 if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
@@ -365,9 +356,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
365 * Anand Krishnamurthy <anandk@wiproge.med.ge.com> 356 * Anand Krishnamurthy <anandk@wiproge.med.ge.com>
366 * Rajeev V. Pillai <rajeevvp@yahoo.com> 357 * Rajeev V. Pillai <rajeevvp@yahoo.com>
367 */ 358 */
368static void 359static void parse_minix(struct parsed_partitions *state,
369parse_minix(struct parsed_partitions *state, struct block_device *bdev, 360 sector_t offset, sector_t size, int origin)
370 sector_t offset, sector_t size, int origin)
371{ 361{
372#ifdef CONFIG_MINIX_SUBPARTITION 362#ifdef CONFIG_MINIX_SUBPARTITION
373 Sector sect; 363 Sector sect;
@@ -375,7 +365,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
375 struct partition *p; 365 struct partition *p;
376 int i; 366 int i;
377 367
378 data = read_dev_sector(bdev, offset, &sect); 368 data = read_part_sector(state, offset, &sect);
379 if (!data) 369 if (!data)
380 return; 370 return;
381 371
@@ -404,8 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
404 394
405static struct { 395static struct {
406 unsigned char id; 396 unsigned char id;
407 void (*parse)(struct parsed_partitions *, struct block_device *, 397 void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
408 sector_t, sector_t, int);
409} subtypes[] = { 398} subtypes[] = {
410 {FREEBSD_PARTITION, parse_freebsd}, 399 {FREEBSD_PARTITION, parse_freebsd},
411 {NETBSD_PARTITION, parse_netbsd}, 400 {NETBSD_PARTITION, parse_netbsd},
@@ -417,16 +406,16 @@ static struct {
417 {0, NULL}, 406 {0, NULL},
418}; 407};
419 408
420int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) 409int msdos_partition(struct parsed_partitions *state)
421{ 410{
422 sector_t sector_size = bdev_logical_block_size(bdev) / 512; 411 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
423 Sector sect; 412 Sector sect;
424 unsigned char *data; 413 unsigned char *data;
425 struct partition *p; 414 struct partition *p;
426 struct fat_boot_sector *fb; 415 struct fat_boot_sector *fb;
427 int slot; 416 int slot;
428 417
429 data = read_dev_sector(bdev, 0, &sect); 418 data = read_part_sector(state, 0, &sect);
430 if (!data) 419 if (!data)
431 return -1; 420 return -1;
432 if (!msdos_magic_present(data + 510)) { 421 if (!msdos_magic_present(data + 510)) {
@@ -434,7 +423,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
434 return 0; 423 return 0;
435 } 424 }
436 425
437 if (aix_magic_present(data, bdev)) { 426 if (aix_magic_present(state, data)) {
438 put_dev_sector(sect); 427 put_dev_sector(sect);
439 printk( " [AIX]"); 428 printk( " [AIX]");
440 return 0; 429 return 0;
@@ -503,13 +492,13 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
503 put_partition(state, slot, start, n); 492 put_partition(state, slot, start, n);
504 493
505 printk(" <"); 494 printk(" <");
506 parse_extended(state, bdev, start, size); 495 parse_extended(state, start, size);
507 printk(" >"); 496 printk(" >");
508 continue; 497 continue;
509 } 498 }
510 put_partition(state, slot, start, size); 499 put_partition(state, slot, start, size);
511 if (SYS_IND(p) == LINUX_RAID_PARTITION) 500 if (SYS_IND(p) == LINUX_RAID_PARTITION)
512 state->parts[slot].flags = 1; 501 state->parts[slot].flags = ADDPART_FLAG_RAID;
513 if (SYS_IND(p) == DM6_PARTITION) 502 if (SYS_IND(p) == DM6_PARTITION)
514 printk("[DM]"); 503 printk("[DM]");
515 if (SYS_IND(p) == EZD_PARTITION) 504 if (SYS_IND(p) == EZD_PARTITION)
@@ -532,8 +521,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
532 521
533 if (!subtypes[n].parse) 522 if (!subtypes[n].parse)
534 continue; 523 continue;
535 subtypes[n].parse(state, bdev, start_sect(p)*sector_size, 524 subtypes[n].parse(state, start_sect(p) * sector_size,
536 nr_sects(p)*sector_size, slot); 525 nr_sects(p) * sector_size, slot);
537 } 526 }
538 put_dev_sector(sect); 527 put_dev_sector(sect);
539 return 1; 528 return 1;
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h
index 01e5e0b6902d..38c781c490b3 100644
--- a/fs/partitions/msdos.h
+++ b/fs/partitions/msdos.h
@@ -4,5 +4,5 @@
4 4
5#define MSDOS_LABEL_MAGIC 0xAA55 5#define MSDOS_LABEL_MAGIC 0xAA55
6 6
7int msdos_partition(struct parsed_partitions *state, struct block_device *bdev); 7int msdos_partition(struct parsed_partitions *state);
8 8
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index c05c17bc5df3..fc22b85d436a 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,7 +10,7 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13int osf_partition(struct parsed_partitions *state, struct block_device *bdev) 13int osf_partition(struct parsed_partitions *state)
14{ 14{
15 int i; 15 int i;
16 int slot = 1; 16 int slot = 1;
@@ -49,7 +49,7 @@ int osf_partition(struct parsed_partitions *state, struct block_device *bdev)
49 } * label; 49 } * label;
50 struct d_partition * partition; 50 struct d_partition * partition;
51 51
52 data = read_dev_sector(bdev, 0, &sect); 52 data = read_part_sector(state, 0, &sect);
53 if (!data) 53 if (!data)
54 return -1; 54 return -1;
55 55
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h
index 427b8eab314b..20ed2315ec16 100644
--- a/fs/partitions/osf.h
+++ b/fs/partitions/osf.h
@@ -4,4 +4,4 @@
4 4
5#define DISKLABELMAGIC (0x82564557UL) 5#define DISKLABELMAGIC (0x82564557UL)
6 6
7int osf_partition(struct parsed_partitions *state, struct block_device *bdev); 7int osf_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
index ed5ac83fe83a..43b1df9aa16c 100644
--- a/fs/partitions/sgi.c
+++ b/fs/partitions/sgi.c
@@ -27,7 +27,7 @@ struct sgi_disklabel {
27 __be32 _unused1; /* Padding */ 27 __be32 _unused1; /* Padding */
28}; 28};
29 29
30int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) 30int sgi_partition(struct parsed_partitions *state)
31{ 31{
32 int i, csum; 32 int i, csum;
33 __be32 magic; 33 __be32 magic;
@@ -39,7 +39,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
39 struct sgi_partition *p; 39 struct sgi_partition *p;
40 char b[BDEVNAME_SIZE]; 40 char b[BDEVNAME_SIZE];
41 41
42 label = (struct sgi_disklabel *) read_dev_sector(bdev, 0, &sect); 42 label = read_part_sector(state, 0, &sect);
43 if (!label) 43 if (!label)
44 return -1; 44 return -1;
45 p = &label->partitions[0]; 45 p = &label->partitions[0];
@@ -57,7 +57,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
57 } 57 }
58 if(csum) { 58 if(csum) {
59 printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", 59 printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
60 bdevname(bdev, b)); 60 bdevname(state->bdev, b));
61 put_dev_sector(sect); 61 put_dev_sector(sect);
62 return 0; 62 return 0;
63 } 63 }
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h
index 5d5595c09928..b9553ebdd5a9 100644
--- a/fs/partitions/sgi.h
+++ b/fs/partitions/sgi.h
@@ -2,7 +2,7 @@
2 * fs/partitions/sgi.h 2 * fs/partitions/sgi.h
3 */ 3 */
4 4
5extern int sgi_partition(struct parsed_partitions *state, struct block_device *bdev); 5extern int sgi_partition(struct parsed_partitions *state);
6 6
7#define SGI_LABEL_MAGIC 0x0be5a941 7#define SGI_LABEL_MAGIC 0x0be5a941
8 8
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index c95e6a62c01d..a32660e25f7f 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -10,7 +10,7 @@
10#include "check.h" 10#include "check.h"
11#include "sun.h" 11#include "sun.h"
12 12
13int sun_partition(struct parsed_partitions *state, struct block_device *bdev) 13int sun_partition(struct parsed_partitions *state)
14{ 14{
15 int i; 15 int i;
16 __be16 csum; 16 __be16 csum;
@@ -61,7 +61,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
61 int use_vtoc; 61 int use_vtoc;
62 int nparts; 62 int nparts;
63 63
64 label = (struct sun_disklabel *)read_dev_sector(bdev, 0, &sect); 64 label = read_part_sector(state, 0, &sect);
65 if (!label) 65 if (!label)
66 return -1; 66 return -1;
67 67
@@ -78,7 +78,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
78 csum ^= *ush--; 78 csum ^= *ush--;
79 if (csum) { 79 if (csum) {
80 printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", 80 printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
81 bdevname(bdev, b)); 81 bdevname(state->bdev, b));
82 put_dev_sector(sect); 82 put_dev_sector(sect);
83 return 0; 83 return 0;
84 } 84 }
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h
index 7f864d1f86d4..2424baa8319f 100644
--- a/fs/partitions/sun.h
+++ b/fs/partitions/sun.h
@@ -5,4 +5,4 @@
5#define SUN_LABEL_MAGIC 0xDABE 5#define SUN_LABEL_MAGIC 0xDABE
6#define SUN_VTOC_SANITY 0x600DDEEE 6#define SUN_VTOC_SANITY 0x600DDEEE
7 7
8int sun_partition(struct parsed_partitions *state, struct block_device *bdev); 8int sun_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
index 4eba27b78643..9030c864428e 100644
--- a/fs/partitions/sysv68.c
+++ b/fs/partitions/sysv68.c
@@ -46,7 +46,7 @@ struct slice {
46}; 46};
47 47
48 48
49int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) 49int sysv68_partition(struct parsed_partitions *state)
50{ 50{
51 int i, slices; 51 int i, slices;
52 int slot = 1; 52 int slot = 1;
@@ -55,7 +55,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
55 struct dkblk0 *b; 55 struct dkblk0 *b;
56 struct slice *slice; 56 struct slice *slice;
57 57
58 data = read_dev_sector(bdev, 0, &sect); 58 data = read_part_sector(state, 0, &sect);
59 if (!data) 59 if (!data)
60 return -1; 60 return -1;
61 61
@@ -68,7 +68,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
68 i = be32_to_cpu(b->dk_ios.ios_slcblk); 68 i = be32_to_cpu(b->dk_ios.ios_slcblk);
69 put_dev_sector(sect); 69 put_dev_sector(sect);
70 70
71 data = read_dev_sector(bdev, i, &sect); 71 data = read_part_sector(state, i, &sect);
72 if (!data) 72 if (!data)
73 return -1; 73 return -1;
74 74
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
index fa733f68431b..bf2f5ffa97ac 100644
--- a/fs/partitions/sysv68.h
+++ b/fs/partitions/sysv68.h
@@ -1 +1 @@
extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev); extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c
index ec852c11dce4..db9eef260364 100644
--- a/fs/partitions/ultrix.c
+++ b/fs/partitions/ultrix.c
@@ -9,7 +9,7 @@
9#include "check.h" 9#include "check.h"
10#include "ultrix.h" 10#include "ultrix.h"
11 11
12int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) 12int ultrix_partition(struct parsed_partitions *state)
13{ 13{
14 int i; 14 int i;
15 Sector sect; 15 Sector sect;
@@ -26,7 +26,7 @@ int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev)
26#define PT_MAGIC 0x032957 /* Partition magic number */ 26#define PT_MAGIC 0x032957 /* Partition magic number */
27#define PT_VALID 1 /* Indicates if struct is valid */ 27#define PT_VALID 1 /* Indicates if struct is valid */
28 28
29 data = read_dev_sector(bdev, (16384 - sizeof(*label))/512, &sect); 29 data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect);
30 if (!data) 30 if (!data)
31 return -1; 31 return -1;
32 32
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h
index a74bf8e2d370..a3cc00b2bded 100644
--- a/fs/partitions/ultrix.h
+++ b/fs/partitions/ultrix.h
@@ -2,4 +2,4 @@
2 * fs/partitions/ultrix.h 2 * fs/partitions/ultrix.h
3 */ 3 */
4 4
5int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev); 5int ultrix_partition(struct parsed_partitions *state);
diff --git a/fs/pipe.c b/fs/pipe.c
index 37ba29ff3158..d79872eba09a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -11,6 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/log2.h>
14#include <linux/mount.h> 15#include <linux/mount.h>
15#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
16#include <linux/uio.h> 17#include <linux/uio.h>
@@ -18,11 +19,18 @@
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/audit.h> 20#include <linux/audit.h>
20#include <linux/syscalls.h> 21#include <linux/syscalls.h>
22#include <linux/fcntl.h>
21 23
22#include <asm/uaccess.h> 24#include <asm/uaccess.h>
23#include <asm/ioctls.h> 25#include <asm/ioctls.h>
24 26
25/* 27/*
28 * The max size that a non-root user is allowed to grow the pipe. Can
29 * be set by root in /proc/sys/fs/pipe-max-pages
30 */
31unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
32
33/*
26 * We use a start+len construction, which provides full use of the 34 * We use a start+len construction, which provides full use of the
27 * allocated memory. 35 * allocated memory.
28 * -- Florian Coosmann (FGC) 36 * -- Florian Coosmann (FGC)
@@ -390,7 +398,7 @@ redo:
390 if (!buf->len) { 398 if (!buf->len) {
391 buf->ops = NULL; 399 buf->ops = NULL;
392 ops->release(pipe, buf); 400 ops->release(pipe, buf);
393 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 401 curbuf = (curbuf + 1) & (pipe->buffers - 1);
394 pipe->curbuf = curbuf; 402 pipe->curbuf = curbuf;
395 pipe->nrbufs = --bufs; 403 pipe->nrbufs = --bufs;
396 do_wakeup = 1; 404 do_wakeup = 1;
@@ -472,7 +480,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
472 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 480 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
473 if (pipe->nrbufs && chars != 0) { 481 if (pipe->nrbufs && chars != 0) {
474 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & 482 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
475 (PIPE_BUFFERS-1); 483 (pipe->buffers - 1);
476 struct pipe_buffer *buf = pipe->bufs + lastbuf; 484 struct pipe_buffer *buf = pipe->bufs + lastbuf;
477 const struct pipe_buf_operations *ops = buf->ops; 485 const struct pipe_buf_operations *ops = buf->ops;
478 int offset = buf->offset + buf->len; 486 int offset = buf->offset + buf->len;
@@ -518,8 +526,8 @@ redo1:
518 break; 526 break;
519 } 527 }
520 bufs = pipe->nrbufs; 528 bufs = pipe->nrbufs;
521 if (bufs < PIPE_BUFFERS) { 529 if (bufs < pipe->buffers) {
522 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 530 int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
523 struct pipe_buffer *buf = pipe->bufs + newbuf; 531 struct pipe_buffer *buf = pipe->bufs + newbuf;
524 struct page *page = pipe->tmp_page; 532 struct page *page = pipe->tmp_page;
525 char *src; 533 char *src;
@@ -580,7 +588,7 @@ redo2:
580 if (!total_len) 588 if (!total_len)
581 break; 589 break;
582 } 590 }
583 if (bufs < PIPE_BUFFERS) 591 if (bufs < pipe->buffers)
584 continue; 592 continue;
585 if (filp->f_flags & O_NONBLOCK) { 593 if (filp->f_flags & O_NONBLOCK) {
586 if (!ret) 594 if (!ret)
@@ -640,7 +648,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
640 nrbufs = pipe->nrbufs; 648 nrbufs = pipe->nrbufs;
641 while (--nrbufs >= 0) { 649 while (--nrbufs >= 0) {
642 count += pipe->bufs[buf].len; 650 count += pipe->bufs[buf].len;
643 buf = (buf+1) & (PIPE_BUFFERS-1); 651 buf = (buf+1) & (pipe->buffers - 1);
644 } 652 }
645 mutex_unlock(&inode->i_mutex); 653 mutex_unlock(&inode->i_mutex);
646 654
@@ -671,7 +679,7 @@ pipe_poll(struct file *filp, poll_table *wait)
671 } 679 }
672 680
673 if (filp->f_mode & FMODE_WRITE) { 681 if (filp->f_mode & FMODE_WRITE) {
674 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 682 mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
675 /* 683 /*
676 * Most Unices do not set POLLERR for FIFOs but on Linux they 684 * Most Unices do not set POLLERR for FIFOs but on Linux they
677 * behave exactly like pipes for poll(). 685 * behave exactly like pipes for poll().
@@ -877,25 +885,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
877 885
878 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); 886 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
879 if (pipe) { 887 if (pipe) {
880 init_waitqueue_head(&pipe->wait); 888 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
881 pipe->r_counter = pipe->w_counter = 1; 889 if (pipe->bufs) {
882 pipe->inode = inode; 890 init_waitqueue_head(&pipe->wait);
891 pipe->r_counter = pipe->w_counter = 1;
892 pipe->inode = inode;
893 pipe->buffers = PIPE_DEF_BUFFERS;
894 return pipe;
895 }
896 kfree(pipe);
883 } 897 }
884 898
885 return pipe; 899 return NULL;
886} 900}
887 901
888void __free_pipe_info(struct pipe_inode_info *pipe) 902void __free_pipe_info(struct pipe_inode_info *pipe)
889{ 903{
890 int i; 904 int i;
891 905
892 for (i = 0; i < PIPE_BUFFERS; i++) { 906 for (i = 0; i < pipe->buffers; i++) {
893 struct pipe_buffer *buf = pipe->bufs + i; 907 struct pipe_buffer *buf = pipe->bufs + i;
894 if (buf->ops) 908 if (buf->ops)
895 buf->ops->release(pipe, buf); 909 buf->ops->release(pipe, buf);
896 } 910 }
897 if (pipe->tmp_page) 911 if (pipe->tmp_page)
898 __free_page(pipe->tmp_page); 912 __free_page(pipe->tmp_page);
913 kfree(pipe->bufs);
899 kfree(pipe); 914 kfree(pipe);
900} 915}
901 916
@@ -1094,6 +1109,89 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
1094} 1109}
1095 1110
1096/* 1111/*
1112 * Allocate a new array of pipe buffers and copy the info over. Returns the
1113 * pipe size if successful, or return -ERROR on error.
1114 */
1115static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1116{
1117 struct pipe_buffer *bufs;
1118
1119 /*
1120 * Must be a power-of-2 currently
1121 */
1122 if (!is_power_of_2(arg))
1123 return -EINVAL;
1124
1125 /*
1126 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
1127 * expect a lot of shrink+grow operations, just free and allocate
1128 * again like we would do for growing. If the pipe currently
1129 * contains more buffers than arg, then return busy.
1130 */
1131 if (arg < pipe->nrbufs)
1132 return -EBUSY;
1133
1134 bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
1135 if (unlikely(!bufs))
1136 return -ENOMEM;
1137
1138 /*
1139 * The pipe array wraps around, so just start the new one at zero
1140 * and adjust the indexes.
1141 */
1142 if (pipe->nrbufs) {
1143 const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
1144 const unsigned int head = pipe->nrbufs - tail;
1145
1146 if (head)
1147 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
1148 if (tail)
1149 memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
1150 }
1151
1152 pipe->curbuf = 0;
1153 kfree(pipe->bufs);
1154 pipe->bufs = bufs;
1155 pipe->buffers = arg;
1156 return arg;
1157}
1158
1159long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1160{
1161 struct pipe_inode_info *pipe;
1162 long ret;
1163
1164 pipe = file->f_path.dentry->d_inode->i_pipe;
1165 if (!pipe)
1166 return -EBADF;
1167
1168 mutex_lock(&pipe->inode->i_mutex);
1169
1170 switch (cmd) {
1171 case F_SETPIPE_SZ:
1172 if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages)
1173 return -EINVAL;
1174 /*
1175 * The pipe needs to be at least 2 pages large to
1176 * guarantee POSIX behaviour.
1177 */
1178 if (arg < 2)
1179 return -EINVAL;
1180 ret = pipe_set_size(pipe, arg);
1181 break;
1182 case F_GETPIPE_SZ:
1183 ret = pipe->buffers;
1184 break;
1185 default:
1186 ret = -EINVAL;
1187 break;
1188 }
1189
1190 mutex_unlock(&pipe->inode->i_mutex);
1191 return ret;
1192}
1193
1194/*
1097 * pipefs should _never_ be mounted by userland - too much of security hassle, 1195 * pipefs should _never_ be mounted by userland - too much of security hassle,
1098 * no real gain from having the whole whorehouse mounted. So we don't need 1196 * no real gain from having the whole whorehouse mounted. So we don't need
1099 * any operations on the root directory. However, we need a non-trivial 1197 * any operations on the root directory. However, we need a non-trivial
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8418fcc0a6ab..c7f9f23449dc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -730,6 +730,7 @@ out_no_task:
730 730
731static const struct file_operations proc_info_file_operations = { 731static const struct file_operations proc_info_file_operations = {
732 .read = proc_info_read, 732 .read = proc_info_read,
733 .llseek = generic_file_llseek,
733}; 734};
734 735
735static int proc_single_show(struct seq_file *m, void *v) 736static int proc_single_show(struct seq_file *m, void *v)
@@ -987,6 +988,7 @@ out_no_task:
987 988
988static const struct file_operations proc_environ_operations = { 989static const struct file_operations proc_environ_operations = {
989 .read = environ_read, 990 .read = environ_read,
991 .llseek = generic_file_llseek,
990}; 992};
991 993
992static ssize_t oom_adjust_read(struct file *file, char __user *buf, 994static ssize_t oom_adjust_read(struct file *file, char __user *buf,
@@ -1060,6 +1062,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1060static const struct file_operations proc_oom_adjust_operations = { 1062static const struct file_operations proc_oom_adjust_operations = {
1061 .read = oom_adjust_read, 1063 .read = oom_adjust_read,
1062 .write = oom_adjust_write, 1064 .write = oom_adjust_write,
1065 .llseek = generic_file_llseek,
1063}; 1066};
1064 1067
1065#ifdef CONFIG_AUDITSYSCALL 1068#ifdef CONFIG_AUDITSYSCALL
@@ -1131,6 +1134,7 @@ out_free_page:
1131static const struct file_operations proc_loginuid_operations = { 1134static const struct file_operations proc_loginuid_operations = {
1132 .read = proc_loginuid_read, 1135 .read = proc_loginuid_read,
1133 .write = proc_loginuid_write, 1136 .write = proc_loginuid_write,
1137 .llseek = generic_file_llseek,
1134}; 1138};
1135 1139
1136static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1140static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
@@ -1151,6 +1155,7 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1151 1155
1152static const struct file_operations proc_sessionid_operations = { 1156static const struct file_operations proc_sessionid_operations = {
1153 .read = proc_sessionid_read, 1157 .read = proc_sessionid_read,
1158 .llseek = generic_file_llseek,
1154}; 1159};
1155#endif 1160#endif
1156 1161
@@ -1202,6 +1207,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
1202static const struct file_operations proc_fault_inject_operations = { 1207static const struct file_operations proc_fault_inject_operations = {
1203 .read = proc_fault_inject_read, 1208 .read = proc_fault_inject_read,
1204 .write = proc_fault_inject_write, 1209 .write = proc_fault_inject_write,
1210 .llseek = generic_file_llseek,
1205}; 1211};
1206#endif 1212#endif
1207 1213
@@ -1943,7 +1949,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1943} 1949}
1944 1950
1945static const struct file_operations proc_fdinfo_file_operations = { 1951static const struct file_operations proc_fdinfo_file_operations = {
1946 .open = nonseekable_open, 1952 .open = nonseekable_open,
1947 .read = proc_fdinfo_read, 1953 .read = proc_fdinfo_read,
1948}; 1954};
1949 1955
@@ -2227,6 +2233,7 @@ out_no_task:
2227static const struct file_operations proc_pid_attr_operations = { 2233static const struct file_operations proc_pid_attr_operations = {
2228 .read = proc_pid_attr_read, 2234 .read = proc_pid_attr_read,
2229 .write = proc_pid_attr_write, 2235 .write = proc_pid_attr_write,
2236 .llseek = generic_file_llseek,
2230}; 2237};
2231 2238
2232static const struct pid_entry attr_dir_stuff[] = { 2239static const struct pid_entry attr_dir_stuff[] = {
@@ -2347,6 +2354,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
2347static const struct file_operations proc_coredump_filter_operations = { 2354static const struct file_operations proc_coredump_filter_operations = {
2348 .read = proc_coredump_filter_read, 2355 .read = proc_coredump_filter_read,
2349 .write = proc_coredump_filter_write, 2356 .write = proc_coredump_filter_write,
2357 .llseek = generic_file_llseek,
2350}; 2358};
2351#endif 2359#endif
2352 2360
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d35b23238fb1..aea8502e58a3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -232,9 +232,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
232 if (rv == -ENOIOCTLCMD) 232 if (rv == -ENOIOCTLCMD)
233 rv = -EINVAL; 233 rv = -EINVAL;
234 } else if (ioctl) { 234 } else if (ioctl) {
235 lock_kernel(); 235 WARN_ONCE(1, "Procfs ioctl handlers must use unlocked_ioctl, "
236 "%pf will be called without the Bkl held\n", ioctl);
236 rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); 237 rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
237 unlock_kernel();
238 } 238 }
239 239
240 pde_users_dec(pde); 240 pde_users_dec(pde);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 19979a2ce272..c837a77351be 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -558,6 +558,7 @@ static int open_kcore(struct inode *inode, struct file *filp)
558static const struct file_operations proc_kcore_operations = { 558static const struct file_operations proc_kcore_operations = {
559 .read = read_kcore, 559 .read = read_kcore,
560 .open = open_kcore, 560 .open = open_kcore,
561 .llseek = generic_file_llseek,
561}; 562};
562 563
563#ifdef CONFIG_MEMORY_HOTPLUG 564#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index cfe90a48a6e8..bd4b5a740ff1 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -53,6 +53,7 @@ static const struct file_operations proc_kmsg_operations = {
53 .poll = kmsg_poll, 53 .poll = kmsg_poll,
54 .open = kmsg_open, 54 .open = kmsg_open,
55 .release = kmsg_release, 55 .release = kmsg_release,
56 .llseek = generic_file_llseek,
56}; 57};
57 58
58static int __init proc_kmsg_init(void) 59static int __init proc_kmsg_init(void)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 9fbc99ec799a..91c817ff02c3 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,6 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
163 163
164static const struct file_operations proc_vmcore_operations = { 164static const struct file_operations proc_vmcore_operations = {
165 .read = read_vmcore, 165 .read = read_vmcore,
166 .llseek = generic_file_llseek,
166}; 167};
167 168
168static struct vmcore* __init get_new_element(void) 169static struct vmcore* __init get_new_element(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 788b5802a7ce..655a4c52b8c3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
82 82
83/* 83/*
84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas 84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas
85 * and quota formats, dqstats structure containing statistics about the lists 85 * and quota formats.
86 * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and 86 * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
87 * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. 87 * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
88 * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly 88 * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -132,7 +132,9 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); 132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 133EXPORT_SYMBOL(dq_data_lock);
134 134
135#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
135static char *quotatypes[] = INITQFNAMES; 136static char *quotatypes[] = INITQFNAMES;
137#endif
136static struct quota_format_type *quota_formats; /* List of registered formats */ 138static struct quota_format_type *quota_formats; /* List of registered formats */
137static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; 139static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
138 140
@@ -226,6 +228,10 @@ static struct hlist_head *dquot_hash;
226 228
227struct dqstats dqstats; 229struct dqstats dqstats;
228EXPORT_SYMBOL(dqstats); 230EXPORT_SYMBOL(dqstats);
231#ifdef CONFIG_SMP
232struct dqstats *dqstats_pcpu;
233EXPORT_SYMBOL(dqstats_pcpu);
234#endif
229 235
230static qsize_t inode_get_rsv_space(struct inode *inode); 236static qsize_t inode_get_rsv_space(struct inode *inode);
231static void __dquot_initialize(struct inode *inode, int type); 237static void __dquot_initialize(struct inode *inode, int type);
@@ -273,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
273static inline void put_dquot_last(struct dquot *dquot) 279static inline void put_dquot_last(struct dquot *dquot)
274{ 280{
275 list_add_tail(&dquot->dq_free, &free_dquots); 281 list_add_tail(&dquot->dq_free, &free_dquots);
276 dqstats.free_dquots++; 282 dqstats_inc(DQST_FREE_DQUOTS);
277} 283}
278 284
279static inline void remove_free_dquot(struct dquot *dquot) 285static inline void remove_free_dquot(struct dquot *dquot)
@@ -281,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
281 if (list_empty(&dquot->dq_free)) 287 if (list_empty(&dquot->dq_free))
282 return; 288 return;
283 list_del_init(&dquot->dq_free); 289 list_del_init(&dquot->dq_free);
284 dqstats.free_dquots--; 290 dqstats_dec(DQST_FREE_DQUOTS);
285} 291}
286 292
287static inline void put_inuse(struct dquot *dquot) 293static inline void put_inuse(struct dquot *dquot)
@@ -289,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot)
289 /* We add to the back of inuse list so we don't have to restart 295 /* We add to the back of inuse list so we don't have to restart
290 * when traversing this list and we block */ 296 * when traversing this list and we block */
291 list_add_tail(&dquot->dq_inuse, &inuse_list); 297 list_add_tail(&dquot->dq_inuse, &inuse_list);
292 dqstats.allocated_dquots++; 298 dqstats_inc(DQST_ALLOC_DQUOTS);
293} 299}
294 300
295static inline void remove_inuse(struct dquot *dquot) 301static inline void remove_inuse(struct dquot *dquot)
296{ 302{
297 dqstats.allocated_dquots--; 303 dqstats_dec(DQST_ALLOC_DQUOTS);
298 list_del(&dquot->dq_inuse); 304 list_del(&dquot->dq_inuse);
299} 305}
300/* 306/*
@@ -317,14 +323,23 @@ static inline int mark_dquot_dirty(struct dquot *dquot)
317 return dquot->dq_sb->dq_op->mark_dirty(dquot); 323 return dquot->dq_sb->dq_op->mark_dirty(dquot);
318} 324}
319 325
326/* Mark dquot dirty in atomic manner, and return it's old dirty flag state */
320int dquot_mark_dquot_dirty(struct dquot *dquot) 327int dquot_mark_dquot_dirty(struct dquot *dquot)
321{ 328{
329 int ret = 1;
330
331 /* If quota is dirty already, we don't have to acquire dq_list_lock */
332 if (test_bit(DQ_MOD_B, &dquot->dq_flags))
333 return 1;
334
322 spin_lock(&dq_list_lock); 335 spin_lock(&dq_list_lock);
323 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) 336 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
324 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> 337 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
325 info[dquot->dq_type].dqi_dirty_list); 338 info[dquot->dq_type].dqi_dirty_list);
339 ret = 0;
340 }
326 spin_unlock(&dq_list_lock); 341 spin_unlock(&dq_list_lock);
327 return 0; 342 return ret;
328} 343}
329EXPORT_SYMBOL(dquot_mark_dquot_dirty); 344EXPORT_SYMBOL(dquot_mark_dquot_dirty);
330 345
@@ -550,8 +565,8 @@ int dquot_scan_active(struct super_block *sb,
550 continue; 565 continue;
551 /* Now we have active dquot so we can just increase use count */ 566 /* Now we have active dquot so we can just increase use count */
552 atomic_inc(&dquot->dq_count); 567 atomic_inc(&dquot->dq_count);
553 dqstats.lookups++;
554 spin_unlock(&dq_list_lock); 568 spin_unlock(&dq_list_lock);
569 dqstats_inc(DQST_LOOKUPS);
555 dqput(old_dquot); 570 dqput(old_dquot);
556 old_dquot = dquot; 571 old_dquot = dquot;
557 ret = fn(dquot, priv); 572 ret = fn(dquot, priv);
@@ -596,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
596 * holding reference so we can safely just increase 611 * holding reference so we can safely just increase
597 * use count */ 612 * use count */
598 atomic_inc(&dquot->dq_count); 613 atomic_inc(&dquot->dq_count);
599 dqstats.lookups++;
600 spin_unlock(&dq_list_lock); 614 spin_unlock(&dq_list_lock);
615 dqstats_inc(DQST_LOOKUPS);
601 sb->dq_op->write_dquot(dquot); 616 sb->dq_op->write_dquot(dquot);
602 dqput(dquot); 617 dqput(dquot);
603 spin_lock(&dq_list_lock); 618 spin_lock(&dq_list_lock);
@@ -609,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
609 if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) 624 if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
610 && info_dirty(&dqopt->info[cnt])) 625 && info_dirty(&dqopt->info[cnt]))
611 sb->dq_op->write_info(sb, cnt); 626 sb->dq_op->write_info(sb, cnt);
612 spin_lock(&dq_list_lock); 627 dqstats_inc(DQST_SYNCS);
613 dqstats.syncs++;
614 spin_unlock(&dq_list_lock);
615 mutex_unlock(&dqopt->dqonoff_mutex); 628 mutex_unlock(&dqopt->dqonoff_mutex);
616 629
617 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) 630 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -663,6 +676,22 @@ static void prune_dqcache(int count)
663 } 676 }
664} 677}
665 678
679static int dqstats_read(unsigned int type)
680{
681 int count = 0;
682#ifdef CONFIG_SMP
683 int cpu;
684 for_each_possible_cpu(cpu)
685 count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
686 /* Statistics reading is racy, but absolute accuracy isn't required */
687 if (count < 0)
688 count = 0;
689#else
690 count = dqstats.stat[type];
691#endif
692 return count;
693}
694
666/* 695/*
667 * This is called from kswapd when we think we need some 696 * This is called from kswapd when we think we need some
668 * more memory 697 * more memory
@@ -675,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
675 prune_dqcache(nr); 704 prune_dqcache(nr);
676 spin_unlock(&dq_list_lock); 705 spin_unlock(&dq_list_lock);
677 } 706 }
678 return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; 707 return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
679} 708}
680 709
681static struct shrinker dqcache_shrinker = { 710static struct shrinker dqcache_shrinker = {
@@ -703,10 +732,7 @@ void dqput(struct dquot *dquot)
703 BUG(); 732 BUG();
704 } 733 }
705#endif 734#endif
706 735 dqstats_inc(DQST_DROPS);
707 spin_lock(&dq_list_lock);
708 dqstats.drops++;
709 spin_unlock(&dq_list_lock);
710we_slept: 736we_slept:
711 spin_lock(&dq_list_lock); 737 spin_lock(&dq_list_lock);
712 if (atomic_read(&dquot->dq_count) > 1) { 738 if (atomic_read(&dquot->dq_count) > 1) {
@@ -823,15 +849,15 @@ we_slept:
823 put_inuse(dquot); 849 put_inuse(dquot);
824 /* hash it first so it can be found */ 850 /* hash it first so it can be found */
825 insert_dquot_hash(dquot); 851 insert_dquot_hash(dquot);
826 dqstats.lookups++;
827 spin_unlock(&dq_list_lock); 852 spin_unlock(&dq_list_lock);
853 dqstats_inc(DQST_LOOKUPS);
828 } else { 854 } else {
829 if (!atomic_read(&dquot->dq_count)) 855 if (!atomic_read(&dquot->dq_count))
830 remove_free_dquot(dquot); 856 remove_free_dquot(dquot);
831 atomic_inc(&dquot->dq_count); 857 atomic_inc(&dquot->dq_count);
832 dqstats.cache_hits++;
833 dqstats.lookups++;
834 spin_unlock(&dq_list_lock); 858 spin_unlock(&dq_list_lock);
859 dqstats_inc(DQST_CACHE_HITS);
860 dqstats_inc(DQST_LOOKUPS);
835 } 861 }
836 /* Wait for dq_lock - after this we know that either dquot_release() is 862 /* Wait for dq_lock - after this we know that either dquot_release() is
837 * already finished or it will be canceled due to dq_count > 1 test */ 863 * already finished or it will be canceled due to dq_count > 1 test */
@@ -1677,16 +1703,19 @@ EXPORT_SYMBOL(dquot_free_inode);
1677 1703
1678/* 1704/*
1679 * Transfer the number of inode and blocks from one diskquota to an other. 1705 * Transfer the number of inode and blocks from one diskquota to an other.
1706 * On success, dquot references in transfer_to are consumed and references
1707 * to original dquots that need to be released are placed there. On failure,
1708 * references are kept untouched.
1680 * 1709 *
1681 * This operation can block, but only after everything is updated 1710 * This operation can block, but only after everything is updated
1682 * A transaction must be started when entering this function. 1711 * A transaction must be started when entering this function.
1712 *
1683 */ 1713 */
1684static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) 1714int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1685{ 1715{
1686 qsize_t space, cur_space; 1716 qsize_t space, cur_space;
1687 qsize_t rsv_space = 0; 1717 qsize_t rsv_space = 0;
1688 struct dquot *transfer_from[MAXQUOTAS]; 1718 struct dquot *transfer_from[MAXQUOTAS] = {};
1689 struct dquot *transfer_to[MAXQUOTAS];
1690 int cnt, ret = 0; 1719 int cnt, ret = 0;
1691 char warntype_to[MAXQUOTAS]; 1720 char warntype_to[MAXQUOTAS];
1692 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; 1721 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
@@ -1696,19 +1725,12 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
1696 if (IS_NOQUOTA(inode)) 1725 if (IS_NOQUOTA(inode))
1697 return 0; 1726 return 0;
1698 /* Initialize the arrays */ 1727 /* Initialize the arrays */
1699 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1728 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1700 transfer_from[cnt] = NULL;
1701 transfer_to[cnt] = NULL;
1702 warntype_to[cnt] = QUOTA_NL_NOWARN; 1729 warntype_to[cnt] = QUOTA_NL_NOWARN;
1703 }
1704 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1705 if (mask & (1 << cnt))
1706 transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
1707 }
1708 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1730 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1709 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ 1731 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
1710 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1732 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1711 goto put_all; 1733 return 0;
1712 } 1734 }
1713 spin_lock(&dq_data_lock); 1735 spin_lock(&dq_data_lock);
1714 cur_space = inode_get_bytes(inode); 1736 cur_space = inode_get_bytes(inode);
@@ -1760,47 +1782,41 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
1760 1782
1761 mark_all_dquot_dirty(transfer_from); 1783 mark_all_dquot_dirty(transfer_from);
1762 mark_all_dquot_dirty(transfer_to); 1784 mark_all_dquot_dirty(transfer_to);
1763 /* The reference we got is transferred to the inode */ 1785 /* Pass back references to put */
1764 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1786 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1765 transfer_to[cnt] = NULL; 1787 transfer_to[cnt] = transfer_from[cnt];
1766warn_put_all: 1788warn:
1767 flush_warnings(transfer_to, warntype_to); 1789 flush_warnings(transfer_to, warntype_to);
1768 flush_warnings(transfer_from, warntype_from_inodes); 1790 flush_warnings(transfer_from, warntype_from_inodes);
1769 flush_warnings(transfer_from, warntype_from_space); 1791 flush_warnings(transfer_from, warntype_from_space);
1770put_all:
1771 dqput_all(transfer_from);
1772 dqput_all(transfer_to);
1773 return ret; 1792 return ret;
1774over_quota: 1793over_quota:
1775 spin_unlock(&dq_data_lock); 1794 spin_unlock(&dq_data_lock);
1776 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1795 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1777 /* Clear dquot pointers we don't want to dqput() */ 1796 goto warn;
1778 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1779 transfer_from[cnt] = NULL;
1780 goto warn_put_all;
1781} 1797}
1798EXPORT_SYMBOL(__dquot_transfer);
1782 1799
1783/* Wrapper for transferring ownership of an inode for uid/gid only 1800/* Wrapper for transferring ownership of an inode for uid/gid only
1784 * Called from FSXXX_setattr() 1801 * Called from FSXXX_setattr()
1785 */ 1802 */
1786int dquot_transfer(struct inode *inode, struct iattr *iattr) 1803int dquot_transfer(struct inode *inode, struct iattr *iattr)
1787{ 1804{
1788 qid_t chid[MAXQUOTAS]; 1805 struct dquot *transfer_to[MAXQUOTAS] = {};
1789 unsigned long mask = 0; 1806 struct super_block *sb = inode->i_sb;
1807 int ret;
1790 1808
1791 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { 1809 if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
1792 mask |= 1 << USRQUOTA; 1810 return 0;
1793 chid[USRQUOTA] = iattr->ia_uid; 1811
1794 } 1812 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
1795 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { 1813 transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
1796 mask |= 1 << GRPQUOTA; 1814 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
1797 chid[GRPQUOTA] = iattr->ia_gid; 1815 transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA);
1798 } 1816
1799 if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { 1817 ret = __dquot_transfer(inode, transfer_to);
1800 dquot_initialize(inode); 1818 dqput_all(transfer_to);
1801 return __dquot_transfer(inode, chid, mask); 1819 return ret;
1802 }
1803 return 0;
1804} 1820}
1805EXPORT_SYMBOL(dquot_transfer); 1821EXPORT_SYMBOL(dquot_transfer);
1806 1822
@@ -2275,25 +2291,30 @@ static inline qsize_t stoqb(qsize_t space)
2275} 2291}
2276 2292
2277/* Generic routine for getting common part of quota structure */ 2293/* Generic routine for getting common part of quota structure */
2278static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 2294static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2279{ 2295{
2280 struct mem_dqblk *dm = &dquot->dq_dqb; 2296 struct mem_dqblk *dm = &dquot->dq_dqb;
2281 2297
2298 memset(di, 0, sizeof(*di));
2299 di->d_version = FS_DQUOT_VERSION;
2300 di->d_flags = dquot->dq_type == USRQUOTA ?
2301 XFS_USER_QUOTA : XFS_GROUP_QUOTA;
2302 di->d_id = dquot->dq_id;
2303
2282 spin_lock(&dq_data_lock); 2304 spin_lock(&dq_data_lock);
2283 di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); 2305 di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
2284 di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); 2306 di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
2285 di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace; 2307 di->d_ino_hardlimit = dm->dqb_ihardlimit;
2286 di->dqb_ihardlimit = dm->dqb_ihardlimit; 2308 di->d_ino_softlimit = dm->dqb_isoftlimit;
2287 di->dqb_isoftlimit = dm->dqb_isoftlimit; 2309 di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
2288 di->dqb_curinodes = dm->dqb_curinodes; 2310 di->d_icount = dm->dqb_curinodes;
2289 di->dqb_btime = dm->dqb_btime; 2311 di->d_btimer = dm->dqb_btime;
2290 di->dqb_itime = dm->dqb_itime; 2312 di->d_itimer = dm->dqb_itime;
2291 di->dqb_valid = QIF_ALL;
2292 spin_unlock(&dq_data_lock); 2313 spin_unlock(&dq_data_lock);
2293} 2314}
2294 2315
2295int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, 2316int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
2296 struct if_dqblk *di) 2317 struct fs_disk_quota *di)
2297{ 2318{
2298 struct dquot *dquot; 2319 struct dquot *dquot;
2299 2320
@@ -2307,51 +2328,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
2307} 2328}
2308EXPORT_SYMBOL(vfs_get_dqblk); 2329EXPORT_SYMBOL(vfs_get_dqblk);
2309 2330
2331#define VFS_FS_DQ_MASK \
2332 (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
2333 FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
2334 FS_DQ_BTIMER | FS_DQ_ITIMER)
2335
2310/* Generic routine for setting common part of quota structure */ 2336/* Generic routine for setting common part of quota structure */
2311static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) 2337static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2312{ 2338{
2313 struct mem_dqblk *dm = &dquot->dq_dqb; 2339 struct mem_dqblk *dm = &dquot->dq_dqb;
2314 int check_blim = 0, check_ilim = 0; 2340 int check_blim = 0, check_ilim = 0;
2315 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; 2341 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
2316 2342
2317 if ((di->dqb_valid & QIF_BLIMITS && 2343 if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
2318 (di->dqb_bhardlimit > dqi->dqi_maxblimit || 2344 return -EINVAL;
2319 di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || 2345
2320 (di->dqb_valid & QIF_ILIMITS && 2346 if (((di->d_fieldmask & FS_DQ_BSOFT) &&
2321 (di->dqb_ihardlimit > dqi->dqi_maxilimit || 2347 (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
2322 di->dqb_isoftlimit > dqi->dqi_maxilimit))) 2348 ((di->d_fieldmask & FS_DQ_BHARD) &&
2349 (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
2350 ((di->d_fieldmask & FS_DQ_ISOFT) &&
2351 (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
2352 ((di->d_fieldmask & FS_DQ_IHARD) &&
2353 (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
2323 return -ERANGE; 2354 return -ERANGE;
2324 2355
2325 spin_lock(&dq_data_lock); 2356 spin_lock(&dq_data_lock);
2326 if (di->dqb_valid & QIF_SPACE) { 2357 if (di->d_fieldmask & FS_DQ_BCOUNT) {
2327 dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; 2358 dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
2328 check_blim = 1; 2359 check_blim = 1;
2329 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); 2360 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
2330 } 2361 }
2331 if (di->dqb_valid & QIF_BLIMITS) { 2362
2332 dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); 2363 if (di->d_fieldmask & FS_DQ_BSOFT)
2333 dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); 2364 dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
2365 if (di->d_fieldmask & FS_DQ_BHARD)
2366 dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
2367 if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
2334 check_blim = 1; 2368 check_blim = 1;
2335 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); 2369 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
2336 } 2370 }
2337 if (di->dqb_valid & QIF_INODES) { 2371
2338 dm->dqb_curinodes = di->dqb_curinodes; 2372 if (di->d_fieldmask & FS_DQ_ICOUNT) {
2373 dm->dqb_curinodes = di->d_icount;
2339 check_ilim = 1; 2374 check_ilim = 1;
2340 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); 2375 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
2341 } 2376 }
2342 if (di->dqb_valid & QIF_ILIMITS) { 2377
2343 dm->dqb_isoftlimit = di->dqb_isoftlimit; 2378 if (di->d_fieldmask & FS_DQ_ISOFT)
2344 dm->dqb_ihardlimit = di->dqb_ihardlimit; 2379 dm->dqb_isoftlimit = di->d_ino_softlimit;
2380 if (di->d_fieldmask & FS_DQ_IHARD)
2381 dm->dqb_ihardlimit = di->d_ino_hardlimit;
2382 if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
2345 check_ilim = 1; 2383 check_ilim = 1;
2346 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); 2384 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
2347 } 2385 }
2348 if (di->dqb_valid & QIF_BTIME) { 2386
2349 dm->dqb_btime = di->dqb_btime; 2387 if (di->d_fieldmask & FS_DQ_BTIMER) {
2388 dm->dqb_btime = di->d_btimer;
2350 check_blim = 1; 2389 check_blim = 1;
2351 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); 2390 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
2352 } 2391 }
2353 if (di->dqb_valid & QIF_ITIME) { 2392
2354 dm->dqb_itime = di->dqb_itime; 2393 if (di->d_fieldmask & FS_DQ_ITIMER) {
2394 dm->dqb_itime = di->d_itimer;
2355 check_ilim = 1; 2395 check_ilim = 1;
2356 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); 2396 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
2357 } 2397 }
@@ -2361,7 +2401,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2361 dm->dqb_curspace < dm->dqb_bsoftlimit) { 2401 dm->dqb_curspace < dm->dqb_bsoftlimit) {
2362 dm->dqb_btime = 0; 2402 dm->dqb_btime = 0;
2363 clear_bit(DQ_BLKS_B, &dquot->dq_flags); 2403 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
2364 } else if (!(di->dqb_valid & QIF_BTIME)) 2404 } else if (!(di->d_fieldmask & FS_DQ_BTIMER))
2365 /* Set grace only if user hasn't provided his own... */ 2405 /* Set grace only if user hasn't provided his own... */
2366 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; 2406 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
2367 } 2407 }
@@ -2370,7 +2410,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2370 dm->dqb_curinodes < dm->dqb_isoftlimit) { 2410 dm->dqb_curinodes < dm->dqb_isoftlimit) {
2371 dm->dqb_itime = 0; 2411 dm->dqb_itime = 0;
2372 clear_bit(DQ_INODES_B, &dquot->dq_flags); 2412 clear_bit(DQ_INODES_B, &dquot->dq_flags);
2373 } else if (!(di->dqb_valid & QIF_ITIME)) 2413 } else if (!(di->d_fieldmask & FS_DQ_ITIMER))
2374 /* Set grace only if user hasn't provided his own... */ 2414 /* Set grace only if user hasn't provided his own... */
2375 dm->dqb_itime = get_seconds() + dqi->dqi_igrace; 2415 dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
2376 } 2416 }
@@ -2386,7 +2426,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2386} 2426}
2387 2427
2388int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, 2428int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
2389 struct if_dqblk *di) 2429 struct fs_disk_quota *di)
2390{ 2430{
2391 struct dquot *dquot; 2431 struct dquot *dquot;
2392 int rc; 2432 int rc;
@@ -2465,62 +2505,74 @@ const struct quotactl_ops vfs_quotactl_ops = {
2465 .set_dqblk = vfs_set_dqblk 2505 .set_dqblk = vfs_set_dqblk
2466}; 2506};
2467 2507
2508
2509static int do_proc_dqstats(struct ctl_table *table, int write,
2510 void __user *buffer, size_t *lenp, loff_t *ppos)
2511{
2512#ifdef CONFIG_SMP
2513 /* Update global table */
2514 unsigned int type = (int *)table->data - dqstats.stat;
2515 dqstats.stat[type] = dqstats_read(type);
2516#endif
2517 return proc_dointvec(table, write, buffer, lenp, ppos);
2518}
2519
2468static ctl_table fs_dqstats_table[] = { 2520static ctl_table fs_dqstats_table[] = {
2469 { 2521 {
2470 .procname = "lookups", 2522 .procname = "lookups",
2471 .data = &dqstats.lookups, 2523 .data = &dqstats.stat[DQST_LOOKUPS],
2472 .maxlen = sizeof(int), 2524 .maxlen = sizeof(int),
2473 .mode = 0444, 2525 .mode = 0444,
2474 .proc_handler = proc_dointvec, 2526 .proc_handler = do_proc_dqstats,
2475 }, 2527 },
2476 { 2528 {
2477 .procname = "drops", 2529 .procname = "drops",
2478 .data = &dqstats.drops, 2530 .data = &dqstats.stat[DQST_DROPS],
2479 .maxlen = sizeof(int), 2531 .maxlen = sizeof(int),
2480 .mode = 0444, 2532 .mode = 0444,
2481 .proc_handler = proc_dointvec, 2533 .proc_handler = do_proc_dqstats,
2482 }, 2534 },
2483 { 2535 {
2484 .procname = "reads", 2536 .procname = "reads",
2485 .data = &dqstats.reads, 2537 .data = &dqstats.stat[DQST_READS],
2486 .maxlen = sizeof(int), 2538 .maxlen = sizeof(int),
2487 .mode = 0444, 2539 .mode = 0444,
2488 .proc_handler = proc_dointvec, 2540 .proc_handler = do_proc_dqstats,
2489 }, 2541 },
2490 { 2542 {
2491 .procname = "writes", 2543 .procname = "writes",
2492 .data = &dqstats.writes, 2544 .data = &dqstats.stat[DQST_WRITES],
2493 .maxlen = sizeof(int), 2545 .maxlen = sizeof(int),
2494 .mode = 0444, 2546 .mode = 0444,
2495 .proc_handler = proc_dointvec, 2547 .proc_handler = do_proc_dqstats,
2496 }, 2548 },
2497 { 2549 {
2498 .procname = "cache_hits", 2550 .procname = "cache_hits",
2499 .data = &dqstats.cache_hits, 2551 .data = &dqstats.stat[DQST_CACHE_HITS],
2500 .maxlen = sizeof(int), 2552 .maxlen = sizeof(int),
2501 .mode = 0444, 2553 .mode = 0444,
2502 .proc_handler = proc_dointvec, 2554 .proc_handler = do_proc_dqstats,
2503 }, 2555 },
2504 { 2556 {
2505 .procname = "allocated_dquots", 2557 .procname = "allocated_dquots",
2506 .data = &dqstats.allocated_dquots, 2558 .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
2507 .maxlen = sizeof(int), 2559 .maxlen = sizeof(int),
2508 .mode = 0444, 2560 .mode = 0444,
2509 .proc_handler = proc_dointvec, 2561 .proc_handler = do_proc_dqstats,
2510 }, 2562 },
2511 { 2563 {
2512 .procname = "free_dquots", 2564 .procname = "free_dquots",
2513 .data = &dqstats.free_dquots, 2565 .data = &dqstats.stat[DQST_FREE_DQUOTS],
2514 .maxlen = sizeof(int), 2566 .maxlen = sizeof(int),
2515 .mode = 0444, 2567 .mode = 0444,
2516 .proc_handler = proc_dointvec, 2568 .proc_handler = do_proc_dqstats,
2517 }, 2569 },
2518 { 2570 {
2519 .procname = "syncs", 2571 .procname = "syncs",
2520 .data = &dqstats.syncs, 2572 .data = &dqstats.stat[DQST_SYNCS],
2521 .maxlen = sizeof(int), 2573 .maxlen = sizeof(int),
2522 .mode = 0444, 2574 .mode = 0444,
2523 .proc_handler = proc_dointvec, 2575 .proc_handler = do_proc_dqstats,
2524 }, 2576 },
2525#ifdef CONFIG_PRINT_QUOTA_WARNING 2577#ifdef CONFIG_PRINT_QUOTA_WARNING
2526 { 2578 {
@@ -2572,6 +2624,13 @@ static int __init dquot_init(void)
2572 if (!dquot_hash) 2624 if (!dquot_hash)
2573 panic("Cannot create dquot hash table"); 2625 panic("Cannot create dquot hash table");
2574 2626
2627#ifdef CONFIG_SMP
2628 dqstats_pcpu = alloc_percpu(struct dqstats);
2629 if (!dqstats_pcpu)
2630 panic("Cannot create dquot stats table");
2631#endif
2632 memset(&dqstats, 0, sizeof(struct dqstats));
2633
2575 /* Find power-of-two hlist_heads which can fit into allocation */ 2634 /* Find power-of-two hlist_heads which can fit into allocation */
2576 nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); 2635 nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
2577 dq_hash_bits = 0; 2636 dq_hash_bits = 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95388f9b7356..ce3dfd066f59 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -45,36 +45,22 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
45 return security_quotactl(cmd, type, id, sb); 45 return security_quotactl(cmd, type, id, sb);
46} 46}
47 47
48static void quota_sync_one(struct super_block *sb, void *arg)
49{
50 if (sb->s_qcop && sb->s_qcop->quota_sync)
51 sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
52}
53
48static int quota_sync_all(int type) 54static int quota_sync_all(int type)
49{ 55{
50 struct super_block *sb;
51 int ret; 56 int ret;
52 57
53 if (type >= MAXQUOTAS) 58 if (type >= MAXQUOTAS)
54 return -EINVAL; 59 return -EINVAL;
55 ret = security_quotactl(Q_SYNC, type, 0, NULL); 60 ret = security_quotactl(Q_SYNC, type, 0, NULL);
56 if (ret) 61 if (!ret)
57 return ret; 62 iterate_supers(quota_sync_one, &type);
58 63 return ret;
59 spin_lock(&sb_lock);
60restart:
61 list_for_each_entry(sb, &super_blocks, s_list) {
62 if (!sb->s_qcop || !sb->s_qcop->quota_sync)
63 continue;
64
65 sb->s_count++;
66 spin_unlock(&sb_lock);
67 down_read(&sb->s_umount);
68 if (sb->s_root)
69 sb->s_qcop->quota_sync(sb, type, 1);
70 up_read(&sb->s_umount);
71 spin_lock(&sb_lock);
72 if (__put_super_and_need_restart(sb))
73 goto restart;
74 }
75 spin_unlock(&sb_lock);
76
77 return 0;
78} 64}
79 65
80static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, 66static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
@@ -113,8 +99,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
113 struct if_dqinfo info; 99 struct if_dqinfo info;
114 int ret; 100 int ret;
115 101
116 if (!sb_has_quota_active(sb, type))
117 return -ESRCH;
118 if (!sb->s_qcop->get_info) 102 if (!sb->s_qcop->get_info)
119 return -ENOSYS; 103 return -ENOSYS;
120 ret = sb->s_qcop->get_info(sb, type, &info); 104 ret = sb->s_qcop->get_info(sb, type, &info);
@@ -129,43 +113,80 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
129 113
130 if (copy_from_user(&info, addr, sizeof(info))) 114 if (copy_from_user(&info, addr, sizeof(info)))
131 return -EFAULT; 115 return -EFAULT;
132 if (!sb_has_quota_active(sb, type))
133 return -ESRCH;
134 if (!sb->s_qcop->set_info) 116 if (!sb->s_qcop->set_info)
135 return -ENOSYS; 117 return -ENOSYS;
136 return sb->s_qcop->set_info(sb, type, &info); 118 return sb->s_qcop->set_info(sb, type, &info);
137} 119}
138 120
121static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
122{
123 dst->dqb_bhardlimit = src->d_blk_hardlimit;
124 dst->dqb_bsoftlimit = src->d_blk_softlimit;
125 dst->dqb_curspace = src->d_bcount;
126 dst->dqb_ihardlimit = src->d_ino_hardlimit;
127 dst->dqb_isoftlimit = src->d_ino_softlimit;
128 dst->dqb_curinodes = src->d_icount;
129 dst->dqb_btime = src->d_btimer;
130 dst->dqb_itime = src->d_itimer;
131 dst->dqb_valid = QIF_ALL;
132}
133
139static int quota_getquota(struct super_block *sb, int type, qid_t id, 134static int quota_getquota(struct super_block *sb, int type, qid_t id,
140 void __user *addr) 135 void __user *addr)
141{ 136{
137 struct fs_disk_quota fdq;
142 struct if_dqblk idq; 138 struct if_dqblk idq;
143 int ret; 139 int ret;
144 140
145 if (!sb_has_quota_active(sb, type))
146 return -ESRCH;
147 if (!sb->s_qcop->get_dqblk) 141 if (!sb->s_qcop->get_dqblk)
148 return -ENOSYS; 142 return -ENOSYS;
149 ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); 143 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
150 if (ret) 144 if (ret)
151 return ret; 145 return ret;
146 copy_to_if_dqblk(&idq, &fdq);
152 if (copy_to_user(addr, &idq, sizeof(idq))) 147 if (copy_to_user(addr, &idq, sizeof(idq)))
153 return -EFAULT; 148 return -EFAULT;
154 return 0; 149 return 0;
155} 150}
156 151
152static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src)
153{
154 dst->d_blk_hardlimit = src->dqb_bhardlimit;
155 dst->d_blk_softlimit = src->dqb_bsoftlimit;
156 dst->d_bcount = src->dqb_curspace;
157 dst->d_ino_hardlimit = src->dqb_ihardlimit;
158 dst->d_ino_softlimit = src->dqb_isoftlimit;
159 dst->d_icount = src->dqb_curinodes;
160 dst->d_btimer = src->dqb_btime;
161 dst->d_itimer = src->dqb_itime;
162
163 dst->d_fieldmask = 0;
164 if (src->dqb_valid & QIF_BLIMITS)
165 dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD;
166 if (src->dqb_valid & QIF_SPACE)
167 dst->d_fieldmask |= FS_DQ_BCOUNT;
168 if (src->dqb_valid & QIF_ILIMITS)
169 dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD;
170 if (src->dqb_valid & QIF_INODES)
171 dst->d_fieldmask |= FS_DQ_ICOUNT;
172 if (src->dqb_valid & QIF_BTIME)
173 dst->d_fieldmask |= FS_DQ_BTIMER;
174 if (src->dqb_valid & QIF_ITIME)
175 dst->d_fieldmask |= FS_DQ_ITIMER;
176}
177
157static int quota_setquota(struct super_block *sb, int type, qid_t id, 178static int quota_setquota(struct super_block *sb, int type, qid_t id,
158 void __user *addr) 179 void __user *addr)
159{ 180{
181 struct fs_disk_quota fdq;
160 struct if_dqblk idq; 182 struct if_dqblk idq;
161 183
162 if (copy_from_user(&idq, addr, sizeof(idq))) 184 if (copy_from_user(&idq, addr, sizeof(idq)))
163 return -EFAULT; 185 return -EFAULT;
164 if (!sb_has_quota_active(sb, type))
165 return -ESRCH;
166 if (!sb->s_qcop->set_dqblk) 186 if (!sb->s_qcop->set_dqblk)
167 return -ENOSYS; 187 return -ENOSYS;
168 return sb->s_qcop->set_dqblk(sb, type, id, &idq); 188 copy_from_if_dqblk(&fdq, &idq);
189 return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
169} 190}
170 191
171static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) 192static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -199,9 +220,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
199 220
200 if (copy_from_user(&fdq, addr, sizeof(fdq))) 221 if (copy_from_user(&fdq, addr, sizeof(fdq)))
201 return -EFAULT; 222 return -EFAULT;
202 if (!sb->s_qcop->set_xquota) 223 if (!sb->s_qcop->set_dqblk)
203 return -ENOSYS; 224 return -ENOSYS;
204 return sb->s_qcop->set_xquota(sb, type, id, &fdq); 225 return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
205} 226}
206 227
207static int quota_getxquota(struct super_block *sb, int type, qid_t id, 228static int quota_getxquota(struct super_block *sb, int type, qid_t id,
@@ -210,9 +231,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
210 struct fs_disk_quota fdq; 231 struct fs_disk_quota fdq;
211 int ret; 232 int ret;
212 233
213 if (!sb->s_qcop->get_xquota) 234 if (!sb->s_qcop->get_dqblk)
214 return -ENOSYS; 235 return -ENOSYS;
215 ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); 236 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
216 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) 237 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
217 return -EFAULT; 238 return -EFAULT;
218 return ret; 239 return ret;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bcfb178..24f03407eeb5 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -60,9 +60,17 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
60static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) 60static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
61{ 61{
62 struct super_block *sb = info->dqi_sb; 62 struct super_block *sb = info->dqi_sb;
63 ssize_t ret;
63 64
64 return sb->s_op->quota_write(sb, info->dqi_type, buf, 65 ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
65 info->dqi_usable_bs, blk << info->dqi_blocksize_bits); 66 info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
67 if (ret != info->dqi_usable_bs) {
68 q_warn(KERN_WARNING "VFS: dquota write failed on "
69 "dev %s\n", sb->s_id);
70 if (ret >= 0)
71 ret = -EIO;
72 }
73 return ret;
66} 74}
67 75
68/* Remove empty block from list and return it */ 76/* Remove empty block from list and return it */
@@ -152,7 +160,7 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
152 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); 160 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
153 /* No matter whether write succeeds block is out of list */ 161 /* No matter whether write succeeds block is out of list */
154 if (write_blk(info, blk, buf) < 0) 162 if (write_blk(info, blk, buf) < 0)
155 printk(KERN_ERR 163 q_warn(KERN_ERR
156 "VFS: Can't write block (%u) with free entries.\n", 164 "VFS: Can't write block (%u) with free entries.\n",
157 blk); 165 blk);
158 return 0; 166 return 0;
@@ -244,7 +252,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
244 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { 252 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
245 *err = remove_free_dqentry(info, buf, blk); 253 *err = remove_free_dqentry(info, buf, blk);
246 if (*err < 0) { 254 if (*err < 0) {
247 printk(KERN_ERR "VFS: find_free_dqentry(): Can't " 255 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't "
248 "remove block (%u) from entry free list.\n", 256 "remove block (%u) from entry free list.\n",
249 blk); 257 blk);
250 goto out_buf; 258 goto out_buf;
@@ -268,7 +276,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
268#endif 276#endif
269 *err = write_blk(info, blk, buf); 277 *err = write_blk(info, blk, buf);
270 if (*err < 0) { 278 if (*err < 0) {
271 printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " 279 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
272 "data block %u.\n", blk); 280 "data block %u.\n", blk);
273 goto out_buf; 281 goto out_buf;
274 } 282 }
@@ -303,7 +311,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
303 } else { 311 } else {
304 ret = read_blk(info, *treeblk, buf); 312 ret = read_blk(info, *treeblk, buf);
305 if (ret < 0) { 313 if (ret < 0) {
306 printk(KERN_ERR "VFS: Can't read tree quota block " 314 q_warn(KERN_ERR "VFS: Can't read tree quota block "
307 "%u.\n", *treeblk); 315 "%u.\n", *treeblk);
308 goto out_buf; 316 goto out_buf;
309 } 317 }
@@ -365,7 +373,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
365 if (!dquot->dq_off) { 373 if (!dquot->dq_off) {
366 ret = dq_insert_tree(info, dquot); 374 ret = dq_insert_tree(info, dquot);
367 if (ret < 0) { 375 if (ret < 0) {
368 printk(KERN_ERR "VFS: Error %zd occurred while " 376 q_warn(KERN_ERR "VFS: Error %zd occurred while "
369 "creating quota.\n", ret); 377 "creating quota.\n", ret);
370 kfree(ddquot); 378 kfree(ddquot);
371 return ret; 379 return ret;
@@ -377,14 +385,14 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
377 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, 385 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
378 dquot->dq_off); 386 dquot->dq_off);
379 if (ret != info->dqi_entry_size) { 387 if (ret != info->dqi_entry_size) {
380 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 388 q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n",
381 sb->s_id); 389 sb->s_id);
382 if (ret >= 0) 390 if (ret >= 0)
383 ret = -ENOSPC; 391 ret = -ENOSPC;
384 } else { 392 } else {
385 ret = 0; 393 ret = 0;
386 } 394 }
387 dqstats.writes++; 395 dqstats_inc(DQST_WRITES);
388 kfree(ddquot); 396 kfree(ddquot);
389 397
390 return ret; 398 return ret;
@@ -402,14 +410,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
402 if (!buf) 410 if (!buf)
403 return -ENOMEM; 411 return -ENOMEM;
404 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { 412 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
405 printk(KERN_ERR "VFS: Quota structure has offset to other " 413 q_warn(KERN_ERR "VFS: Quota structure has offset to other "
406 "block (%u) than it should (%u).\n", blk, 414 "block (%u) than it should (%u).\n", blk,
407 (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); 415 (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
408 goto out_buf; 416 goto out_buf;
409 } 417 }
410 ret = read_blk(info, blk, buf); 418 ret = read_blk(info, blk, buf);
411 if (ret < 0) { 419 if (ret < 0) {
412 printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); 420 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
413 goto out_buf; 421 goto out_buf;
414 } 422 }
415 dh = (struct qt_disk_dqdbheader *)buf; 423 dh = (struct qt_disk_dqdbheader *)buf;
@@ -419,7 +427,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
419 if (ret >= 0) 427 if (ret >= 0)
420 ret = put_free_dqblk(info, buf, blk); 428 ret = put_free_dqblk(info, buf, blk);
421 if (ret < 0) { 429 if (ret < 0) {
422 printk(KERN_ERR "VFS: Can't move quota data block (%u) " 430 q_warn(KERN_ERR "VFS: Can't move quota data block (%u) "
423 "to free list.\n", blk); 431 "to free list.\n", blk);
424 goto out_buf; 432 goto out_buf;
425 } 433 }
@@ -432,14 +440,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
432 /* Insert will write block itself */ 440 /* Insert will write block itself */
433 ret = insert_free_dqentry(info, buf, blk); 441 ret = insert_free_dqentry(info, buf, blk);
434 if (ret < 0) { 442 if (ret < 0) {
435 printk(KERN_ERR "VFS: Can't insert quota data " 443 q_warn(KERN_ERR "VFS: Can't insert quota data "
436 "block (%u) to free entry list.\n", blk); 444 "block (%u) to free entry list.\n", blk);
437 goto out_buf; 445 goto out_buf;
438 } 446 }
439 } else { 447 } else {
440 ret = write_blk(info, blk, buf); 448 ret = write_blk(info, blk, buf);
441 if (ret < 0) { 449 if (ret < 0) {
442 printk(KERN_ERR "VFS: Can't write quota data " 450 q_warn(KERN_ERR "VFS: Can't write quota data "
443 "block %u\n", blk); 451 "block %u\n", blk);
444 goto out_buf; 452 goto out_buf;
445 } 453 }
@@ -464,7 +472,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
464 return -ENOMEM; 472 return -ENOMEM;
465 ret = read_blk(info, *blk, buf); 473 ret = read_blk(info, *blk, buf);
466 if (ret < 0) { 474 if (ret < 0) {
467 printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); 475 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
468 goto out_buf; 476 goto out_buf;
469 } 477 }
470 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); 478 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -488,7 +496,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
488 } else { 496 } else {
489 ret = write_blk(info, *blk, buf); 497 ret = write_blk(info, *blk, buf);
490 if (ret < 0) 498 if (ret < 0)
491 printk(KERN_ERR "VFS: Can't write quota tree " 499 q_warn(KERN_ERR "VFS: Can't write quota tree "
492 "block %u.\n", *blk); 500 "block %u.\n", *blk);
493 } 501 }
494 } 502 }
@@ -521,7 +529,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
521 return -ENOMEM; 529 return -ENOMEM;
522 ret = read_blk(info, blk, buf); 530 ret = read_blk(info, blk, buf);
523 if (ret < 0) { 531 if (ret < 0) {
524 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 532 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
525 goto out_buf; 533 goto out_buf;
526 } 534 }
527 ddquot = buf + sizeof(struct qt_disk_dqdbheader); 535 ddquot = buf + sizeof(struct qt_disk_dqdbheader);
@@ -531,7 +539,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
531 ddquot += info->dqi_entry_size; 539 ddquot += info->dqi_entry_size;
532 } 540 }
533 if (i == qtree_dqstr_in_blk(info)) { 541 if (i == qtree_dqstr_in_blk(info)) {
534 printk(KERN_ERR "VFS: Quota for id %u referenced " 542 q_warn(KERN_ERR "VFS: Quota for id %u referenced "
535 "but not present.\n", dquot->dq_id); 543 "but not present.\n", dquot->dq_id);
536 ret = -EIO; 544 ret = -EIO;
537 goto out_buf; 545 goto out_buf;
@@ -556,7 +564,7 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
556 return -ENOMEM; 564 return -ENOMEM;
557 ret = read_blk(info, blk, buf); 565 ret = read_blk(info, blk, buf);
558 if (ret < 0) { 566 if (ret < 0) {
559 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 567 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
560 goto out_buf; 568 goto out_buf;
561 } 569 }
562 ret = 0; 570 ret = 0;
@@ -599,7 +607,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
599 offset = find_dqentry(info, dquot); 607 offset = find_dqentry(info, dquot);
600 if (offset <= 0) { /* Entry not present? */ 608 if (offset <= 0) { /* Entry not present? */
601 if (offset < 0) 609 if (offset < 0)
602 printk(KERN_ERR "VFS: Can't read quota " 610 q_warn(KERN_ERR "VFS: Can't read quota "
603 "structure for id %u.\n", dquot->dq_id); 611 "structure for id %u.\n", dquot->dq_id);
604 dquot->dq_off = 0; 612 dquot->dq_off = 0;
605 set_bit(DQ_FAKE_B, &dquot->dq_flags); 613 set_bit(DQ_FAKE_B, &dquot->dq_flags);
@@ -617,7 +625,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
617 if (ret != info->dqi_entry_size) { 625 if (ret != info->dqi_entry_size) {
618 if (ret >= 0) 626 if (ret >= 0)
619 ret = -EIO; 627 ret = -EIO;
620 printk(KERN_ERR "VFS: Error while reading quota " 628 q_warn(KERN_ERR "VFS: Error while reading quota "
621 "structure for id %u.\n", dquot->dq_id); 629 "structure for id %u.\n", dquot->dq_id);
622 set_bit(DQ_FAKE_B, &dquot->dq_flags); 630 set_bit(DQ_FAKE_B, &dquot->dq_flags);
623 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 631 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -634,7 +642,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
634 spin_unlock(&dq_data_lock); 642 spin_unlock(&dq_data_lock);
635 kfree(ddquot); 643 kfree(ddquot);
636out: 644out:
637 dqstats.reads++; 645 dqstats_inc(DQST_READS);
638 return ret; 646 return ret;
639} 647}
640EXPORT_SYMBOL(qtree_read_dquot); 648EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h
index a1ab8db81a51..ccc3e71fb1d8 100644
--- a/fs/quota/quota_tree.h
+++ b/fs/quota/quota_tree.h
@@ -22,4 +22,10 @@ struct qt_disk_dqdbheader {
22 22
23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */ 23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
24 24
25#define q_warn(fmt, args...) \
26do { \
27 if (printk_ratelimit()) \
28 printk(fmt, ## args); \
29} while(0)
30
25#endif /* _LINUX_QUOTAIO_TREE_H */ 31#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e9c008..4af344c5852a 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
71 dquot->dq_dqb.dqb_ihardlimit == 0 && 71 dquot->dq_dqb.dqb_ihardlimit == 0 &&
72 dquot->dq_dqb.dqb_isoftlimit == 0) 72 dquot->dq_dqb.dqb_isoftlimit == 0)
73 set_bit(DQ_FAKE_B, &dquot->dq_flags); 73 set_bit(DQ_FAKE_B, &dquot->dq_flags);
74 dqstats.reads++; 74 dqstats_inc(DQST_READS);
75 75
76 return 0; 76 return 0;
77} 77}
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
104 ret = 0; 104 ret = 0;
105 105
106out: 106out:
107 dqstats.writes++; 107 dqstats_inc(DQST_WRITES);
108 108
109 return ret; 109 return ret;
110} 110}
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index e3da02f4986f..135206af1458 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -63,7 +63,7 @@ static int v2_read_header(struct super_block *sb, int type,
63 size = sb->s_op->quota_read(sb, type, (char *)dqhead, 63 size = sb->s_op->quota_read(sb, type, (char *)dqhead,
64 sizeof(struct v2_disk_dqheader), 0); 64 sizeof(struct v2_disk_dqheader), 0);
65 if (size != sizeof(struct v2_disk_dqheader)) { 65 if (size != sizeof(struct v2_disk_dqheader)) {
66 printk(KERN_WARNING "quota_v2: Failed header read:" 66 q_warn(KERN_WARNING "quota_v2: Failed header read:"
67 " expected=%zd got=%zd\n", 67 " expected=%zd got=%zd\n",
68 sizeof(struct v2_disk_dqheader), size); 68 sizeof(struct v2_disk_dqheader), size);
69 return 0; 69 return 0;
@@ -106,7 +106,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
108 if (size != sizeof(struct v2_disk_dqinfo)) { 108 if (size != sizeof(struct v2_disk_dqinfo)) {
109 printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", 109 q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
110 sb->s_id); 110 sb->s_id);
111 return -1; 111 return -1;
112 } 112 }
@@ -167,7 +167,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
167 size = sb->s_op->quota_write(sb, type, (char *)&dinfo, 167 size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
168 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 168 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
169 if (size != sizeof(struct v2_disk_dqinfo)) { 169 if (size != sizeof(struct v2_disk_dqinfo)) {
170 printk(KERN_WARNING "Can't write info structure on device %s.\n", 170 q_warn(KERN_WARNING "Can't write info structure on device %s.\n",
171 sb->s_id); 171 sb->s_id);
172 return -1; 172 return -1;
173 } 173 }
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c94853473ca9..a5ebae70dc6d 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -52,14 +52,13 @@ static struct backing_dev_info ramfs_backing_dev_info = {
52 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 52 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
53}; 53};
54 54
55struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) 55struct inode *ramfs_get_inode(struct super_block *sb,
56 const struct inode *dir, int mode, dev_t dev)
56{ 57{
57 struct inode * inode = new_inode(sb); 58 struct inode * inode = new_inode(sb);
58 59
59 if (inode) { 60 if (inode) {
60 inode->i_mode = mode; 61 inode_init_owner(inode, dir, mode);
61 inode->i_uid = current_fsuid();
62 inode->i_gid = current_fsgid();
63 inode->i_mapping->a_ops = &ramfs_aops; 62 inode->i_mapping->a_ops = &ramfs_aops;
64 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 63 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
65 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 64 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
@@ -95,15 +94,10 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
95static int 94static int
96ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 95ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
97{ 96{
98 struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); 97 struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
99 int error = -ENOSPC; 98 int error = -ENOSPC;
100 99
101 if (inode) { 100 if (inode) {
102 if (dir->i_mode & S_ISGID) {
103 inode->i_gid = dir->i_gid;
104 if (S_ISDIR(mode))
105 inode->i_mode |= S_ISGID;
106 }
107 d_instantiate(dentry, inode); 101 d_instantiate(dentry, inode);
108 dget(dentry); /* Extra count - pin the dentry in core */ 102 dget(dentry); /* Extra count - pin the dentry in core */
109 error = 0; 103 error = 0;
@@ -130,13 +124,11 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
130 struct inode *inode; 124 struct inode *inode;
131 int error = -ENOSPC; 125 int error = -ENOSPC;
132 126
133 inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); 127 inode = ramfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
134 if (inode) { 128 if (inode) {
135 int l = strlen(symname)+1; 129 int l = strlen(symname)+1;
136 error = page_symlink(inode, symname, l); 130 error = page_symlink(inode, symname, l);
137 if (!error) { 131 if (!error) {
138 if (dir->i_mode & S_ISGID)
139 inode->i_gid = dir->i_gid;
140 d_instantiate(dentry, inode); 132 d_instantiate(dentry, inode);
141 dget(dentry); 133 dget(dentry);
142 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 134 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -214,7 +206,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
214 return 0; 206 return 0;
215} 207}
216 208
217static int ramfs_fill_super(struct super_block * sb, void * data, int silent) 209int ramfs_fill_super(struct super_block *sb, void *data, int silent)
218{ 210{
219 struct ramfs_fs_info *fsi; 211 struct ramfs_fs_info *fsi;
220 struct inode *inode = NULL; 212 struct inode *inode = NULL;
@@ -241,7 +233,7 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
241 sb->s_op = &ramfs_ops; 233 sb->s_op = &ramfs_ops;
242 sb->s_time_gran = 1; 234 sb->s_time_gran = 1;
243 235
244 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); 236 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
245 if (!inode) { 237 if (!inode) {
246 err = -ENOMEM; 238 err = -ENOMEM;
247 goto fail; 239 goto fail;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1d9c12714c5c..9977df9f3a54 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp,
147 barrier_done = reiserfs_commit_for_inode(inode); 147 barrier_done = reiserfs_commit_for_inode(inode);
148 reiserfs_write_unlock(inode->i_sb); 148 reiserfs_write_unlock(inode->i_sb);
149 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) 149 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
150 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 150 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
151 BLKDEV_IFL_WAIT);
151 if (barrier_done < 0) 152 if (barrier_done < 0)
152 return barrier_done; 153 return barrier_done;
153 return (err < 0) ? -EIO : 0; 154 return (err < 0) ? -EIO : 0;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index dc2c65e04853..0f22fdaf54ac 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3077 3077
3078 depth = reiserfs_write_lock_once(inode->i_sb); 3078 depth = reiserfs_write_lock_once(inode->i_sb);
3079 if (attr->ia_valid & ATTR_SIZE) { 3079 if (is_quota_modification(inode, attr))
3080 dquot_initialize(inode); 3080 dquot_initialize(inode);
3081 3081
3082 if (attr->ia_valid & ATTR_SIZE) {
3082 /* version 2 items will be caught by the s_maxbytes check 3083 /* version 2 items will be caught by the s_maxbytes check
3083 ** done for us in vmtruncate 3084 ** done for us in vmtruncate
3084 */ 3085 */
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index d0c43cb99ffc..ee78d4a0086a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -561,23 +561,13 @@ static int drop_new_inode(struct inode *inode)
561*/ 561*/
562static int new_inode_init(struct inode *inode, struct inode *dir, int mode) 562static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
563{ 563{
564
565 /* the quota init calls have to know who to charge the quota to, so
566 ** we have to set uid and gid here
567 */
568 inode->i_uid = current_fsuid();
569 inode->i_mode = mode;
570 /* Make inode invalid - just in case we are going to drop it before 564 /* Make inode invalid - just in case we are going to drop it before
571 * the initialization happens */ 565 * the initialization happens */
572 INODE_PKEY(inode)->k_objectid = 0; 566 INODE_PKEY(inode)->k_objectid = 0;
573 567 /* the quota init calls have to know who to charge the quota to, so
574 if (dir->i_mode & S_ISGID) { 568 ** we have to set uid and gid here
575 inode->i_gid = dir->i_gid; 569 */
576 if (S_ISDIR(mode)) 570 inode_init_owner(inode, dir, mode);
577 inode->i_mode |= S_ISGID;
578 } else {
579 inode->i_gid = current_fsgid();
580 }
581 dquot_initialize(inode); 571 dquot_initialize(inode);
582 return 0; 572 return 0;
583} 573}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e7cc00e636dc..8c4cf273c672 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -723,11 +723,11 @@ out:
723 (handler) = *(handlers)++) 723 (handler) = *(handlers)++)
724 724
725/* This is the implementation for the xattr plugin infrastructure */ 725/* This is the implementation for the xattr plugin infrastructure */
726static inline struct xattr_handler * 726static inline const struct xattr_handler *
727find_xattr_handler_prefix(struct xattr_handler **handlers, 727find_xattr_handler_prefix(const struct xattr_handler **handlers,
728 const char *name) 728 const char *name)
729{ 729{
730 struct xattr_handler *xah; 730 const struct xattr_handler *xah;
731 731
732 if (!handlers) 732 if (!handlers)
733 return NULL; 733 return NULL;
@@ -748,7 +748,7 @@ ssize_t
748reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 748reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
749 size_t size) 749 size_t size)
750{ 750{
751 struct xattr_handler *handler; 751 const struct xattr_handler *handler;
752 752
753 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); 753 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
754 754
@@ -767,7 +767,7 @@ int
767reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 767reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
768 size_t size, int flags) 768 size_t size, int flags)
769{ 769{
770 struct xattr_handler *handler; 770 const struct xattr_handler *handler;
771 771
772 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); 772 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
773 773
@@ -784,7 +784,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
784 */ 784 */
785int reiserfs_removexattr(struct dentry *dentry, const char *name) 785int reiserfs_removexattr(struct dentry *dentry, const char *name)
786{ 786{
787 struct xattr_handler *handler; 787 const struct xattr_handler *handler;
788 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); 788 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
789 789
790 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 790 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -807,7 +807,7 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
807 size_t size; 807 size_t size;
808 if (name[0] != '.' || 808 if (name[0] != '.' ||
809 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 809 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
810 struct xattr_handler *handler; 810 const struct xattr_handler *handler;
811 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, 811 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
812 name); 812 name);
813 if (!handler) /* Unsupported xattr name */ 813 if (!handler) /* Unsupported xattr name */
@@ -920,7 +920,7 @@ static int create_privroot(struct dentry *dentry) { return 0; }
920#endif 920#endif
921 921
922/* Actual operations that are exported to VFS-land */ 922/* Actual operations that are exported to VFS-land */
923struct xattr_handler *reiserfs_xattr_handlers[] = { 923const struct xattr_handler *reiserfs_xattr_handlers[] = {
924#ifdef CONFIG_REISERFS_FS_XATTR 924#ifdef CONFIG_REISERFS_FS_XATTR
925 &reiserfs_xattr_user_handler, 925 &reiserfs_xattr_user_handler,
926 &reiserfs_xattr_trusted_handler, 926 &reiserfs_xattr_trusted_handler,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 9cdb759645a9..536d697a8a28 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -500,7 +500,7 @@ static size_t posix_acl_access_list(struct dentry *dentry, char *list,
500 return size; 500 return size;
501} 501}
502 502
503struct xattr_handler reiserfs_posix_acl_access_handler = { 503const struct xattr_handler reiserfs_posix_acl_access_handler = {
504 .prefix = POSIX_ACL_XATTR_ACCESS, 504 .prefix = POSIX_ACL_XATTR_ACCESS,
505 .flags = ACL_TYPE_ACCESS, 505 .flags = ACL_TYPE_ACCESS,
506 .get = posix_acl_get, 506 .get = posix_acl_get,
@@ -520,7 +520,7 @@ static size_t posix_acl_default_list(struct dentry *dentry, char *list,
520 return size; 520 return size;
521} 521}
522 522
523struct xattr_handler reiserfs_posix_acl_default_handler = { 523const struct xattr_handler reiserfs_posix_acl_default_handler = {
524 .prefix = POSIX_ACL_XATTR_DEFAULT, 524 .prefix = POSIX_ACL_XATTR_DEFAULT,
525 .flags = ACL_TYPE_DEFAULT, 525 .flags = ACL_TYPE_DEFAULT,
526 .get = posix_acl_get, 526 .get = posix_acl_get,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 7271a477c041..237c6928d3c6 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -111,7 +111,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec)
111 sec->value = NULL; 111 sec->value = NULL;
112} 112}
113 113
114struct xattr_handler reiserfs_xattr_security_handler = { 114const struct xattr_handler reiserfs_xattr_security_handler = {
115 .prefix = XATTR_SECURITY_PREFIX, 115 .prefix = XATTR_SECURITY_PREFIX,
116 .get = security_get, 116 .get = security_get,
117 .set = security_set, 117 .set = security_set,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 5b08aaca3daf..9883736ce3ec 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -48,7 +48,7 @@ static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
48 return len; 48 return len;
49} 49}
50 50
51struct xattr_handler reiserfs_xattr_trusted_handler = { 51const struct xattr_handler reiserfs_xattr_trusted_handler = {
52 .prefix = XATTR_TRUSTED_PREFIX, 52 .prefix = XATTR_TRUSTED_PREFIX,
53 .get = trusted_get, 53 .get = trusted_get,
54 .set = trusted_set, 54 .set = trusted_set,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 75d59c49b911..45ae1a00013a 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -44,7 +44,7 @@ static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
44 return len; 44 return len;
45} 45}
46 46
47struct xattr_handler reiserfs_xattr_user_handler = { 47const struct xattr_handler reiserfs_xattr_user_handler = {
48 .prefix = XATTR_USER_PREFIX, 48 .prefix = XATTR_USER_PREFIX,
49 .get = user_get, 49 .get = user_get,
50 .set = user_set, 50 .set = user_set,
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 3e4803b4427e..6c978428892d 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -39,7 +39,7 @@ const struct file_operations smb_dir_operations =
39{ 39{
40 .read = generic_read_dir, 40 .read = generic_read_dir,
41 .readdir = smb_readdir, 41 .readdir = smb_readdir,
42 .ioctl = smb_ioctl, 42 .unlocked_ioctl = smb_ioctl,
43 .open = smb_dir_open, 43 .open = smb_dir_open,
44}; 44};
45 45
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index dbf6548bbf06..84ecf0e43f91 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -437,7 +437,7 @@ const struct file_operations smb_file_operations =
437 .aio_read = smb_file_aio_read, 437 .aio_read = smb_file_aio_read,
438 .write = do_sync_write, 438 .write = do_sync_write,
439 .aio_write = smb_file_aio_write, 439 .aio_write = smb_file_aio_write,
440 .ioctl = smb_ioctl, 440 .unlocked_ioctl = smb_ioctl,
441 .mmap = smb_file_mmap, 441 .mmap = smb_file_mmap,
442 .open = smb_file_open, 442 .open = smb_file_open,
443 .release = smb_file_release, 443 .release = smb_file_release,
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
index dbae1f8ea26f..07215312ad39 100644
--- a/fs/smbfs/ioctl.c
+++ b/fs/smbfs/ioctl.c
@@ -13,6 +13,7 @@
13#include <linux/time.h> 13#include <linux/time.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/highuid.h> 15#include <linux/highuid.h>
16#include <linux/smp_lock.h>
16#include <linux/net.h> 17#include <linux/net.h>
17 18
18#include <linux/smb_fs.h> 19#include <linux/smb_fs.h>
@@ -22,14 +23,14 @@
22 23
23#include "proto.h" 24#include "proto.h"
24 25
25int 26long
26smb_ioctl(struct inode *inode, struct file *filp, 27smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
27 unsigned int cmd, unsigned long arg)
28{ 28{
29 struct smb_sb_info *server = server_from_inode(inode); 29 struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
30 struct smb_conn_opt opt; 30 struct smb_conn_opt opt;
31 int result = -EINVAL; 31 int result = -EINVAL;
32 32
33 lock_kernel();
33 switch (cmd) { 34 switch (cmd) {
34 uid16_t uid16; 35 uid16_t uid16;
35 uid_t uid32; 36 uid_t uid32;
@@ -62,6 +63,7 @@ smb_ioctl(struct inode *inode, struct file *filp,
62 default: 63 default:
63 break; 64 break;
64 } 65 }
66 unlock_kernel();
65 67
66 return result; 68 return result;
67} 69}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 03f456c1b7d4..05939a6f43e6 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -67,7 +67,7 @@ extern const struct address_space_operations smb_file_aops;
67extern const struct file_operations smb_file_operations; 67extern const struct file_operations smb_file_operations;
68extern const struct inode_operations smb_file_inode_operations; 68extern const struct inode_operations smb_file_inode_operations;
69/* ioctl.c */ 69/* ioctl.c */
70extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); 70extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
71/* smbiod.c */ 71/* smbiod.c */
72extern void smbiod_wake_up(void); 72extern void smbiod_wake_up(void);
73extern int smbiod_register_server(struct smb_sb_info *server); 73extern int smbiod_register_server(struct smb_sb_info *server);
diff --git a/fs/splice.c b/fs/splice.c
index 9313b6124a2e..ac22b00d86c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
193 break; 193 break;
194 } 194 }
195 195
196 if (pipe->nrbufs < PIPE_BUFFERS) { 196 if (pipe->nrbufs < pipe->buffers) {
197 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 197 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
198 struct pipe_buffer *buf = pipe->bufs + newbuf; 198 struct pipe_buffer *buf = pipe->bufs + newbuf;
199 199
200 buf->page = spd->pages[page_nr]; 200 buf->page = spd->pages[page_nr];
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
214 214
215 if (!--spd->nr_pages) 215 if (!--spd->nr_pages)
216 break; 216 break;
217 if (pipe->nrbufs < PIPE_BUFFERS) 217 if (pipe->nrbufs < pipe->buffers)
218 continue; 218 continue;
219 219
220 break; 220 break;
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
265 page_cache_release(spd->pages[i]); 265 page_cache_release(spd->pages[i]);
266} 266}
267 267
268/*
269 * Check if we need to grow the arrays holding pages and partial page
270 * descriptions.
271 */
272int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
273{
274 if (pipe->buffers <= PIPE_DEF_BUFFERS)
275 return 0;
276
277 spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
278 spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
279
280 if (spd->pages && spd->partial)
281 return 0;
282
283 kfree(spd->pages);
284 kfree(spd->partial);
285 return -ENOMEM;
286}
287
288void splice_shrink_spd(struct pipe_inode_info *pipe,
289 struct splice_pipe_desc *spd)
290{
291 if (pipe->buffers <= PIPE_DEF_BUFFERS)
292 return;
293
294 kfree(spd->pages);
295 kfree(spd->partial);
296}
297
268static int 298static int
269__generic_file_splice_read(struct file *in, loff_t *ppos, 299__generic_file_splice_read(struct file *in, loff_t *ppos,
270 struct pipe_inode_info *pipe, size_t len, 300 struct pipe_inode_info *pipe, size_t len,
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
272{ 302{
273 struct address_space *mapping = in->f_mapping; 303 struct address_space *mapping = in->f_mapping;
274 unsigned int loff, nr_pages, req_pages; 304 unsigned int loff, nr_pages, req_pages;
275 struct page *pages[PIPE_BUFFERS]; 305 struct page *pages[PIPE_DEF_BUFFERS];
276 struct partial_page partial[PIPE_BUFFERS]; 306 struct partial_page partial[PIPE_DEF_BUFFERS];
277 struct page *page; 307 struct page *page;
278 pgoff_t index, end_index; 308 pgoff_t index, end_index;
279 loff_t isize; 309 loff_t isize;
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
286 .spd_release = spd_release_page, 316 .spd_release = spd_release_page,
287 }; 317 };
288 318
319 if (splice_grow_spd(pipe, &spd))
320 return -ENOMEM;
321
289 index = *ppos >> PAGE_CACHE_SHIFT; 322 index = *ppos >> PAGE_CACHE_SHIFT;
290 loff = *ppos & ~PAGE_CACHE_MASK; 323 loff = *ppos & ~PAGE_CACHE_MASK;
291 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 324 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
292 nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); 325 nr_pages = min(req_pages, pipe->buffers);
293 326
294 /* 327 /*
295 * Lookup the (hopefully) full range of pages we need. 328 * Lookup the (hopefully) full range of pages we need.
296 */ 329 */
297 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); 330 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
298 index += spd.nr_pages; 331 index += spd.nr_pages;
299 332
300 /* 333 /*
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
335 unlock_page(page); 368 unlock_page(page);
336 } 369 }
337 370
338 pages[spd.nr_pages++] = page; 371 spd.pages[spd.nr_pages++] = page;
339 index++; 372 index++;
340 } 373 }
341 374
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
356 * this_len is the max we'll use from this page 389 * this_len is the max we'll use from this page
357 */ 390 */
358 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); 391 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
359 page = pages[page_nr]; 392 page = spd.pages[page_nr];
360 393
361 if (PageReadahead(page)) 394 if (PageReadahead(page))
362 page_cache_async_readahead(mapping, &in->f_ra, in, 395 page_cache_async_readahead(mapping, &in->f_ra, in,
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
393 error = -ENOMEM; 426 error = -ENOMEM;
394 break; 427 break;
395 } 428 }
396 page_cache_release(pages[page_nr]); 429 page_cache_release(spd.pages[page_nr]);
397 pages[page_nr] = page; 430 spd.pages[page_nr] = page;
398 } 431 }
399 /* 432 /*
400 * page was already under io and is now done, great 433 * page was already under io and is now done, great
@@ -451,8 +484,8 @@ fill_it:
451 len = this_len; 484 len = this_len;
452 } 485 }
453 486
454 partial[page_nr].offset = loff; 487 spd.partial[page_nr].offset = loff;
455 partial[page_nr].len = this_len; 488 spd.partial[page_nr].len = this_len;
456 len -= this_len; 489 len -= this_len;
457 loff = 0; 490 loff = 0;
458 spd.nr_pages++; 491 spd.nr_pages++;
@@ -464,12 +497,13 @@ fill_it:
464 * we got, 'nr_pages' is how many pages are in the map. 497 * we got, 'nr_pages' is how many pages are in the map.
465 */ 498 */
466 while (page_nr < nr_pages) 499 while (page_nr < nr_pages)
467 page_cache_release(pages[page_nr++]); 500 page_cache_release(spd.pages[page_nr++]);
468 in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 501 in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
469 502
470 if (spd.nr_pages) 503 if (spd.nr_pages)
471 return splice_to_pipe(pipe, &spd); 504 error = splice_to_pipe(pipe, &spd);
472 505
506 splice_shrink_spd(pipe, &spd);
473 return error; 507 return error;
474} 508}
475 509
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
560 unsigned int nr_pages; 594 unsigned int nr_pages;
561 unsigned int nr_freed; 595 unsigned int nr_freed;
562 size_t offset; 596 size_t offset;
563 struct page *pages[PIPE_BUFFERS]; 597 struct page *pages[PIPE_DEF_BUFFERS];
564 struct partial_page partial[PIPE_BUFFERS]; 598 struct partial_page partial[PIPE_DEF_BUFFERS];
565 struct iovec vec[PIPE_BUFFERS]; 599 struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
566 pgoff_t index; 600 pgoff_t index;
567 ssize_t res; 601 ssize_t res;
568 size_t this_len; 602 size_t this_len;
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
576 .spd_release = spd_release_page, 610 .spd_release = spd_release_page,
577 }; 611 };
578 612
613 if (splice_grow_spd(pipe, &spd))
614 return -ENOMEM;
615
616 res = -ENOMEM;
617 vec = __vec;
618 if (pipe->buffers > PIPE_DEF_BUFFERS) {
619 vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
620 if (!vec)
621 goto shrink_ret;
622 }
623
579 index = *ppos >> PAGE_CACHE_SHIFT; 624 index = *ppos >> PAGE_CACHE_SHIFT;
580 offset = *ppos & ~PAGE_CACHE_MASK; 625 offset = *ppos & ~PAGE_CACHE_MASK;
581 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 626 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
582 627
583 for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { 628 for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
584 struct page *page; 629 struct page *page;
585 630
586 page = alloc_page(GFP_USER); 631 page = alloc_page(GFP_USER);
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
591 this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); 636 this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
592 vec[i].iov_base = (void __user *) page_address(page); 637 vec[i].iov_base = (void __user *) page_address(page);
593 vec[i].iov_len = this_len; 638 vec[i].iov_len = this_len;
594 pages[i] = page; 639 spd.pages[i] = page;
595 spd.nr_pages++; 640 spd.nr_pages++;
596 len -= this_len; 641 len -= this_len;
597 offset = 0; 642 offset = 0;
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
610 nr_freed = 0; 655 nr_freed = 0;
611 for (i = 0; i < spd.nr_pages; i++) { 656 for (i = 0; i < spd.nr_pages; i++) {
612 this_len = min_t(size_t, vec[i].iov_len, res); 657 this_len = min_t(size_t, vec[i].iov_len, res);
613 partial[i].offset = 0; 658 spd.partial[i].offset = 0;
614 partial[i].len = this_len; 659 spd.partial[i].len = this_len;
615 if (!this_len) { 660 if (!this_len) {
616 __free_page(pages[i]); 661 __free_page(spd.pages[i]);
617 pages[i] = NULL; 662 spd.pages[i] = NULL;
618 nr_freed++; 663 nr_freed++;
619 } 664 }
620 res -= this_len; 665 res -= this_len;
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
625 if (res > 0) 670 if (res > 0)
626 *ppos += res; 671 *ppos += res;
627 672
673shrink_ret:
674 if (vec != __vec)
675 kfree(vec);
676 splice_shrink_spd(pipe, &spd);
628 return res; 677 return res;
629 678
630err: 679err:
631 for (i = 0; i < spd.nr_pages; i++) 680 for (i = 0; i < spd.nr_pages; i++)
632 __free_page(pages[i]); 681 __free_page(spd.pages[i]);
633 682
634 return error; 683 res = error;
684 goto shrink_ret;
635} 685}
636EXPORT_SYMBOL(default_file_splice_read); 686EXPORT_SYMBOL(default_file_splice_read);
637 687
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
784 if (!buf->len) { 834 if (!buf->len) {
785 buf->ops = NULL; 835 buf->ops = NULL;
786 ops->release(pipe, buf); 836 ops->release(pipe, buf);
787 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); 837 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
788 pipe->nrbufs--; 838 pipe->nrbufs--;
789 if (pipe->inode) 839 if (pipe->inode)
790 sd->need_wakeup = true; 840 sd->need_wakeup = true;
@@ -1211,7 +1261,7 @@ out_release:
1211 * If we did an incomplete transfer we must release 1261 * If we did an incomplete transfer we must release
1212 * the pipe buffers in question: 1262 * the pipe buffers in question:
1213 */ 1263 */
1214 for (i = 0; i < PIPE_BUFFERS; i++) { 1264 for (i = 0; i < pipe->buffers; i++) {
1215 struct pipe_buffer *buf = pipe->bufs + i; 1265 struct pipe_buffer *buf = pipe->bufs + i;
1216 1266
1217 if (buf->ops) { 1267 if (buf->ops) {
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1371 */ 1421 */
1372static int get_iovec_page_array(const struct iovec __user *iov, 1422static int get_iovec_page_array(const struct iovec __user *iov,
1373 unsigned int nr_vecs, struct page **pages, 1423 unsigned int nr_vecs, struct page **pages,
1374 struct partial_page *partial, int aligned) 1424 struct partial_page *partial, int aligned,
1425 unsigned int pipe_buffers)
1375{ 1426{
1376 int buffers = 0, error = 0; 1427 int buffers = 0, error = 0;
1377 1428
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1414 break; 1465 break;
1415 1466
1416 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1467 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1417 if (npages > PIPE_BUFFERS - buffers) 1468 if (npages > pipe_buffers - buffers)
1418 npages = PIPE_BUFFERS - buffers; 1469 npages = pipe_buffers - buffers;
1419 1470
1420 error = get_user_pages_fast((unsigned long)base, npages, 1471 error = get_user_pages_fast((unsigned long)base, npages,
1421 0, &pages[buffers]); 1472 0, &pages[buffers]);
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1450 * or if we mapped the max number of pages that we have 1501 * or if we mapped the max number of pages that we have
1451 * room for. 1502 * room for.
1452 */ 1503 */
1453 if (error < npages || buffers == PIPE_BUFFERS) 1504 if (error < npages || buffers == pipe_buffers)
1454 break; 1505 break;
1455 1506
1456 nr_vecs--; 1507 nr_vecs--;
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1593 unsigned long nr_segs, unsigned int flags) 1644 unsigned long nr_segs, unsigned int flags)
1594{ 1645{
1595 struct pipe_inode_info *pipe; 1646 struct pipe_inode_info *pipe;
1596 struct page *pages[PIPE_BUFFERS]; 1647 struct page *pages[PIPE_DEF_BUFFERS];
1597 struct partial_page partial[PIPE_BUFFERS]; 1648 struct partial_page partial[PIPE_DEF_BUFFERS];
1598 struct splice_pipe_desc spd = { 1649 struct splice_pipe_desc spd = {
1599 .pages = pages, 1650 .pages = pages,
1600 .partial = partial, 1651 .partial = partial,
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1602 .ops = &user_page_pipe_buf_ops, 1653 .ops = &user_page_pipe_buf_ops,
1603 .spd_release = spd_release_page, 1654 .spd_release = spd_release_page,
1604 }; 1655 };
1656 long ret;
1605 1657
1606 pipe = pipe_info(file->f_path.dentry->d_inode); 1658 pipe = pipe_info(file->f_path.dentry->d_inode);
1607 if (!pipe) 1659 if (!pipe)
1608 return -EBADF; 1660 return -EBADF;
1609 1661
1610 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1662 if (splice_grow_spd(pipe, &spd))
1611 flags & SPLICE_F_GIFT); 1663 return -ENOMEM;
1664
1665 spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
1666 spd.partial, flags & SPLICE_F_GIFT,
1667 pipe->buffers);
1612 if (spd.nr_pages <= 0) 1668 if (spd.nr_pages <= 0)
1613 return spd.nr_pages; 1669 ret = spd.nr_pages;
1670 else
1671 ret = splice_to_pipe(pipe, &spd);
1614 1672
1615 return splice_to_pipe(pipe, &spd); 1673 splice_shrink_spd(pipe, &spd);
1674 return ret;
1616} 1675}
1617 1676
1618/* 1677/*
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1738 * Check ->nrbufs without the inode lock first. This function 1797 * Check ->nrbufs without the inode lock first. This function
1739 * is speculative anyways, so missing one is ok. 1798 * is speculative anyways, so missing one is ok.
1740 */ 1799 */
1741 if (pipe->nrbufs < PIPE_BUFFERS) 1800 if (pipe->nrbufs < pipe->buffers)
1742 return 0; 1801 return 0;
1743 1802
1744 ret = 0; 1803 ret = 0;
1745 pipe_lock(pipe); 1804 pipe_lock(pipe);
1746 1805
1747 while (pipe->nrbufs >= PIPE_BUFFERS) { 1806 while (pipe->nrbufs >= pipe->buffers) {
1748 if (!pipe->readers) { 1807 if (!pipe->readers) {
1749 send_sig(SIGPIPE, current, 0); 1808 send_sig(SIGPIPE, current, 0);
1750 ret = -EPIPE; 1809 ret = -EPIPE;
@@ -1810,7 +1869,7 @@ retry:
1810 * Cannot make any progress, because either the input 1869 * Cannot make any progress, because either the input
1811 * pipe is empty or the output pipe is full. 1870 * pipe is empty or the output pipe is full.
1812 */ 1871 */
1813 if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { 1872 if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
1814 /* Already processed some buffers, break */ 1873 /* Already processed some buffers, break */
1815 if (ret) 1874 if (ret)
1816 break; 1875 break;
@@ -1831,7 +1890,7 @@ retry:
1831 } 1890 }
1832 1891
1833 ibuf = ipipe->bufs + ipipe->curbuf; 1892 ibuf = ipipe->bufs + ipipe->curbuf;
1834 nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; 1893 nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
1835 obuf = opipe->bufs + nbuf; 1894 obuf = opipe->bufs + nbuf;
1836 1895
1837 if (len >= ibuf->len) { 1896 if (len >= ibuf->len) {
@@ -1841,7 +1900,7 @@ retry:
1841 *obuf = *ibuf; 1900 *obuf = *ibuf;
1842 ibuf->ops = NULL; 1901 ibuf->ops = NULL;
1843 opipe->nrbufs++; 1902 opipe->nrbufs++;
1844 ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; 1903 ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
1845 ipipe->nrbufs--; 1904 ipipe->nrbufs--;
1846 input_wakeup = true; 1905 input_wakeup = true;
1847 } else { 1906 } else {
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1914 * If we have iterated all input buffers or ran out of 1973 * If we have iterated all input buffers or ran out of
1915 * output room, break. 1974 * output room, break.
1916 */ 1975 */
1917 if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) 1976 if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
1918 break; 1977 break;
1919 1978
1920 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); 1979 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
1921 nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); 1980 nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
1922 1981
1923 /* 1982 /*
1924 * Get a reference to this pipe buffer, 1983 * Get a reference to this pipe buffer,
diff --git a/fs/statfs.c b/fs/statfs.c
new file mode 100644
index 000000000000..4ef021f3b612
--- /dev/null
+++ b/fs/statfs.c
@@ -0,0 +1,196 @@
1#include <linux/syscalls.h>
2#include <linux/module.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/namei.h>
6#include <linux/statfs.h>
7#include <linux/security.h>
8#include <linux/uaccess.h>
9
10int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
11{
12 int retval = -ENODEV;
13
14 if (dentry) {
15 retval = -ENOSYS;
16 if (dentry->d_sb->s_op->statfs) {
17 memset(buf, 0, sizeof(*buf));
18 retval = security_sb_statfs(dentry);
19 if (retval)
20 return retval;
21 retval = dentry->d_sb->s_op->statfs(dentry, buf);
22 if (retval == 0 && buf->f_frsize == 0)
23 buf->f_frsize = buf->f_bsize;
24 }
25 }
26 return retval;
27}
28
29EXPORT_SYMBOL(vfs_statfs);
30
31static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
32{
33 struct kstatfs st;
34 int retval;
35
36 retval = vfs_statfs(dentry, &st);
37 if (retval)
38 return retval;
39
40 if (sizeof(*buf) == sizeof(st))
41 memcpy(buf, &st, sizeof(st));
42 else {
43 if (sizeof buf->f_blocks == 4) {
44 if ((st.f_blocks | st.f_bfree | st.f_bavail |
45 st.f_bsize | st.f_frsize) &
46 0xffffffff00000000ULL)
47 return -EOVERFLOW;
48 /*
49 * f_files and f_ffree may be -1; it's okay to stuff
50 * that into 32 bits
51 */
52 if (st.f_files != -1 &&
53 (st.f_files & 0xffffffff00000000ULL))
54 return -EOVERFLOW;
55 if (st.f_ffree != -1 &&
56 (st.f_ffree & 0xffffffff00000000ULL))
57 return -EOVERFLOW;
58 }
59
60 buf->f_type = st.f_type;
61 buf->f_bsize = st.f_bsize;
62 buf->f_blocks = st.f_blocks;
63 buf->f_bfree = st.f_bfree;
64 buf->f_bavail = st.f_bavail;
65 buf->f_files = st.f_files;
66 buf->f_ffree = st.f_ffree;
67 buf->f_fsid = st.f_fsid;
68 buf->f_namelen = st.f_namelen;
69 buf->f_frsize = st.f_frsize;
70 memset(buf->f_spare, 0, sizeof(buf->f_spare));
71 }
72 return 0;
73}
74
75static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
76{
77 struct kstatfs st;
78 int retval;
79
80 retval = vfs_statfs(dentry, &st);
81 if (retval)
82 return retval;
83
84 if (sizeof(*buf) == sizeof(st))
85 memcpy(buf, &st, sizeof(st));
86 else {
87 buf->f_type = st.f_type;
88 buf->f_bsize = st.f_bsize;
89 buf->f_blocks = st.f_blocks;
90 buf->f_bfree = st.f_bfree;
91 buf->f_bavail = st.f_bavail;
92 buf->f_files = st.f_files;
93 buf->f_ffree = st.f_ffree;
94 buf->f_fsid = st.f_fsid;
95 buf->f_namelen = st.f_namelen;
96 buf->f_frsize = st.f_frsize;
97 memset(buf->f_spare, 0, sizeof(buf->f_spare));
98 }
99 return 0;
100}
101
102SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
103{
104 struct path path;
105 int error;
106
107 error = user_path(pathname, &path);
108 if (!error) {
109 struct statfs tmp;
110 error = vfs_statfs_native(path.dentry, &tmp);
111 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
112 error = -EFAULT;
113 path_put(&path);
114 }
115 return error;
116}
117
118SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
119{
120 struct path path;
121 long error;
122
123 if (sz != sizeof(*buf))
124 return -EINVAL;
125 error = user_path(pathname, &path);
126 if (!error) {
127 struct statfs64 tmp;
128 error = vfs_statfs64(path.dentry, &tmp);
129 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
130 error = -EFAULT;
131 path_put(&path);
132 }
133 return error;
134}
135
136SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
137{
138 struct file *file;
139 struct statfs tmp;
140 int error;
141
142 error = -EBADF;
143 file = fget(fd);
144 if (!file)
145 goto out;
146 error = vfs_statfs_native(file->f_path.dentry, &tmp);
147 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
148 error = -EFAULT;
149 fput(file);
150out:
151 return error;
152}
153
154SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
155{
156 struct file *file;
157 struct statfs64 tmp;
158 int error;
159
160 if (sz != sizeof(*buf))
161 return -EINVAL;
162
163 error = -EBADF;
164 file = fget(fd);
165 if (!file)
166 goto out;
167 error = vfs_statfs64(file->f_path.dentry, &tmp);
168 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
169 error = -EFAULT;
170 fput(file);
171out:
172 return error;
173}
174
175SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
176{
177 struct super_block *s;
178 struct ustat tmp;
179 struct kstatfs sbuf;
180 int err;
181
182 s = user_get_super(new_decode_dev(dev));
183 if (!s)
184 return -EINVAL;
185
186 err = vfs_statfs(s->s_root, &sbuf);
187 drop_super(s);
188 if (err)
189 return err;
190
191 memset(&tmp,0,sizeof(struct ustat));
192 tmp.f_tfree = sbuf.f_bfree;
193 tmp.f_tinode = sbuf.f_ffree;
194
195 return copy_to_user(ubuf, &tmp, sizeof(struct ustat)) ? -EFAULT : 0;
196}
diff --git a/fs/super.c b/fs/super.c
index 1527e6a0ee35..69688b15f1fa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -22,23 +22,15 @@
22 22
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/init.h>
26#include <linux/smp_lock.h>
27#include <linux/acct.h> 25#include <linux/acct.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
29#include <linux/quotaops.h> 27#include <linux/quotaops.h>
30#include <linux/namei.h>
31#include <linux/mount.h> 28#include <linux/mount.h>
32#include <linux/security.h> 29#include <linux/security.h>
33#include <linux/syscalls.h>
34#include <linux/vfs.h>
35#include <linux/writeback.h> /* for the emergency remount stuff */ 30#include <linux/writeback.h> /* for the emergency remount stuff */
36#include <linux/idr.h> 31#include <linux/idr.h>
37#include <linux/kobject.h>
38#include <linux/mutex.h> 32#include <linux/mutex.h>
39#include <linux/file.h>
40#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
41#include <asm/uaccess.h>
42#include "internal.h" 34#include "internal.h"
43 35
44 36
@@ -93,9 +85,10 @@ static struct super_block *alloc_super(struct file_system_type *type)
93 * subclass. 85 * subclass.
94 */ 86 */
95 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); 87 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
96 s->s_count = S_BIAS; 88 s->s_count = 1;
97 atomic_set(&s->s_active, 1); 89 atomic_set(&s->s_active, 1);
98 mutex_init(&s->s_vfs_rename_mutex); 90 mutex_init(&s->s_vfs_rename_mutex);
91 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
99 mutex_init(&s->s_dquot.dqio_mutex); 92 mutex_init(&s->s_dquot.dqio_mutex);
100 mutex_init(&s->s_dquot.dqonoff_mutex); 93 mutex_init(&s->s_dquot.dqonoff_mutex);
101 init_rwsem(&s->s_dquot.dqptr_sem); 94 init_rwsem(&s->s_dquot.dqptr_sem);
@@ -127,39 +120,14 @@ static inline void destroy_super(struct super_block *s)
127/* Superblock refcounting */ 120/* Superblock refcounting */
128 121
129/* 122/*
130 * Drop a superblock's refcount. Returns non-zero if the superblock was 123 * Drop a superblock's refcount. The caller must hold sb_lock.
131 * destroyed. The caller must hold sb_lock.
132 */ 124 */
133static int __put_super(struct super_block *sb) 125void __put_super(struct super_block *sb)
134{ 126{
135 int ret = 0;
136
137 if (!--sb->s_count) { 127 if (!--sb->s_count) {
128 list_del_init(&sb->s_list);
138 destroy_super(sb); 129 destroy_super(sb);
139 ret = 1;
140 } 130 }
141 return ret;
142}
143
144/*
145 * Drop a superblock's refcount.
146 * Returns non-zero if the superblock is about to be destroyed and
147 * at least is already removed from super_blocks list, so if we are
148 * making a loop through super blocks then we need to restart.
149 * The caller must hold sb_lock.
150 */
151int __put_super_and_need_restart(struct super_block *sb)
152{
153 /* check for race with generic_shutdown_super() */
154 if (list_empty(&sb->s_list)) {
155 /* super block is removed, need to restart... */
156 __put_super(sb);
157 return 1;
158 }
159 /* can't be the last, since s_list is still in use */
160 sb->s_count--;
161 BUG_ON(sb->s_count == 0);
162 return 0;
163} 131}
164 132
165/** 133/**
@@ -178,57 +146,48 @@ void put_super(struct super_block *sb)
178 146
179 147
180/** 148/**
181 * deactivate_super - drop an active reference to superblock 149 * deactivate_locked_super - drop an active reference to superblock
182 * @s: superblock to deactivate 150 * @s: superblock to deactivate
183 * 151 *
184 * Drops an active reference to superblock, acquiring a temprory one if 152 * Drops an active reference to superblock, converting it into a temprory
185 * there is no active references left. In that case we lock superblock, 153 * one if there is no other active references left. In that case we
186 * tell fs driver to shut it down and drop the temporary reference we 154 * tell fs driver to shut it down and drop the temporary reference we
187 * had just acquired. 155 * had just acquired.
156 *
157 * Caller holds exclusive lock on superblock; that lock is released.
188 */ 158 */
189void deactivate_super(struct super_block *s) 159void deactivate_locked_super(struct super_block *s)
190{ 160{
191 struct file_system_type *fs = s->s_type; 161 struct file_system_type *fs = s->s_type;
192 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 162 if (atomic_dec_and_test(&s->s_active)) {
193 s->s_count -= S_BIAS-1;
194 spin_unlock(&sb_lock);
195 vfs_dq_off(s, 0); 163 vfs_dq_off(s, 0);
196 down_write(&s->s_umount);
197 fs->kill_sb(s); 164 fs->kill_sb(s);
198 put_filesystem(fs); 165 put_filesystem(fs);
199 put_super(s); 166 put_super(s);
167 } else {
168 up_write(&s->s_umount);
200 } 169 }
201} 170}
202 171
203EXPORT_SYMBOL(deactivate_super); 172EXPORT_SYMBOL(deactivate_locked_super);
204 173
205/** 174/**
206 * deactivate_locked_super - drop an active reference to superblock 175 * deactivate_super - drop an active reference to superblock
207 * @s: superblock to deactivate 176 * @s: superblock to deactivate
208 * 177 *
209 * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that 178 * Variant of deactivate_locked_super(), except that superblock is *not*
210 * it does not unlock it until it's all over. As the result, it's safe to 179 * locked by caller. If we are going to drop the final active reference,
211 * use to dispose of new superblock on ->get_sb() failure exits - nobody 180 * lock will be acquired prior to that.
212 * will see the sucker until it's all over. Equivalent using up_write +
213 * deactivate_super is safe for that purpose only if superblock is either
214 * safe to use or has NULL ->s_root when we unlock.
215 */ 181 */
216void deactivate_locked_super(struct super_block *s) 182void deactivate_super(struct super_block *s)
217{ 183{
218 struct file_system_type *fs = s->s_type; 184 if (!atomic_add_unless(&s->s_active, -1, 1)) {
219 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 185 down_write(&s->s_umount);
220 s->s_count -= S_BIAS-1; 186 deactivate_locked_super(s);
221 spin_unlock(&sb_lock);
222 vfs_dq_off(s, 0);
223 fs->kill_sb(s);
224 put_filesystem(fs);
225 put_super(s);
226 } else {
227 up_write(&s->s_umount);
228 } 187 }
229} 188}
230 189
231EXPORT_SYMBOL(deactivate_locked_super); 190EXPORT_SYMBOL(deactivate_super);
232 191
233/** 192/**
234 * grab_super - acquire an active reference 193 * grab_super - acquire an active reference
@@ -243,22 +202,17 @@ EXPORT_SYMBOL(deactivate_locked_super);
243 */ 202 */
244static int grab_super(struct super_block *s) __releases(sb_lock) 203static int grab_super(struct super_block *s) __releases(sb_lock)
245{ 204{
205 if (atomic_inc_not_zero(&s->s_active)) {
206 spin_unlock(&sb_lock);
207 return 1;
208 }
209 /* it's going away */
246 s->s_count++; 210 s->s_count++;
247 spin_unlock(&sb_lock); 211 spin_unlock(&sb_lock);
212 /* wait for it to die */
248 down_write(&s->s_umount); 213 down_write(&s->s_umount);
249 if (s->s_root) {
250 spin_lock(&sb_lock);
251 if (s->s_count > S_BIAS) {
252 atomic_inc(&s->s_active);
253 s->s_count--;
254 spin_unlock(&sb_lock);
255 return 1;
256 }
257 spin_unlock(&sb_lock);
258 }
259 up_write(&s->s_umount); 214 up_write(&s->s_umount);
260 put_super(s); 215 put_super(s);
261 yield();
262 return 0; 216 return 0;
263} 217}
264 218
@@ -321,8 +275,7 @@ void generic_shutdown_super(struct super_block *sb)
321 } 275 }
322 spin_lock(&sb_lock); 276 spin_lock(&sb_lock);
323 /* should be initialized for __put_super_and_need_restart() */ 277 /* should be initialized for __put_super_and_need_restart() */
324 list_del_init(&sb->s_list); 278 list_del_init(&sb->s_instances);
325 list_del(&sb->s_instances);
326 spin_unlock(&sb_lock); 279 spin_unlock(&sb_lock);
327 up_write(&sb->s_umount); 280 up_write(&sb->s_umount);
328} 281}
@@ -357,6 +310,7 @@ retry:
357 up_write(&s->s_umount); 310 up_write(&s->s_umount);
358 destroy_super(s); 311 destroy_super(s);
359 } 312 }
313 down_write(&old->s_umount);
360 return old; 314 return old;
361 } 315 }
362 } 316 }
@@ -408,11 +362,12 @@ EXPORT_SYMBOL(drop_super);
408 */ 362 */
409void sync_supers(void) 363void sync_supers(void)
410{ 364{
411 struct super_block *sb; 365 struct super_block *sb, *n;
412 366
413 spin_lock(&sb_lock); 367 spin_lock(&sb_lock);
414restart: 368 list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
415 list_for_each_entry(sb, &super_blocks, s_list) { 369 if (list_empty(&sb->s_instances))
370 continue;
416 if (sb->s_op->write_super && sb->s_dirt) { 371 if (sb->s_op->write_super && sb->s_dirt) {
417 sb->s_count++; 372 sb->s_count++;
418 spin_unlock(&sb_lock); 373 spin_unlock(&sb_lock);
@@ -423,14 +378,43 @@ restart:
423 up_read(&sb->s_umount); 378 up_read(&sb->s_umount);
424 379
425 spin_lock(&sb_lock); 380 spin_lock(&sb_lock);
426 if (__put_super_and_need_restart(sb)) 381 __put_super(sb);
427 goto restart;
428 } 382 }
429 } 383 }
430 spin_unlock(&sb_lock); 384 spin_unlock(&sb_lock);
431} 385}
432 386
433/** 387/**
388 * iterate_supers - call function for all active superblocks
389 * @f: function to call
390 * @arg: argument to pass to it
391 *
392 * Scans the superblock list and calls given function, passing it
393 * locked superblock and given argument.
394 */
395void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
396{
397 struct super_block *sb, *n;
398
399 spin_lock(&sb_lock);
400 list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
401 if (list_empty(&sb->s_instances))
402 continue;
403 sb->s_count++;
404 spin_unlock(&sb_lock);
405
406 down_read(&sb->s_umount);
407 if (sb->s_root)
408 f(sb, arg);
409 up_read(&sb->s_umount);
410
411 spin_lock(&sb_lock);
412 __put_super(sb);
413 }
414 spin_unlock(&sb_lock);
415}
416
417/**
434 * get_super - get the superblock of a device 418 * get_super - get the superblock of a device
435 * @bdev: device to get the superblock for 419 * @bdev: device to get the superblock for
436 * 420 *
@@ -438,7 +422,7 @@ restart:
438 * mounted on the device given. %NULL is returned if no match is found. 422 * mounted on the device given. %NULL is returned if no match is found.
439 */ 423 */
440 424
441struct super_block * get_super(struct block_device *bdev) 425struct super_block *get_super(struct block_device *bdev)
442{ 426{
443 struct super_block *sb; 427 struct super_block *sb;
444 428
@@ -448,17 +432,20 @@ struct super_block * get_super(struct block_device *bdev)
448 spin_lock(&sb_lock); 432 spin_lock(&sb_lock);
449rescan: 433rescan:
450 list_for_each_entry(sb, &super_blocks, s_list) { 434 list_for_each_entry(sb, &super_blocks, s_list) {
435 if (list_empty(&sb->s_instances))
436 continue;
451 if (sb->s_bdev == bdev) { 437 if (sb->s_bdev == bdev) {
452 sb->s_count++; 438 sb->s_count++;
453 spin_unlock(&sb_lock); 439 spin_unlock(&sb_lock);
454 down_read(&sb->s_umount); 440 down_read(&sb->s_umount);
441 /* still alive? */
455 if (sb->s_root) 442 if (sb->s_root)
456 return sb; 443 return sb;
457 up_read(&sb->s_umount); 444 up_read(&sb->s_umount);
458 /* restart only when sb is no longer on the list */ 445 /* nope, got unmounted */
459 spin_lock(&sb_lock); 446 spin_lock(&sb_lock);
460 if (__put_super_and_need_restart(sb)) 447 __put_super(sb);
461 goto rescan; 448 goto rescan;
462 } 449 }
463 } 450 }
464 spin_unlock(&sb_lock); 451 spin_unlock(&sb_lock);
@@ -473,7 +460,7 @@ EXPORT_SYMBOL(get_super);
473 * 460 *
474 * Scans the superblock list and finds the superblock of the file system 461 * Scans the superblock list and finds the superblock of the file system
475 * mounted on the device given. Returns the superblock with an active 462 * mounted on the device given. Returns the superblock with an active
476 * reference and s_umount held exclusively or %NULL if none was found. 463 * reference or %NULL if none was found.
477 */ 464 */
478struct super_block *get_active_super(struct block_device *bdev) 465struct super_block *get_active_super(struct block_device *bdev)
479{ 466{
@@ -482,81 +469,49 @@ struct super_block *get_active_super(struct block_device *bdev)
482 if (!bdev) 469 if (!bdev)
483 return NULL; 470 return NULL;
484 471
472restart:
485 spin_lock(&sb_lock); 473 spin_lock(&sb_lock);
486 list_for_each_entry(sb, &super_blocks, s_list) { 474 list_for_each_entry(sb, &super_blocks, s_list) {
487 if (sb->s_bdev != bdev) 475 if (list_empty(&sb->s_instances))
488 continue; 476 continue;
489 477 if (sb->s_bdev == bdev) {
490 sb->s_count++; 478 if (grab_super(sb)) /* drops sb_lock */
491 spin_unlock(&sb_lock);
492 down_write(&sb->s_umount);
493 if (sb->s_root) {
494 spin_lock(&sb_lock);
495 if (sb->s_count > S_BIAS) {
496 atomic_inc(&sb->s_active);
497 sb->s_count--;
498 spin_unlock(&sb_lock);
499 return sb; 479 return sb;
500 } 480 else
501 spin_unlock(&sb_lock); 481 goto restart;
502 } 482 }
503 up_write(&sb->s_umount);
504 put_super(sb);
505 yield();
506 spin_lock(&sb_lock);
507 } 483 }
508 spin_unlock(&sb_lock); 484 spin_unlock(&sb_lock);
509 return NULL; 485 return NULL;
510} 486}
511 487
512struct super_block * user_get_super(dev_t dev) 488struct super_block *user_get_super(dev_t dev)
513{ 489{
514 struct super_block *sb; 490 struct super_block *sb;
515 491
516 spin_lock(&sb_lock); 492 spin_lock(&sb_lock);
517rescan: 493rescan:
518 list_for_each_entry(sb, &super_blocks, s_list) { 494 list_for_each_entry(sb, &super_blocks, s_list) {
495 if (list_empty(&sb->s_instances))
496 continue;
519 if (sb->s_dev == dev) { 497 if (sb->s_dev == dev) {
520 sb->s_count++; 498 sb->s_count++;
521 spin_unlock(&sb_lock); 499 spin_unlock(&sb_lock);
522 down_read(&sb->s_umount); 500 down_read(&sb->s_umount);
501 /* still alive? */
523 if (sb->s_root) 502 if (sb->s_root)
524 return sb; 503 return sb;
525 up_read(&sb->s_umount); 504 up_read(&sb->s_umount);
526 /* restart only when sb is no longer on the list */ 505 /* nope, got unmounted */
527 spin_lock(&sb_lock); 506 spin_lock(&sb_lock);
528 if (__put_super_and_need_restart(sb)) 507 __put_super(sb);
529 goto rescan; 508 goto rescan;
530 } 509 }
531 } 510 }
532 spin_unlock(&sb_lock); 511 spin_unlock(&sb_lock);
533 return NULL; 512 return NULL;
534} 513}
535 514
536SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
537{
538 struct super_block *s;
539 struct ustat tmp;
540 struct kstatfs sbuf;
541 int err = -EINVAL;
542
543 s = user_get_super(new_decode_dev(dev));
544 if (s == NULL)
545 goto out;
546 err = vfs_statfs(s->s_root, &sbuf);
547 drop_super(s);
548 if (err)
549 goto out;
550
551 memset(&tmp,0,sizeof(struct ustat));
552 tmp.f_tfree = sbuf.f_bfree;
553 tmp.f_tinode = sbuf.f_ffree;
554
555 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
556out:
557 return err;
558}
559
560/** 515/**
561 * do_remount_sb - asks filesystem to change mount options. 516 * do_remount_sb - asks filesystem to change mount options.
562 * @sb: superblock in question 517 * @sb: superblock in question
@@ -622,24 +577,24 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
622 577
623static void do_emergency_remount(struct work_struct *work) 578static void do_emergency_remount(struct work_struct *work)
624{ 579{
625 struct super_block *sb; 580 struct super_block *sb, *n;
626 581
627 spin_lock(&sb_lock); 582 spin_lock(&sb_lock);
628 list_for_each_entry(sb, &super_blocks, s_list) { 583 list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
584 if (list_empty(&sb->s_instances))
585 continue;
629 sb->s_count++; 586 sb->s_count++;
630 spin_unlock(&sb_lock); 587 spin_unlock(&sb_lock);
631 down_write(&sb->s_umount); 588 down_write(&sb->s_umount);
632 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { 589 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
633 /* 590 /*
634 * ->remount_fs needs lock_kernel().
635 *
636 * What lock protects sb->s_flags?? 591 * What lock protects sb->s_flags??
637 */ 592 */
638 do_remount_sb(sb, MS_RDONLY, NULL, 1); 593 do_remount_sb(sb, MS_RDONLY, NULL, 1);
639 } 594 }
640 up_write(&sb->s_umount); 595 up_write(&sb->s_umount);
641 put_super(sb);
642 spin_lock(&sb_lock); 596 spin_lock(&sb_lock);
597 __put_super(sb);
643 } 598 }
644 spin_unlock(&sb_lock); 599 spin_unlock(&sb_lock);
645 kfree(work); 600 kfree(work);
@@ -990,6 +945,96 @@ out:
990 945
991EXPORT_SYMBOL_GPL(vfs_kern_mount); 946EXPORT_SYMBOL_GPL(vfs_kern_mount);
992 947
948/**
949 * freeze_super -- lock the filesystem and force it into a consistent state
950 * @super: the super to lock
951 *
952 * Syncs the super to make sure the filesystem is consistent and calls the fs's
953 * freeze_fs. Subsequent calls to this without first thawing the fs will return
954 * -EBUSY.
955 */
956int freeze_super(struct super_block *sb)
957{
958 int ret;
959
960 atomic_inc(&sb->s_active);
961 down_write(&sb->s_umount);
962 if (sb->s_frozen) {
963 deactivate_locked_super(sb);
964 return -EBUSY;
965 }
966
967 if (sb->s_flags & MS_RDONLY) {
968 sb->s_frozen = SB_FREEZE_TRANS;
969 smp_wmb();
970 up_write(&sb->s_umount);
971 return 0;
972 }
973
974 sb->s_frozen = SB_FREEZE_WRITE;
975 smp_wmb();
976
977 sync_filesystem(sb);
978
979 sb->s_frozen = SB_FREEZE_TRANS;
980 smp_wmb();
981
982 sync_blockdev(sb->s_bdev);
983 if (sb->s_op->freeze_fs) {
984 ret = sb->s_op->freeze_fs(sb);
985 if (ret) {
986 printk(KERN_ERR
987 "VFS:Filesystem freeze failed\n");
988 sb->s_frozen = SB_UNFROZEN;
989 deactivate_locked_super(sb);
990 return ret;
991 }
992 }
993 up_write(&sb->s_umount);
994 return 0;
995}
996EXPORT_SYMBOL(freeze_super);
997
998/**
999 * thaw_super -- unlock filesystem
1000 * @sb: the super to thaw
1001 *
1002 * Unlocks the filesystem and marks it writeable again after freeze_super().
1003 */
1004int thaw_super(struct super_block *sb)
1005{
1006 int error;
1007
1008 down_write(&sb->s_umount);
1009 if (sb->s_frozen == SB_UNFROZEN) {
1010 up_write(&sb->s_umount);
1011 return -EINVAL;
1012 }
1013
1014 if (sb->s_flags & MS_RDONLY)
1015 goto out;
1016
1017 if (sb->s_op->unfreeze_fs) {
1018 error = sb->s_op->unfreeze_fs(sb);
1019 if (error) {
1020 printk(KERN_ERR
1021 "VFS:Filesystem thaw failed\n");
1022 sb->s_frozen = SB_FREEZE_TRANS;
1023 up_write(&sb->s_umount);
1024 return error;
1025 }
1026 }
1027
1028out:
1029 sb->s_frozen = SB_UNFROZEN;
1030 smp_wmb();
1031 wake_up(&sb->s_wait_unfrozen);
1032 deactivate_locked_super(sb);
1033
1034 return 0;
1035}
1036EXPORT_SYMBOL(thaw_super);
1037
993static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) 1038static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
994{ 1039{
995 int err; 1040 int err;
diff --git a/fs/sync.c b/fs/sync.c
index 92b228176f7c..e8cbd415e50a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb(sb); 45 writeback_inodes_sb_locked(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);
@@ -77,50 +77,18 @@ int sync_filesystem(struct super_block *sb)
77} 77}
78EXPORT_SYMBOL_GPL(sync_filesystem); 78EXPORT_SYMBOL_GPL(sync_filesystem);
79 79
80static void sync_one_sb(struct super_block *sb, void *arg)
81{
82 if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi)
83 __sync_filesystem(sb, *(int *)arg);
84}
80/* 85/*
81 * Sync all the data for all the filesystems (called by sys_sync() and 86 * Sync all the data for all the filesystems (called by sys_sync() and
82 * emergency sync) 87 * emergency sync)
83 *
84 * This operation is careful to avoid the livelock which could easily happen
85 * if two or more filesystems are being continuously dirtied. s_need_sync
86 * is used only here. We set it against all filesystems and then clear it as
87 * we sync them. So redirtied filesystems are skipped.
88 *
89 * But if process A is currently running sync_filesystems and then process B
90 * calls sync_filesystems as well, process B will set all the s_need_sync
91 * flags again, which will cause process A to resync everything. Fix that with
92 * a local mutex.
93 */ 88 */
94static void sync_filesystems(int wait) 89static void sync_filesystems(int wait)
95{ 90{
96 struct super_block *sb; 91 iterate_supers(sync_one_sb, &wait);
97 static DEFINE_MUTEX(mutex);
98
99 mutex_lock(&mutex); /* Could be down_interruptible */
100 spin_lock(&sb_lock);
101 list_for_each_entry(sb, &super_blocks, s_list)
102 sb->s_need_sync = 1;
103
104restart:
105 list_for_each_entry(sb, &super_blocks, s_list) {
106 if (!sb->s_need_sync)
107 continue;
108 sb->s_need_sync = 0;
109 sb->s_count++;
110 spin_unlock(&sb_lock);
111
112 down_read(&sb->s_umount);
113 if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi)
114 __sync_filesystem(sb, wait);
115 up_read(&sb->s_umount);
116
117 /* restart only when sb is no longer on the list */
118 spin_lock(&sb_lock);
119 if (__put_super_and_need_restart(sb))
120 goto restart;
121 }
122 spin_unlock(&sb_lock);
123 mutex_unlock(&mutex);
124} 92}
125 93
126/* 94/*
@@ -190,7 +158,6 @@ EXPORT_SYMBOL(file_fsync);
190/** 158/**
191 * vfs_fsync_range - helper to sync a range of data & metadata to disk 159 * vfs_fsync_range - helper to sync a range of data & metadata to disk
192 * @file: file to sync 160 * @file: file to sync
193 * @dentry: dentry of @file
194 * @start: offset in bytes of the beginning of data range to sync 161 * @start: offset in bytes of the beginning of data range to sync
195 * @end: offset in bytes of the end of data range (inclusive) 162 * @end: offset in bytes of the end of data range (inclusive)
196 * @datasync: perform only datasync 163 * @datasync: perform only datasync
@@ -198,32 +165,13 @@ EXPORT_SYMBOL(file_fsync);
198 * Write back data in range @start..@end and metadata for @file to disk. If 165 * Write back data in range @start..@end and metadata for @file to disk. If
199 * @datasync is set only metadata needed to access modified file data is 166 * @datasync is set only metadata needed to access modified file data is
200 * written. 167 * written.
201 *
202 * In case this function is called from nfsd @file may be %NULL and
203 * only @dentry is set. This can only happen when the filesystem
204 * implements the export_operations API.
205 */ 168 */
206int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, 169int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
207 loff_t end, int datasync)
208{ 170{
209 const struct file_operations *fop; 171 struct address_space *mapping = file->f_mapping;
210 struct address_space *mapping;
211 int err, ret; 172 int err, ret;
212 173
213 /* 174 if (!file->f_op || !file->f_op->fsync) {
214 * Get mapping and operations from the file in case we have
215 * as file, or get the default values for them in case we
216 * don't have a struct file available. Damn nfsd..
217 */
218 if (file) {
219 mapping = file->f_mapping;
220 fop = file->f_op;
221 } else {
222 mapping = dentry->d_inode->i_mapping;
223 fop = dentry->d_inode->i_fop;
224 }
225
226 if (!fop || !fop->fsync) {
227 ret = -EINVAL; 175 ret = -EINVAL;
228 goto out; 176 goto out;
229 } 177 }
@@ -235,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
235 * livelocks in fsync_buffers_list(). 183 * livelocks in fsync_buffers_list().
236 */ 184 */
237 mutex_lock(&mapping->host->i_mutex); 185 mutex_lock(&mapping->host->i_mutex);
238 err = fop->fsync(file, dentry, datasync); 186 err = file->f_op->fsync(file, file->f_path.dentry, datasync);
239 if (!ret) 187 if (!ret)
240 ret = err; 188 ret = err;
241 mutex_unlock(&mapping->host->i_mutex); 189 mutex_unlock(&mapping->host->i_mutex);
@@ -248,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range);
248/** 196/**
249 * vfs_fsync - perform a fsync or fdatasync on a file 197 * vfs_fsync - perform a fsync or fdatasync on a file
250 * @file: file to sync 198 * @file: file to sync
251 * @dentry: dentry of @file
252 * @datasync: only perform a fdatasync operation 199 * @datasync: only perform a fdatasync operation
253 * 200 *
254 * Write back data and metadata for @file to disk. If @datasync is 201 * Write back data and metadata for @file to disk. If @datasync is
255 * set only metadata needed to access modified file data is written. 202 * set only metadata needed to access modified file data is written.
256 *
257 * In case this function is called from nfsd @file may be %NULL and
258 * only @dentry is set. This can only happen when the filesystem
259 * implements the export_operations API.
260 */ 203 */
261int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 204int vfs_fsync(struct file *file, int datasync)
262{ 205{
263 return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); 206 return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
264} 207}
265EXPORT_SYMBOL(vfs_fsync); 208EXPORT_SYMBOL(vfs_fsync);
266 209
@@ -271,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync)
271 214
272 file = fget(fd); 215 file = fget(fd);
273 if (file) { 216 if (file) {
274 ret = vfs_fsync(file, file->f_path.dentry, datasync); 217 ret = vfs_fsync(file, datasync);
275 fput(file); 218 fput(file);
276 } 219 }
277 return ret; 220 return ret;
@@ -299,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count)
299{ 242{
300 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) 243 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
301 return 0; 244 return 0;
302 return vfs_fsync_range(file, file->f_path.dentry, pos, 245 return vfs_fsync_range(file, pos, pos + count - 1,
303 pos + count - 1,
304 (file->f_flags & __O_SYNC) ? 0 : 1); 246 (file->f_flags & __O_SYNC) ? 0 : 1);
305} 247}
306EXPORT_SYMBOL(generic_write_sync); 248EXPORT_SYMBOL(generic_write_sync);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index e9d293593e52..4e321f7353fa 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -46,9 +46,9 @@ struct bin_buffer {
46}; 46};
47 47
48static int 48static int
49fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 49fill_read(struct file *file, char *buffer, loff_t off, size_t count)
50{ 50{
51 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 51 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
52 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; 52 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
53 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 53 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
54 int rc; 54 int rc;
@@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
59 59
60 rc = -EIO; 60 rc = -EIO;
61 if (attr->read) 61 if (attr->read)
62 rc = attr->read(kobj, attr, buffer, off, count); 62 rc = attr->read(file, kobj, attr, buffer, off, count);
63 63
64 sysfs_put_active(attr_sd); 64 sysfs_put_active(attr_sd);
65 65
@@ -70,8 +70,7 @@ static ssize_t
70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) 70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
71{ 71{
72 struct bin_buffer *bb = file->private_data; 72 struct bin_buffer *bb = file->private_data;
73 struct dentry *dentry = file->f_path.dentry; 73 int size = file->f_path.dentry->d_inode->i_size;
74 int size = dentry->d_inode->i_size;
75 loff_t offs = *off; 74 loff_t offs = *off;
76 int count = min_t(size_t, bytes, PAGE_SIZE); 75 int count = min_t(size_t, bytes, PAGE_SIZE);
77 char *temp; 76 char *temp;
@@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
92 91
93 mutex_lock(&bb->mutex); 92 mutex_lock(&bb->mutex);
94 93
95 count = fill_read(dentry, bb->buffer, offs, count); 94 count = fill_read(file, bb->buffer, offs, count);
96 if (count < 0) { 95 if (count < 0) {
97 mutex_unlock(&bb->mutex); 96 mutex_unlock(&bb->mutex);
98 goto out_free; 97 goto out_free;
@@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
117} 116}
118 117
119static int 118static int
120flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 119flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
121{ 120{
122 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 121 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
123 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; 122 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
124 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 123 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
125 int rc; 124 int rc;
@@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
130 129
131 rc = -EIO; 130 rc = -EIO;
132 if (attr->write) 131 if (attr->write)
133 rc = attr->write(kobj, attr, buffer, offset, count); 132 rc = attr->write(file, kobj, attr, buffer, offset, count);
134 133
135 sysfs_put_active(attr_sd); 134 sysfs_put_active(attr_sd);
136 135
@@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
141 size_t bytes, loff_t *off) 140 size_t bytes, loff_t *off)
142{ 141{
143 struct bin_buffer *bb = file->private_data; 142 struct bin_buffer *bb = file->private_data;
144 struct dentry *dentry = file->f_path.dentry; 143 int size = file->f_path.dentry->d_inode->i_size;
145 int size = dentry->d_inode->i_size;
146 loff_t offs = *off; 144 loff_t offs = *off;
147 int count = min_t(size_t, bytes, PAGE_SIZE); 145 int count = min_t(size_t, bytes, PAGE_SIZE);
148 char *temp; 146 char *temp;
@@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
165 163
166 memcpy(bb->buffer, temp, count); 164 memcpy(bb->buffer, temp, count);
167 165
168 count = flush_write(dentry, bb->buffer, offs, count); 166 count = flush_write(file, bb->buffer, offs, count);
169 mutex_unlock(&bb->mutex); 167 mutex_unlock(&bb->mutex);
170 168
171 if (count > 0) 169 if (count > 0)
@@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
363 if (!attr->mmap) 361 if (!attr->mmap)
364 goto out_put; 362 goto out_put;
365 363
366 rc = attr->mmap(kobj, attr, vma); 364 rc = attr->mmap(file, kobj, attr, vma);
367 if (rc) 365 if (rc)
368 goto out_put; 366 goto out_put;
369 367
@@ -501,7 +499,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
501void sysfs_remove_bin_file(struct kobject *kobj, 499void sysfs_remove_bin_file(struct kobject *kobj,
502 const struct bin_attribute *attr) 500 const struct bin_attribute *attr)
503{ 501{
504 sysfs_hash_and_remove(kobj->sd, attr->attr.name); 502 sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
505} 503}
506 504
507EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 505EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 590717861c7a..7e54bac8c4b0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -380,7 +380,7 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
380{ 380{
381 struct sysfs_inode_attrs *ps_iattr; 381 struct sysfs_inode_attrs *ps_iattr;
382 382
383 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 383 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
384 return -EEXIST; 384 return -EEXIST;
385 385
386 sd->s_parent = sysfs_get(acxt->parent_sd); 386 sd->s_parent = sysfs_get(acxt->parent_sd);
@@ -533,13 +533,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
533 * Pointer to sysfs_dirent if found, NULL if not. 533 * Pointer to sysfs_dirent if found, NULL if not.
534 */ 534 */
535struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, 535struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
536 const void *ns,
536 const unsigned char *name) 537 const unsigned char *name)
537{ 538{
538 struct sysfs_dirent *sd; 539 struct sysfs_dirent *sd;
539 540
540 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) 541 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
542 if (ns && sd->s_ns && (sd->s_ns != ns))
543 continue;
541 if (!strcmp(sd->s_name, name)) 544 if (!strcmp(sd->s_name, name))
542 return sd; 545 return sd;
546 }
543 return NULL; 547 return NULL;
544} 548}
545 549
@@ -558,12 +562,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
558 * Pointer to sysfs_dirent if found, NULL if not. 562 * Pointer to sysfs_dirent if found, NULL if not.
559 */ 563 */
560struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, 564struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
565 const void *ns,
561 const unsigned char *name) 566 const unsigned char *name)
562{ 567{
563 struct sysfs_dirent *sd; 568 struct sysfs_dirent *sd;
564 569
565 mutex_lock(&sysfs_mutex); 570 mutex_lock(&sysfs_mutex);
566 sd = sysfs_find_dirent(parent_sd, name); 571 sd = sysfs_find_dirent(parent_sd, ns, name);
567 sysfs_get(sd); 572 sysfs_get(sd);
568 mutex_unlock(&sysfs_mutex); 573 mutex_unlock(&sysfs_mutex);
569 574
@@ -572,7 +577,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
572EXPORT_SYMBOL_GPL(sysfs_get_dirent); 577EXPORT_SYMBOL_GPL(sysfs_get_dirent);
573 578
574static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, 579static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
575 const char *name, struct sysfs_dirent **p_sd) 580 enum kobj_ns_type type, const void *ns, const char *name,
581 struct sysfs_dirent **p_sd)
576{ 582{
577 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 583 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
578 struct sysfs_addrm_cxt acxt; 584 struct sysfs_addrm_cxt acxt;
@@ -583,6 +589,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
583 sd = sysfs_new_dirent(name, mode, SYSFS_DIR); 589 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
584 if (!sd) 590 if (!sd)
585 return -ENOMEM; 591 return -ENOMEM;
592
593 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
594 sd->s_ns = ns;
586 sd->s_dir.kobj = kobj; 595 sd->s_dir.kobj = kobj;
587 596
588 /* link in */ 597 /* link in */
@@ -601,7 +610,33 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
601int sysfs_create_subdir(struct kobject *kobj, const char *name, 610int sysfs_create_subdir(struct kobject *kobj, const char *name,
602 struct sysfs_dirent **p_sd) 611 struct sysfs_dirent **p_sd)
603{ 612{
604 return create_dir(kobj, kobj->sd, name, p_sd); 613 return create_dir(kobj, kobj->sd,
614 KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
615}
616
617/**
618 * sysfs_read_ns_type: return associated ns_type
619 * @kobj: the kobject being queried
620 *
621 * Each kobject can be tagged with exactly one namespace type
622 * (i.e. network or user). Return the ns_type associated with
623 * this object if any
624 */
625static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
626{
627 const struct kobj_ns_type_operations *ops;
628 enum kobj_ns_type type;
629
630 ops = kobj_child_ns_ops(kobj);
631 if (!ops)
632 return KOBJ_NS_TYPE_NONE;
633
634 type = ops->type;
635 BUG_ON(type <= KOBJ_NS_TYPE_NONE);
636 BUG_ON(type >= KOBJ_NS_TYPES);
637 BUG_ON(!kobj_ns_type_registered(type));
638
639 return type;
605} 640}
606 641
607/** 642/**
@@ -610,7 +645,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
610 */ 645 */
611int sysfs_create_dir(struct kobject * kobj) 646int sysfs_create_dir(struct kobject * kobj)
612{ 647{
648 enum kobj_ns_type type;
613 struct sysfs_dirent *parent_sd, *sd; 649 struct sysfs_dirent *parent_sd, *sd;
650 const void *ns = NULL;
614 int error = 0; 651 int error = 0;
615 652
616 BUG_ON(!kobj); 653 BUG_ON(!kobj);
@@ -620,7 +657,11 @@ int sysfs_create_dir(struct kobject * kobj)
620 else 657 else
621 parent_sd = &sysfs_root; 658 parent_sd = &sysfs_root;
622 659
623 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); 660 if (sysfs_ns_type(parent_sd))
661 ns = kobj->ktype->namespace(kobj);
662 type = sysfs_read_ns_type(kobj);
663
664 error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
624 if (!error) 665 if (!error)
625 kobj->sd = sd; 666 kobj->sd = sd;
626 return error; 667 return error;
@@ -630,13 +671,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
630 struct nameidata *nd) 671 struct nameidata *nd)
631{ 672{
632 struct dentry *ret = NULL; 673 struct dentry *ret = NULL;
633 struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata; 674 struct dentry *parent = dentry->d_parent;
675 struct sysfs_dirent *parent_sd = parent->d_fsdata;
634 struct sysfs_dirent *sd; 676 struct sysfs_dirent *sd;
635 struct inode *inode; 677 struct inode *inode;
678 enum kobj_ns_type type;
679 const void *ns;
636 680
637 mutex_lock(&sysfs_mutex); 681 mutex_lock(&sysfs_mutex);
638 682
639 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); 683 type = sysfs_ns_type(parent_sd);
684 ns = sysfs_info(dir->i_sb)->ns[type];
685
686 sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);
640 687
641 /* no such entry */ 688 /* no such entry */
642 if (!sd) { 689 if (!sd) {
@@ -735,7 +782,8 @@ void sysfs_remove_dir(struct kobject * kobj)
735} 782}
736 783
737int sysfs_rename(struct sysfs_dirent *sd, 784int sysfs_rename(struct sysfs_dirent *sd,
738 struct sysfs_dirent *new_parent_sd, const char *new_name) 785 struct sysfs_dirent *new_parent_sd, const void *new_ns,
786 const char *new_name)
739{ 787{
740 const char *dup_name = NULL; 788 const char *dup_name = NULL;
741 int error; 789 int error;
@@ -743,12 +791,12 @@ int sysfs_rename(struct sysfs_dirent *sd,
743 mutex_lock(&sysfs_mutex); 791 mutex_lock(&sysfs_mutex);
744 792
745 error = 0; 793 error = 0;
746 if ((sd->s_parent == new_parent_sd) && 794 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
747 (strcmp(sd->s_name, new_name) == 0)) 795 (strcmp(sd->s_name, new_name) == 0))
748 goto out; /* nothing to rename */ 796 goto out; /* nothing to rename */
749 797
750 error = -EEXIST; 798 error = -EEXIST;
751 if (sysfs_find_dirent(new_parent_sd, new_name)) 799 if (sysfs_find_dirent(new_parent_sd, new_ns, new_name))
752 goto out; 800 goto out;
753 801
754 /* rename sysfs_dirent */ 802 /* rename sysfs_dirent */
@@ -770,6 +818,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
770 sd->s_parent = new_parent_sd; 818 sd->s_parent = new_parent_sd;
771 sysfs_link_sibling(sd); 819 sysfs_link_sibling(sd);
772 } 820 }
821 sd->s_ns = new_ns;
773 822
774 error = 0; 823 error = 0;
775 out: 824 out:
@@ -780,19 +829,28 @@ int sysfs_rename(struct sysfs_dirent *sd,
780 829
781int sysfs_rename_dir(struct kobject *kobj, const char *new_name) 830int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
782{ 831{
783 return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name); 832 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
833 const void *new_ns = NULL;
834
835 if (sysfs_ns_type(parent_sd))
836 new_ns = kobj->ktype->namespace(kobj);
837
838 return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
784} 839}
785 840
786int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) 841int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
787{ 842{
788 struct sysfs_dirent *sd = kobj->sd; 843 struct sysfs_dirent *sd = kobj->sd;
789 struct sysfs_dirent *new_parent_sd; 844 struct sysfs_dirent *new_parent_sd;
845 const void *new_ns = NULL;
790 846
791 BUG_ON(!sd->s_parent); 847 BUG_ON(!sd->s_parent);
848 if (sysfs_ns_type(sd->s_parent))
849 new_ns = kobj->ktype->namespace(kobj);
792 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 850 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
793 new_parent_kobj->sd : &sysfs_root; 851 new_parent_kobj->sd : &sysfs_root;
794 852
795 return sysfs_rename(sd, new_parent_sd, sd->s_name); 853 return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name);
796} 854}
797 855
798/* Relationship between s_mode and the DT_xxx types */ 856/* Relationship between s_mode and the DT_xxx types */
@@ -807,32 +865,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
807 return 0; 865 return 0;
808} 866}
809 867
810static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd, 868static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
811 ino_t ino, struct sysfs_dirent *pos) 869 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
812{ 870{
813 if (pos) { 871 if (pos) {
814 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 872 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
815 pos->s_parent == parent_sd && 873 pos->s_parent == parent_sd &&
816 ino == pos->s_ino; 874 ino == pos->s_ino;
817 sysfs_put(pos); 875 sysfs_put(pos);
818 if (valid) 876 if (!valid)
819 return pos; 877 pos = NULL;
820 } 878 }
821 pos = NULL; 879 if (!pos && (ino > 1) && (ino < INT_MAX)) {
822 if ((ino > 1) && (ino < INT_MAX)) {
823 pos = parent_sd->s_dir.children; 880 pos = parent_sd->s_dir.children;
824 while (pos && (ino > pos->s_ino)) 881 while (pos && (ino > pos->s_ino))
825 pos = pos->s_sibling; 882 pos = pos->s_sibling;
826 } 883 }
884 while (pos && pos->s_ns && pos->s_ns != ns)
885 pos = pos->s_sibling;
827 return pos; 886 return pos;
828} 887}
829 888
830static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd, 889static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
831 ino_t ino, struct sysfs_dirent *pos) 890 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
832{ 891{
833 pos = sysfs_dir_pos(parent_sd, ino, pos); 892 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
834 if (pos) 893 if (pos)
835 pos = pos->s_sibling; 894 pos = pos->s_sibling;
895 while (pos && pos->s_ns && pos->s_ns != ns)
896 pos = pos->s_sibling;
836 return pos; 897 return pos;
837} 898}
838 899
@@ -841,8 +902,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
841 struct dentry *dentry = filp->f_path.dentry; 902 struct dentry *dentry = filp->f_path.dentry;
842 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 903 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
843 struct sysfs_dirent *pos = filp->private_data; 904 struct sysfs_dirent *pos = filp->private_data;
905 enum kobj_ns_type type;
906 const void *ns;
844 ino_t ino; 907 ino_t ino;
845 908
909 type = sysfs_ns_type(parent_sd);
910 ns = sysfs_info(dentry->d_sb)->ns[type];
911
846 if (filp->f_pos == 0) { 912 if (filp->f_pos == 0) {
847 ino = parent_sd->s_ino; 913 ino = parent_sd->s_ino;
848 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) 914 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
@@ -857,9 +923,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
857 filp->f_pos++; 923 filp->f_pos++;
858 } 924 }
859 mutex_lock(&sysfs_mutex); 925 mutex_lock(&sysfs_mutex);
860 for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos); 926 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
861 pos; 927 pos;
862 pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) { 928 pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) {
863 const char * name; 929 const char * name;
864 unsigned int type; 930 unsigned int type;
865 int len, ret; 931 int len, ret;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e222b2582746..1beaa739d0a6 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -478,9 +478,12 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
478 mutex_lock(&sysfs_mutex); 478 mutex_lock(&sysfs_mutex);
479 479
480 if (sd && dir) 480 if (sd && dir)
481 sd = sysfs_find_dirent(sd, dir); 481 /* Only directories are tagged, so no need to pass
482 * a tag explicitly.
483 */
484 sd = sysfs_find_dirent(sd, NULL, dir);
482 if (sd && attr) 485 if (sd && attr)
483 sd = sysfs_find_dirent(sd, attr); 486 sd = sysfs_find_dirent(sd, NULL, attr);
484 if (sd) 487 if (sd)
485 sysfs_notify_dirent(sd); 488 sysfs_notify_dirent(sd);
486 489
@@ -569,7 +572,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
569 int error; 572 int error;
570 573
571 if (group) 574 if (group)
572 dir_sd = sysfs_get_dirent(kobj->sd, group); 575 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
573 else 576 else
574 dir_sd = sysfs_get(kobj->sd); 577 dir_sd = sysfs_get(kobj->sd);
575 578
@@ -599,7 +602,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
599 mutex_lock(&sysfs_mutex); 602 mutex_lock(&sysfs_mutex);
600 603
601 rc = -ENOENT; 604 rc = -ENOENT;
602 sd = sysfs_find_dirent(kobj->sd, attr->name); 605 sd = sysfs_find_dirent(kobj->sd, NULL, attr->name);
603 if (!sd) 606 if (!sd)
604 goto out; 607 goto out;
605 608
@@ -624,7 +627,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
624 627
625void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 628void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
626{ 629{
627 sysfs_hash_and_remove(kobj->sd, attr->name); 630 sysfs_hash_and_remove(kobj->sd, NULL, attr->name);
628} 631}
629 632
630void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) 633void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
@@ -646,11 +649,11 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
646 struct sysfs_dirent *dir_sd; 649 struct sysfs_dirent *dir_sd;
647 650
648 if (group) 651 if (group)
649 dir_sd = sysfs_get_dirent(kobj->sd, group); 652 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
650 else 653 else
651 dir_sd = sysfs_get(kobj->sd); 654 dir_sd = sysfs_get(kobj->sd);
652 if (dir_sd) { 655 if (dir_sd) {
653 sysfs_hash_and_remove(dir_sd, attr->name); 656 sysfs_hash_and_remove(dir_sd, NULL, attr->name);
654 sysfs_put(dir_sd); 657 sysfs_put(dir_sd);
655 } 658 }
656} 659}
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index fe611949a7f7..23c1e598792a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
23 int i; 23 int i;
24 24
25 for (i = 0, attr = grp->attrs; *attr; i++, attr++) 25 for (i = 0, attr = grp->attrs; *attr; i++, attr++)
26 sysfs_hash_and_remove(dir_sd, (*attr)->name); 26 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
27} 27}
28 28
29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
39 * visibility. Do this by first removing then 39 * visibility. Do this by first removing then
40 * re-adding (if required) the file */ 40 * re-adding (if required) the file */
41 if (update) 41 if (update)
42 sysfs_hash_and_remove(dir_sd, (*attr)->name); 42 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
43 if (grp->is_visible) { 43 if (grp->is_visible) {
44 mode = grp->is_visible(kobj, *attr, i); 44 mode = grp->is_visible(kobj, *attr, i);
45 if (!mode) 45 if (!mode)
@@ -132,7 +132,7 @@ void sysfs_remove_group(struct kobject * kobj,
132 struct sysfs_dirent *sd; 132 struct sysfs_dirent *sd;
133 133
134 if (grp->name) { 134 if (grp->name) {
135 sd = sysfs_get_dirent(dir_sd, grp->name); 135 sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
136 if (!sd) { 136 if (!sd) {
137 WARN(!sd, KERN_WARNING "sysfs group %p not found for " 137 WARN(!sd, KERN_WARNING "sysfs group %p not found for "
138 "kobject '%s'\n", grp, kobject_name(kobj)); 138 "kobject '%s'\n", grp, kobject_name(kobj));
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index a4a0a9419711..bbd77e95cf7f 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -324,7 +324,7 @@ void sysfs_delete_inode(struct inode *inode)
324 sysfs_put(sd); 324 sysfs_put(sd);
325} 325}
326 326
327int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) 327int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name)
328{ 328{
329 struct sysfs_addrm_cxt acxt; 329 struct sysfs_addrm_cxt acxt;
330 struct sysfs_dirent *sd; 330 struct sysfs_dirent *sd;
@@ -334,7 +334,9 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
334 334
335 sysfs_addrm_start(&acxt, dir_sd); 335 sysfs_addrm_start(&acxt, dir_sd);
336 336
337 sd = sysfs_find_dirent(dir_sd, name); 337 sd = sysfs_find_dirent(dir_sd, ns, name);
338 if (sd && (sd->s_ns != ns))
339 sd = NULL;
338 if (sd) 340 if (sd)
339 sysfs_remove_one(&acxt, sd); 341 sysfs_remove_one(&acxt, sd);
340 342
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 776137828dca..281c0c9bc39f 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -35,7 +35,7 @@ static const struct super_operations sysfs_ops = {
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
36 .s_name = "", 36 .s_name = "",
37 .s_count = ATOMIC_INIT(1), 37 .s_count = ATOMIC_INIT(1),
38 .s_flags = SYSFS_DIR, 38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
40 .s_ino = 1, 40 .s_ino = 1,
41}; 41};
@@ -72,18 +72,107 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
72 return 0; 72 return 0;
73} 73}
74 74
75static int sysfs_test_super(struct super_block *sb, void *data)
76{
77 struct sysfs_super_info *sb_info = sysfs_info(sb);
78 struct sysfs_super_info *info = data;
79 enum kobj_ns_type type;
80 int found = 1;
81
82 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
83 if (sb_info->ns[type] != info->ns[type])
84 found = 0;
85 }
86 return found;
87}
88
89static int sysfs_set_super(struct super_block *sb, void *data)
90{
91 int error;
92 error = set_anon_super(sb, data);
93 if (!error)
94 sb->s_fs_info = data;
95 return error;
96}
97
75static int sysfs_get_sb(struct file_system_type *fs_type, 98static int sysfs_get_sb(struct file_system_type *fs_type,
76 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 99 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
77{ 100{
78 return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt); 101 struct sysfs_super_info *info;
102 enum kobj_ns_type type;
103 struct super_block *sb;
104 int error;
105
106 error = -ENOMEM;
107 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info)
109 goto out;
110
111 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
112 info->ns[type] = kobj_ns_current(type);
113
114 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
115 if (IS_ERR(sb) || sb->s_fs_info != info)
116 kfree(info);
117 if (IS_ERR(sb)) {
118 error = PTR_ERR(sb);
119 goto out;
120 }
121 if (!sb->s_root) {
122 sb->s_flags = flags;
123 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
124 if (error) {
125 deactivate_locked_super(sb);
126 goto out;
127 }
128 sb->s_flags |= MS_ACTIVE;
129 }
130
131 simple_set_mnt(mnt, sb);
132 error = 0;
133out:
134 return error;
135}
136
137static void sysfs_kill_sb(struct super_block *sb)
138{
139 struct sysfs_super_info *info = sysfs_info(sb);
140
141 /* Remove the superblock from fs_supers/s_instances
142 * so we can't find it, before freeing sysfs_super_info.
143 */
144 kill_anon_super(sb);
145 kfree(info);
79} 146}
80 147
81static struct file_system_type sysfs_fs_type = { 148static struct file_system_type sysfs_fs_type = {
82 .name = "sysfs", 149 .name = "sysfs",
83 .get_sb = sysfs_get_sb, 150 .get_sb = sysfs_get_sb,
84 .kill_sb = kill_anon_super, 151 .kill_sb = sysfs_kill_sb,
85}; 152};
86 153
154void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
155{
156 struct super_block *sb;
157
158 mutex_lock(&sysfs_mutex);
159 spin_lock(&sb_lock);
160 list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
161 struct sysfs_super_info *info = sysfs_info(sb);
162 /*
163 * If we see a superblock on the fs_supers/s_instances
164 * list the unmount has not completed and sb->s_fs_info
165 * points to a valid struct sysfs_super_info.
166 */
167 /* Ignore superblocks with the wrong ns */
168 if (info->ns[type] != ns)
169 continue;
170 info->ns[type] = NULL;
171 }
172 spin_unlock(&sb_lock);
173 mutex_unlock(&sysfs_mutex);
174}
175
87int __init sysfs_init(void) 176int __init sysfs_init(void)
88{ 177{
89 int err = -ENOMEM; 178 int err = -ENOMEM;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index b93ec51fa7ac..f71246bebfe4 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -58,6 +58,8 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
58 if (!sd) 58 if (!sd)
59 goto out_put; 59 goto out_put;
60 60
61 if (sysfs_ns_type(parent_sd))
62 sd->s_ns = target->ktype->namespace(target);
61 sd->s_symlink.target_sd = target_sd; 63 sd->s_symlink.target_sd = target_sd;
62 target_sd = NULL; /* reference is now owned by the symlink */ 64 target_sd = NULL; /* reference is now owned by the symlink */
63 65
@@ -107,6 +109,26 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
107} 109}
108 110
109/** 111/**
112 * sysfs_delete_link - remove symlink in object's directory.
113 * @kobj: object we're acting for.
114 * @targ: object we're pointing to.
115 * @name: name of the symlink to remove.
116 *
117 * Unlike sysfs_remove_link sysfs_delete_link has enough information
118 * to successfully delete symlinks in tagged directories.
119 */
120void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
121 const char *name)
122{
123 const void *ns = NULL;
124 spin_lock(&sysfs_assoc_lock);
125 if (targ->sd)
126 ns = targ->sd->s_ns;
127 spin_unlock(&sysfs_assoc_lock);
128 sysfs_hash_and_remove(kobj->sd, ns, name);
129}
130
131/**
110 * sysfs_remove_link - remove symlink in object's directory. 132 * sysfs_remove_link - remove symlink in object's directory.
111 * @kobj: object we're acting for. 133 * @kobj: object we're acting for.
112 * @name: name of the symlink to remove. 134 * @name: name of the symlink to remove.
@@ -121,7 +143,7 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
121 else 143 else
122 parent_sd = kobj->sd; 144 parent_sd = kobj->sd;
123 145
124 sysfs_hash_and_remove(parent_sd, name); 146 sysfs_hash_and_remove(parent_sd, NULL, name);
125} 147}
126 148
127/** 149/**
@@ -137,6 +159,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
137 const char *old, const char *new) 159 const char *old, const char *new)
138{ 160{
139 struct sysfs_dirent *parent_sd, *sd = NULL; 161 struct sysfs_dirent *parent_sd, *sd = NULL;
162 const void *old_ns = NULL, *new_ns = NULL;
140 int result; 163 int result;
141 164
142 if (!kobj) 165 if (!kobj)
@@ -144,8 +167,11 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
144 else 167 else
145 parent_sd = kobj->sd; 168 parent_sd = kobj->sd;
146 169
170 if (targ->sd)
171 old_ns = targ->sd->s_ns;
172
147 result = -ENOENT; 173 result = -ENOENT;
148 sd = sysfs_get_dirent(parent_sd, old); 174 sd = sysfs_get_dirent(parent_sd, old_ns, old);
149 if (!sd) 175 if (!sd)
150 goto out; 176 goto out;
151 177
@@ -155,7 +181,10 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
155 if (sd->s_symlink.target_sd->s_dir.kobj != targ) 181 if (sd->s_symlink.target_sd->s_dir.kobj != targ)
156 goto out; 182 goto out;
157 183
158 result = sysfs_rename(sd, parent_sd, new); 184 if (sysfs_ns_type(parent_sd))
185 new_ns = targ->ktype->namespace(targ);
186
187 result = sysfs_rename(sd, parent_sd, new_ns, new);
159 188
160out: 189out:
161 sysfs_put(sd); 190 sysfs_put(sd);
@@ -261,3 +290,4 @@ const struct inode_operations sysfs_symlink_inode_operations = {
261 290
262EXPORT_SYMBOL_GPL(sysfs_create_link); 291EXPORT_SYMBOL_GPL(sysfs_create_link);
263EXPORT_SYMBOL_GPL(sysfs_remove_link); 292EXPORT_SYMBOL_GPL(sysfs_remove_link);
293EXPORT_SYMBOL_GPL(sysfs_rename_link);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 30f5a44fb5d3..6a13105b5594 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -58,6 +58,7 @@ struct sysfs_dirent {
58 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
59 const char *s_name; 59 const char *s_name;
60 60
61 const void *s_ns; /* namespace tag */
61 union { 62 union {
62 struct sysfs_elem_dir s_dir; 63 struct sysfs_elem_dir s_dir;
63 struct sysfs_elem_symlink s_symlink; 64 struct sysfs_elem_symlink s_symlink;
@@ -81,14 +82,27 @@ struct sysfs_dirent {
81#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) 82#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
82#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) 83#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
83 84
84#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK 85/* identify any namespace tag on sysfs_dirents */
85#define SYSFS_FLAG_REMOVED 0x0200 86#define SYSFS_NS_TYPE_MASK 0xff00
87#define SYSFS_NS_TYPE_SHIFT 8
88
89#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
90#define SYSFS_FLAG_REMOVED 0x020000
86 91
87static inline unsigned int sysfs_type(struct sysfs_dirent *sd) 92static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
88{ 93{
89 return sd->s_flags & SYSFS_TYPE_MASK; 94 return sd->s_flags & SYSFS_TYPE_MASK;
90} 95}
91 96
97/*
98 * Return any namespace tags on this dirent.
99 * enum kobj_ns_type is defined in linux/kobject.h
100 */
101static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
102{
103 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
104}
105
92#ifdef CONFIG_DEBUG_LOCK_ALLOC 106#ifdef CONFIG_DEBUG_LOCK_ALLOC
93#define sysfs_dirent_init_lockdep(sd) \ 107#define sysfs_dirent_init_lockdep(sd) \
94do { \ 108do { \
@@ -114,6 +128,16 @@ struct sysfs_addrm_cxt {
114/* 128/*
115 * mount.c 129 * mount.c
116 */ 130 */
131
132/*
133 * Each sb is associated with a set of namespace tags (i.e.
134 * the network namespace of the task which mounted this sysfs
135 * instance).
136 */
137struct sysfs_super_info {
138 const void *ns[KOBJ_NS_TYPES];
139};
140#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
117extern struct sysfs_dirent sysfs_root; 141extern struct sysfs_dirent sysfs_root;
118extern struct kmem_cache *sysfs_dir_cachep; 142extern struct kmem_cache *sysfs_dir_cachep;
119 143
@@ -137,8 +161,10 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
137void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 161void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
138 162
139struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, 163struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
164 const void *ns,
140 const unsigned char *name); 165 const unsigned char *name);
141struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, 166struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
167 const void *ns,
142 const unsigned char *name); 168 const unsigned char *name);
143struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); 169struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
144 170
@@ -149,7 +175,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
149void sysfs_remove_subdir(struct sysfs_dirent *sd); 175void sysfs_remove_subdir(struct sysfs_dirent *sd);
150 176
151int sysfs_rename(struct sysfs_dirent *sd, 177int sysfs_rename(struct sysfs_dirent *sd,
152 struct sysfs_dirent *new_parent_sd, const char *new_name); 178 struct sysfs_dirent *new_parent_sd, const void *ns, const char *new_name);
153 179
154static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) 180static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
155{ 181{
@@ -179,7 +205,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
179int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
180int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
181 size_t size, int flags); 207 size_t size, int flags);
182int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 208int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name);
183int sysfs_inode_init(void); 209int sysfs_inode_init(void);
184 210
185/* 211/*
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 241e9765cfad..bbd69bdb0fa8 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -159,15 +159,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
159 *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); 159 *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count);
160 fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); 160 fs16_add(sbi, sbi->s_sb_total_free_inodes, -1);
161 dirty_sb(sb); 161 dirty_sb(sb);
162 162 inode_init_owner(inode, dir, mode);
163 if (dir->i_mode & S_ISGID) {
164 inode->i_gid = dir->i_gid;
165 if (S_ISDIR(mode))
166 mode |= S_ISGID;
167 } else
168 inode->i_gid = current_fsgid();
169
170 inode->i_uid = current_fsuid();
171 inode->i_ino = fs16_to_cpu(sbi, ino); 163 inode->i_ino = fs16_to_cpu(sbi, ino);
172 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 164 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
173 inode->i_blocks = 0; 165 inode->i_blocks = 0;
@@ -176,7 +168,6 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
176 insert_inode_hash(inode); 168 insert_inode_hash(inode);
177 mark_inode_dirty(inode); 169 mark_inode_dirty(inode);
178 170
179 inode->i_mode = mode; /* for sysv_write_inode() */
180 sysv_write_inode(inode, 0); /* ensure inode not allocated again */ 171 sysv_write_inode(inode, 0); /* ensure inode not allocated again */
181 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ 172 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
182 /* That's it. */ 173 /* That's it. */
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 98158de91d24..b86ab8eff79a 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -110,31 +110,14 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
110 struct timerfd_ctx *ctx = file->private_data; 110 struct timerfd_ctx *ctx = file->private_data;
111 ssize_t res; 111 ssize_t res;
112 u64 ticks = 0; 112 u64 ticks = 0;
113 DECLARE_WAITQUEUE(wait, current);
114 113
115 if (count < sizeof(ticks)) 114 if (count < sizeof(ticks))
116 return -EINVAL; 115 return -EINVAL;
117 spin_lock_irq(&ctx->wqh.lock); 116 spin_lock_irq(&ctx->wqh.lock);
118 res = -EAGAIN; 117 if (file->f_flags & O_NONBLOCK)
119 if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { 118 res = -EAGAIN;
120 __add_wait_queue(&ctx->wqh, &wait); 119 else
121 for (res = 0;;) { 120 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
122 set_current_state(TASK_INTERRUPTIBLE);
123 if (ctx->ticks) {
124 res = 0;
125 break;
126 }
127 if (signal_pending(current)) {
128 res = -ERESTARTSYS;
129 break;
130 }
131 spin_unlock_irq(&ctx->wqh.lock);
132 schedule();
133 spin_lock_irq(&ctx->wqh.lock);
134 }
135 __remove_wait_queue(&ctx->wqh, &wait);
136 __set_current_state(TASK_RUNNING);
137 }
138 if (ctx->ticks) { 121 if (ctx->ticks) {
139 ticks = ctx->ticks; 122 ticks = ctx->ticks;
140 if (ctx->expired && ctx->tintv.tv64) { 123 if (ctx->expired && ctx->tintv.tv64) {
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 401e503d44a1..87ebcce72213 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -104,14 +104,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
104 */ 104 */
105 inode->i_flags |= (S_NOCMTIME); 105 inode->i_flags |= (S_NOCMTIME);
106 106
107 inode->i_uid = current_fsuid(); 107 inode_init_owner(inode, dir, mode);
108 if (dir->i_mode & S_ISGID) {
109 inode->i_gid = dir->i_gid;
110 if (S_ISDIR(mode))
111 mode |= S_ISGID;
112 } else
113 inode->i_gid = current_fsgid();
114 inode->i_mode = mode;
115 inode->i_mtime = inode->i_atime = inode->i_ctime = 108 inode->i_mtime = inode->i_atime = inode->i_ctime =
116 ubifs_current_time(inode); 109 ubifs_current_time(inode);
117 inode->i_mapping->nrpages = 0; 110 inode->i_mapping->nrpages = 0;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 77d5cf4a7547..bcf5a16f30bb 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -64,6 +64,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
64 if (!c->ro_media) { 64 if (!c->ro_media) {
65 c->ro_media = 1; 65 c->ro_media = 1;
66 c->no_chk_data_crc = 0; 66 c->no_chk_data_crc = 0;
67 c->vfs_sb->s_flags |= MS_RDONLY;
67 ubifs_warn("switched to read-only mode, error %d", err); 68 ubifs_warn("switched to read-only mode, error %d", err);
68 dbg_dump_stack(); 69 dbg_dump_stack();
69 } 70 }
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index f0f2a436251e..3a84455c2a77 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -209,6 +209,6 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
209const struct file_operations udf_dir_operations = { 209const struct file_operations udf_dir_operations = {
210 .read = generic_read_dir, 210 .read = generic_read_dir,
211 .readdir = udf_readdir, 211 .readdir = udf_readdir,
212 .ioctl = udf_ioctl, 212 .unlocked_ioctl = udf_ioctl,
213 .fsync = simple_fsync, 213 .fsync = simple_fsync,
214}; 214};
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 4b6a46ccbf46..baae3a723946 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -37,6 +37,7 @@
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/aio.h> 39#include <linux/aio.h>
40#include <linux/smp_lock.h>
40 41
41#include "udf_i.h" 42#include "udf_i.h"
42#include "udf_sb.h" 43#include "udf_sb.h"
@@ -144,50 +145,60 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
144 return retval; 145 return retval;
145} 146}
146 147
147int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 148long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
148 unsigned long arg)
149{ 149{
150 struct inode *inode = filp->f_dentry->d_inode;
150 long old_block, new_block; 151 long old_block, new_block;
151 int result = -EINVAL; 152 int result = -EINVAL;
152 153
154 lock_kernel();
155
153 if (file_permission(filp, MAY_READ) != 0) { 156 if (file_permission(filp, MAY_READ) != 0) {
154 udf_debug("no permission to access inode %lu\n", 157 udf_debug("no permission to access inode %lu\n", inode->i_ino);
155 inode->i_ino); 158 result = -EPERM;
156 return -EPERM; 159 goto out;
157 } 160 }
158 161
159 if (!arg) { 162 if (!arg) {
160 udf_debug("invalid argument to udf_ioctl\n"); 163 udf_debug("invalid argument to udf_ioctl\n");
161 return -EINVAL; 164 result = -EINVAL;
165 goto out;
162 } 166 }
163 167
164 switch (cmd) { 168 switch (cmd) {
165 case UDF_GETVOLIDENT: 169 case UDF_GETVOLIDENT:
166 if (copy_to_user((char __user *)arg, 170 if (copy_to_user((char __user *)arg,
167 UDF_SB(inode->i_sb)->s_volume_ident, 32)) 171 UDF_SB(inode->i_sb)->s_volume_ident, 32))
168 return -EFAULT; 172 result = -EFAULT;
169 else 173 else
170 return 0; 174 result = 0;
175 goto out;
171 case UDF_RELOCATE_BLOCKS: 176 case UDF_RELOCATE_BLOCKS:
172 if (!capable(CAP_SYS_ADMIN)) 177 if (!capable(CAP_SYS_ADMIN)) {
173 return -EACCES; 178 result = -EACCES;
174 if (get_user(old_block, (long __user *)arg)) 179 goto out;
175 return -EFAULT; 180 }
181 if (get_user(old_block, (long __user *)arg)) {
182 result = -EFAULT;
183 goto out;
184 }
176 result = udf_relocate_blocks(inode->i_sb, 185 result = udf_relocate_blocks(inode->i_sb,
177 old_block, &new_block); 186 old_block, &new_block);
178 if (result == 0) 187 if (result == 0)
179 result = put_user(new_block, (long __user *)arg); 188 result = put_user(new_block, (long __user *)arg);
180 return result; 189 goto out;
181 case UDF_GETEASIZE: 190 case UDF_GETEASIZE:
182 result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); 191 result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg);
183 break; 192 goto out;
184 case UDF_GETEABLOCK: 193 case UDF_GETEABLOCK:
185 result = copy_to_user((char __user *)arg, 194 result = copy_to_user((char __user *)arg,
186 UDF_I(inode)->i_ext.i_data, 195 UDF_I(inode)->i_ext.i_data,
187 UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; 196 UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0;
188 break; 197 goto out;
189 } 198 }
190 199
200out:
201 unlock_kernel();
191 return result; 202 return result;
192} 203}
193 204
@@ -207,7 +218,7 @@ static int udf_release_file(struct inode *inode, struct file *filp)
207const struct file_operations udf_file_operations = { 218const struct file_operations udf_file_operations = {
208 .read = do_sync_read, 219 .read = do_sync_read,
209 .aio_read = generic_file_aio_read, 220 .aio_read = generic_file_aio_read,
210 .ioctl = udf_ioctl, 221 .unlocked_ioctl = udf_ioctl,
211 .open = dquot_file_open, 222 .open = dquot_file_open,
212 .mmap = generic_file_mmap, 223 .mmap = generic_file_mmap,
213 .write = do_sync_write, 224 .write = do_sync_write,
@@ -227,7 +238,7 @@ int udf_setattr(struct dentry *dentry, struct iattr *iattr)
227 if (error) 238 if (error)
228 return error; 239 return error;
229 240
230 if (iattr->ia_valid & ATTR_SIZE) 241 if (is_quota_modification(inode, iattr))
231 dquot_initialize(inode); 242 dquot_initialize(inode);
232 243
233 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 244 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index fb68c9cd0c3e..2b5586c7f02a 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -124,15 +124,8 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
124 udf_updated_lvid(sb); 124 udf_updated_lvid(sb);
125 } 125 }
126 mutex_unlock(&sbi->s_alloc_mutex); 126 mutex_unlock(&sbi->s_alloc_mutex);
127 inode->i_mode = mode; 127
128 inode->i_uid = current_fsuid(); 128 inode_init_owner(inode, dir, mode);
129 if (dir->i_mode & S_ISGID) {
130 inode->i_gid = dir->i_gid;
131 if (S_ISDIR(mode))
132 mode |= S_ISGID;
133 } else {
134 inode->i_gid = current_fsgid();
135 }
136 129
137 iinfo->i_location.logicalBlockNum = block; 130 iinfo->i_location.logicalBlockNum = block;
138 iinfo->i_location.partitionReferenceNum = 131 iinfo->i_location.partitionReferenceNum =
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 75816025f95f..585f733615dc 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -579,7 +579,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
579 inode->i_data.a_ops = &udf_aops; 579 inode->i_data.a_ops = &udf_aops;
580 inode->i_op = &udf_file_inode_operations; 580 inode->i_op = &udf_file_inode_operations;
581 inode->i_fop = &udf_file_operations; 581 inode->i_fop = &udf_file_operations;
582 inode->i_mode = mode;
583 mark_inode_dirty(inode); 582 mark_inode_dirty(inode);
584 583
585 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 584 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
@@ -627,7 +626,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
627 goto out; 626 goto out;
628 627
629 iinfo = UDF_I(inode); 628 iinfo = UDF_I(inode);
630 inode->i_uid = current_fsuid();
631 init_special_inode(inode, mode, rdev); 629 init_special_inode(inode, mode, rdev);
632 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 630 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
633 if (!fi) { 631 if (!fi) {
@@ -674,7 +672,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
674 goto out; 672 goto out;
675 673
676 err = -EIO; 674 err = -EIO;
677 inode = udf_new_inode(dir, S_IFDIR, &err); 675 inode = udf_new_inode(dir, S_IFDIR | mode, &err);
678 if (!inode) 676 if (!inode)
679 goto out; 677 goto out;
680 678
@@ -697,9 +695,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
697 FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; 695 FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT;
698 udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); 696 udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL);
699 brelse(fibh.sbh); 697 brelse(fibh.sbh);
700 inode->i_mode = S_IFDIR | mode;
701 if (dir->i_mode & S_ISGID)
702 inode->i_mode |= S_ISGID;
703 mark_inode_dirty(inode); 698 mark_inode_dirty(inode);
704 699
705 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 700 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
@@ -912,7 +907,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
912 dquot_initialize(dir); 907 dquot_initialize(dir);
913 908
914 lock_kernel(); 909 lock_kernel();
915 inode = udf_new_inode(dir, S_IFLNK, &err); 910 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
916 if (!inode) 911 if (!inode)
917 goto out; 912 goto out;
918 913
@@ -923,7 +918,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
923 } 918 }
924 919
925 iinfo = UDF_I(inode); 920 iinfo = UDF_I(inode);
926 inode->i_mode = S_IFLNK | S_IRWXUGO;
927 inode->i_data.a_ops = &udf_symlink_aops; 921 inode->i_data.a_ops = &udf_symlink_aops;
928 inode->i_op = &udf_symlink_inode_operations; 922 inode->i_op = &udf_symlink_inode_operations;
929 923
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 702a1148e702..9079ff7d6255 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -130,8 +130,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
130 uint8_t *, uint8_t *); 130 uint8_t *, uint8_t *);
131 131
132/* file.c */ 132/* file.c */
133extern int udf_ioctl(struct inode *, struct file *, unsigned int, 133extern long udf_ioctl(struct file *, unsigned int, unsigned long);
134 unsigned long);
135extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); 134extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
136/* inode.c */ 135/* inode.c */
137extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 136extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 230ecf608026..3a959d55084d 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -303,15 +303,7 @@ cg_found:
303 sb->s_dirt = 1; 303 sb->s_dirt = 1;
304 304
305 inode->i_ino = cg * uspi->s_ipg + bit; 305 inode->i_ino = cg * uspi->s_ipg + bit;
306 inode->i_mode = mode; 306 inode_init_owner(inode, dir, mode);
307 inode->i_uid = current_fsuid();
308 if (dir->i_mode & S_ISGID) {
309 inode->i_gid = dir->i_gid;
310 if (S_ISDIR(mode))
311 inode->i_mode |= S_ISGID;
312 } else
313 inode->i_gid = current_fsgid();
314
315 inode->i_blocks = 0; 307 inode->i_blocks = 0;
316 inode->i_generation = 0; 308 inode->i_generation = 0;
317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 309 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 80b68c3702d1..cffa756f1047 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -603,7 +603,7 @@ static void ufs_set_inode_ops(struct inode *inode)
603 if (!inode->i_blocks) 603 if (!inode->i_blocks)
604 inode->i_op = &ufs_fast_symlink_inode_operations; 604 inode->i_op = &ufs_fast_symlink_inode_operations;
605 else { 605 else {
606 inode->i_op = &page_symlink_inode_operations; 606 inode->i_op = &ufs_symlink_inode_operations;
607 inode->i_mapping->a_ops = &ufs_aops; 607 inode->i_mapping->a_ops = &ufs_aops;
608 } 608 }
609 } else 609 } else
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 118556243e7a..eabc02eb1294 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -148,7 +148,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
148 148
149 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { 149 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
150 /* slow symlink */ 150 /* slow symlink */
151 inode->i_op = &page_symlink_inode_operations; 151 inode->i_op = &ufs_symlink_inode_operations;
152 inode->i_mapping->a_ops = &ufs_aops; 152 inode->i_mapping->a_ops = &ufs_aops;
153 err = page_symlink(inode, symname, l); 153 err = page_symlink(inode, symname, l);
154 if (err) 154 if (err)
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index c0156eda44bc..d283628b4778 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -42,4 +42,12 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
42const struct inode_operations ufs_fast_symlink_inode_operations = { 42const struct inode_operations ufs_fast_symlink_inode_operations = {
43 .readlink = generic_readlink, 43 .readlink = generic_readlink,
44 .follow_link = ufs_follow_link, 44 .follow_link = ufs_follow_link,
45 .setattr = ufs_setattr,
46};
47
48const struct inode_operations ufs_symlink_inode_operations = {
49 .readlink = generic_readlink,
50 .follow_link = page_follow_link_light,
51 .put_link = page_put_link,
52 .setattr = ufs_setattr,
45}; 53};
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index d3b6270cb377..f294c44577dc 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -508,7 +508,7 @@ out:
508 * - there is no way to know old size 508 * - there is no way to know old size
509 * - there is no way inform user about error, if it happens in `truncate' 509 * - there is no way inform user about error, if it happens in `truncate'
510 */ 510 */
511static int ufs_setattr(struct dentry *dentry, struct iattr *attr) 511int ufs_setattr(struct dentry *dentry, struct iattr *attr)
512{ 512{
513 struct inode *inode = dentry->d_inode; 513 struct inode *inode = dentry->d_inode;
514 unsigned int ia_valid = attr->ia_valid; 514 unsigned int ia_valid = attr->ia_valid;
@@ -518,18 +518,18 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
518 if (error) 518 if (error)
519 return error; 519 return error;
520 520
521 if (is_quota_modification(inode, attr))
522 dquot_initialize(inode);
523
521 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 524 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
522 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 525 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
523 error = dquot_transfer(inode, attr); 526 error = dquot_transfer(inode, attr);
524 if (error) 527 if (error)
525 return error; 528 return error;
526 } 529 }
527 if (ia_valid & ATTR_SIZE && 530 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
528 attr->ia_size != i_size_read(inode)) {
529 loff_t old_i_size = inode->i_size; 531 loff_t old_i_size = inode->i_size;
530 532
531 dquot_initialize(inode);
532
533 error = vmtruncate(inode, attr->ia_size); 533 error = vmtruncate(inode, attr->ia_size);
534 if (error) 534 if (error)
535 return error; 535 return error;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 43f9f5d5670e..179ae6b3180a 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -122,9 +122,11 @@ extern void ufs_panic (struct super_block *, const char *, const char *, ...) __
122 122
123/* symlink.c */ 123/* symlink.c */
124extern const struct inode_operations ufs_fast_symlink_inode_operations; 124extern const struct inode_operations ufs_fast_symlink_inode_operations;
125extern const struct inode_operations ufs_symlink_inode_operations;
125 126
126/* truncate.c */ 127/* truncate.c */
127extern int ufs_truncate (struct inode *, loff_t); 128extern int ufs_truncate (struct inode *, loff_t);
129extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
128 130
129static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) 131static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
130{ 132{
diff --git a/fs/xattr.c b/fs/xattr.c
index 46f87e828b48..01bb8135e14a 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -590,10 +590,10 @@ strcmp_prefix(const char *a, const char *a_prefix)
590/* 590/*
591 * Find the xattr_handler with the matching prefix. 591 * Find the xattr_handler with the matching prefix.
592 */ 592 */
593static struct xattr_handler * 593static const struct xattr_handler *
594xattr_resolve_name(struct xattr_handler **handlers, const char **name) 594xattr_resolve_name(const struct xattr_handler **handlers, const char **name)
595{ 595{
596 struct xattr_handler *handler; 596 const struct xattr_handler *handler;
597 597
598 if (!*name) 598 if (!*name)
599 return NULL; 599 return NULL;
@@ -614,7 +614,7 @@ xattr_resolve_name(struct xattr_handler **handlers, const char **name)
614ssize_t 614ssize_t
615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) 615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
616{ 616{
617 struct xattr_handler *handler; 617 const struct xattr_handler *handler;
618 618
619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); 619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
620 if (!handler) 620 if (!handler)
@@ -629,7 +629,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
629ssize_t 629ssize_t
630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
631{ 631{
632 struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; 632 const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
633 unsigned int size = 0; 633 unsigned int size = 0;
634 634
635 if (!buffer) { 635 if (!buffer) {
@@ -659,7 +659,7 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
659int 659int
660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) 660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
661{ 661{
662 struct xattr_handler *handler; 662 const struct xattr_handler *handler;
663 663
664 if (size == 0) 664 if (size == 0)
665 value = ""; /* empty EA, do not remove */ 665 value = ""; /* empty EA, do not remove */
@@ -676,7 +676,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
676int 676int
677generic_removexattr(struct dentry *dentry, const char *name) 677generic_removexattr(struct dentry *dentry, const char *name)
678{ 678{
679 struct xattr_handler *handler; 679 const struct xattr_handler *handler;
680 680
681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); 681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
682 if (!handler) 682 if (!handler)
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index a7bc925c4d60..9f769b5b38fc 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -440,14 +440,14 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
440 return error; 440 return error;
441} 441}
442 442
443struct xattr_handler xfs_xattr_acl_access_handler = { 443const struct xattr_handler xfs_xattr_acl_access_handler = {
444 .prefix = POSIX_ACL_XATTR_ACCESS, 444 .prefix = POSIX_ACL_XATTR_ACCESS,
445 .flags = ACL_TYPE_ACCESS, 445 .flags = ACL_TYPE_ACCESS,
446 .get = xfs_xattr_acl_get, 446 .get = xfs_xattr_acl_get,
447 .set = xfs_xattr_acl_set, 447 .set = xfs_xattr_acl_set,
448}; 448};
449 449
450struct xattr_handler xfs_xattr_acl_default_handler = { 450const struct xattr_handler xfs_xattr_acl_default_handler = {
451 .prefix = POSIX_ACL_XATTR_DEFAULT, 451 .prefix = POSIX_ACL_XATTR_DEFAULT,
452 .flags = ACL_TYPE_DEFAULT, 452 .flags = ACL_TYPE_DEFAULT,
453 .get = xfs_xattr_acl_get, 453 .get = xfs_xattr_acl_get,
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 2e73688dae9c..9ac8aea91529 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -98,7 +98,7 @@ xfs_fs_set_xstate(
98} 98}
99 99
100STATIC int 100STATIC int
101xfs_fs_get_xquota( 101xfs_fs_get_dqblk(
102 struct super_block *sb, 102 struct super_block *sb,
103 int type, 103 int type,
104 qid_t id, 104 qid_t id,
@@ -115,7 +115,7 @@ xfs_fs_get_xquota(
115} 115}
116 116
117STATIC int 117STATIC int
118xfs_fs_set_xquota( 118xfs_fs_set_dqblk(
119 struct super_block *sb, 119 struct super_block *sb,
120 int type, 120 int type,
121 qid_t id, 121 qid_t id,
@@ -136,6 +136,6 @@ xfs_fs_set_xquota(
136const struct quotactl_ops xfs_quotactl_operations = { 136const struct quotactl_ops xfs_quotactl_operations = {
137 .get_xstate = xfs_fs_get_xstate, 137 .get_xstate = xfs_fs_get_xstate,
138 .set_xstate = xfs_fs_set_xstate, 138 .set_xstate = xfs_fs_set_xstate,
139 .get_xquota = xfs_fs_get_xquota, 139 .get_dqblk = xfs_fs_get_dqblk,
140 .set_xquota = xfs_fs_set_xquota, 140 .set_dqblk = xfs_fs_set_dqblk,
141}; 141};
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 775de2b5727c..f2d1718c9165 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -735,7 +735,8 @@ void
735xfs_blkdev_issue_flush( 735xfs_blkdev_issue_flush(
736 xfs_buftarg_t *buftarg) 736 xfs_buftarg_t *buftarg)
737{ 737{
738 blkdev_issue_flush(buftarg->bt_bdev, NULL); 738 blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
739 BLKDEV_IFL_WAIT);
739} 740}
740 741
741STATIC void 742STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 233d4b9881b1..519618e9279e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -85,7 +85,7 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
85extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 85extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
86 86
87extern const struct export_operations xfs_export_operations; 87extern const struct export_operations xfs_export_operations;
88extern struct xattr_handler *xfs_xattr_handlers[]; 88extern const struct xattr_handler *xfs_xattr_handlers[];
89extern const struct quotactl_ops xfs_quotactl_operations; 89extern const struct quotactl_ops xfs_quotactl_operations;
90 90
91#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 91#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index fa01b9daba6b..87d3e03878c8 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -72,28 +72,28 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
72 (void *)value, size, xflags); 72 (void *)value, size, xflags);
73} 73}
74 74
75static struct xattr_handler xfs_xattr_user_handler = { 75static const struct xattr_handler xfs_xattr_user_handler = {
76 .prefix = XATTR_USER_PREFIX, 76 .prefix = XATTR_USER_PREFIX,
77 .flags = 0, /* no flags implies user namespace */ 77 .flags = 0, /* no flags implies user namespace */
78 .get = xfs_xattr_get, 78 .get = xfs_xattr_get,
79 .set = xfs_xattr_set, 79 .set = xfs_xattr_set,
80}; 80};
81 81
82static struct xattr_handler xfs_xattr_trusted_handler = { 82static const struct xattr_handler xfs_xattr_trusted_handler = {
83 .prefix = XATTR_TRUSTED_PREFIX, 83 .prefix = XATTR_TRUSTED_PREFIX,
84 .flags = ATTR_ROOT, 84 .flags = ATTR_ROOT,
85 .get = xfs_xattr_get, 85 .get = xfs_xattr_get,
86 .set = xfs_xattr_set, 86 .set = xfs_xattr_set,
87}; 87};
88 88
89static struct xattr_handler xfs_xattr_security_handler = { 89static const struct xattr_handler xfs_xattr_security_handler = {
90 .prefix = XATTR_SECURITY_PREFIX, 90 .prefix = XATTR_SECURITY_PREFIX,
91 .flags = ATTR_SECURE, 91 .flags = ATTR_SECURE,
92 .get = xfs_xattr_get, 92 .get = xfs_xattr_get,
93 .set = xfs_xattr_set, 93 .set = xfs_xattr_set,
94}; 94};
95 95
96struct xattr_handler *xfs_xattr_handlers[] = { 96const struct xattr_handler *xfs_xattr_handlers[] = {
97 &xfs_xattr_user_handler, 97 &xfs_xattr_user_handler,
98 &xfs_xattr_trusted_handler, 98 &xfs_xattr_trusted_handler,
99 &xfs_xattr_security_handler, 99 &xfs_xattr_security_handler,
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 26fa43140f2e..92b002f1805f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -448,6 +448,9 @@ xfs_qm_scall_getqstat(
448 return 0; 448 return 0;
449} 449}
450 450
451#define XFS_DQ_MASK \
452 (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
453
451/* 454/*
452 * Adjust quota limits, and start/stop timers accordingly. 455 * Adjust quota limits, and start/stop timers accordingly.
453 */ 456 */
@@ -465,9 +468,10 @@ xfs_qm_scall_setqlim(
465 int error; 468 int error;
466 xfs_qcnt_t hard, soft; 469 xfs_qcnt_t hard, soft;
467 470
468 if ((newlim->d_fieldmask & 471 if (newlim->d_fieldmask & ~XFS_DQ_MASK)
469 (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) 472 return EINVAL;
470 return (0); 473 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
474 return 0;
471 475
472 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 476 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
473 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 477 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index d13eeba2c8f8..0135e2a669d7 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,8 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_acl_access_handler; 52extern const struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler; 53extern const struct xattr_handler xfs_xattr_acl_default_handler;
54#else 54#else
55# define xfs_check_acl NULL 55# define xfs_check_acl NULL
56# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL