aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c41
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/aio.c94
-rw-r--r--fs/anon_inodes.c35
-rw-r--r--fs/autofs4/autofs_i.h38
-rw-r--r--fs/autofs4/expire.c8
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c616
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c37
-rw-r--r--fs/binfmt_elf_fdpic.c57
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio.c14
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/btrfs/acl.c47
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/cachefiles/bind.c11
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/README2
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/cifs/dir.c3
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/smbdes.c2
-rw-r--r--fs/coda/sysctl.c10
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c1530
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/debugfs/inode.c61
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/direct-io.c175
-rw-r--r--fs/dlm/config.c24
-rw-r--r--fs/dlm/debug_fs.c2
-rw-r--r--fs/dlm/dir.c7
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c6
-rw-r--r--fs/dlm/lockspace.c15
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/dlm/member.c8
-rw-r--r--fs/dlm/memory.c6
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/dlm/plock.c8
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/dlm/requestqueue.c2
-rw-r--r--fs/dlm/user.c12
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/inode.c30
-rw-r--r--fs/ecryptfs/main.c9
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c6
-rw-r--r--fs/exec.c53
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/common.h81
-rw-r--r--fs/exofs/exofs.h97
-rw-r--r--fs/exofs/inode.c426
-rw-r--r--fs/exofs/ios.c421
-rw-r--r--fs/exofs/osd.c125
-rw-r--r--fs/exofs/pnfs.h45
-rw-r--r--fs/exofs/super.c353
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c79
-rw-r--r--fs/ext2/dir.c6
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/super.c206
-rw-r--r--fs/ext2/xattr.c11
-rw-r--r--fs/ext2/xattr_security.c16
-rw-r--r--fs/ext2/xattr_trusted.c16
-rw-r--r--fs/ext2/xattr_user.c25
-rw-r--r--fs/ext2/xip.c5
-rw-r--r--fs/ext3/acl.c74
-rw-r--r--fs/ext3/inode.c28
-rw-r--r--fs/ext3/namei.c28
-rw-r--r--fs/ext3/resize.c37
-rw-r--r--fs/ext3/super.c487
-rw-r--r--fs/ext3/xattr.c38
-rw-r--r--fs/ext3/xattr_security.c20
-rw-r--r--fs/ext3/xattr_trusted.c18
-rw-r--r--fs/ext3/xattr_user.c25
-rw-r--r--fs/ext4/Kconfig11
-rw-r--r--fs/ext4/acl.c74
-rw-r--r--fs/ext4/balloc.c46
-rw-r--r--fs/ext4/block_validity.c4
-rw-r--r--fs/ext4/ext4.h31
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/ext4_jbd2.c82
-rw-r--r--fs/ext4/ext4_jbd2.h44
-rw-r--r--fs/ext4/extents.c121
-rw-r--r--fs/ext4/fsync.c66
-rw-r--r--fs/ext4/inode.c434
-rw-r--r--fs/ext4/ioctl.c29
-rw-r--r--fs/ext4/mballoc.c111
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/migrate.c27
-rw-r--r--fs/ext4/move_extent.c282
-rw-r--r--fs/ext4/namei.c38
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c167
-rw-r--r--fs/ext4/xattr.c48
-rw-r--r--fs/ext4/xattr_security.c20
-rw-r--r--fs/ext4/xattr_trusted.c20
-rw-r--r--fs/ext4/xattr_user.c25
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/fatent.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fat/misc.c57
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/file_table.c52
-rw-r--r--fs/fs-writeback.c46
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/generic_acl.c158
-rw-r--r--fs/gfs2/Kconfig2
-rw-r--r--fs/gfs2/acl.c361
-rw-r--r--fs/gfs2/acl.h24
-rw-r--r--fs/gfs2/aops.c20
-rw-r--r--fs/gfs2/dir.c34
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/glock.c31
-rw-r--r--fs/gfs2/glock.h9
-rw-r--r--fs/gfs2/glops.c5
-rw-r--r--fs/gfs2/incore.h5
-rw-r--r--fs/gfs2/inode.c9
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/lops.c4
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c154
-rw-r--r--fs/gfs2/ops_inode.c6
-rw-r--r--fs/gfs2/quota.c393
-rw-r--r--fs/gfs2/quota.h5
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/rgrp.c14
-rw-r--r--fs/gfs2/super.c110
-rw-r--r--fs/gfs2/super.h4
-rw-r--r--fs/gfs2/sys.c30
-rw-r--r--fs/gfs2/xattr.c156
-rw-r--r--fs/gfs2/xattr.h15
-rw-r--r--fs/hfs/catalog.c4
-rw-r--r--fs/hfs/dir.c11
-rw-r--r--fs/hfs/super.c7
-rw-r--r--fs/hpfs/super.c17
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c36
-rw-r--r--fs/internal.h8
-rw-r--r--fs/isofs/compress.c533
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/isofs/rock.c3
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c25
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jffs2/acl.c65
-rw-r--r--fs/jffs2/compr.c2
-rw-r--r--fs/jffs2/gc.c3
-rw-r--r--fs/jffs2/readinode.c4
-rw-r--r--fs/jffs2/security.c18
-rw-r--r--fs/jffs2/summary.c2
-rw-r--r--fs/jffs2/xattr.c8
-rw-r--r--fs/jffs2/xattr_trusted.c18
-rw-r--r--fs/jffs2/xattr_user.c18
-rw-r--r--fs/jfs/jfs_dmap.c4
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/svc.c26
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c505
-rw-r--r--fs/namespace.c34
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/callback.c13
-rw-r--r--fs/nfs/callback.h16
-rw-r--r--fs/nfs/callback_proc.c64
-rw-r--r--fs/nfs/callback_xdr.c34
-rw-r--r--fs/nfs/client.c14
-rw-r--r--fs/nfs/delegation.c77
-rw-r--r--fs/nfs/delegation.h7
-rw-r--r--fs/nfs/dir.c68
-rw-r--r--fs/nfs/dns_resolve.c4
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/internal.h54
-rw-r--r--fs/nfs/iostat.h24
-rw-r--r--fs/nfs/nfs4_fs.h17
-rw-r--r--fs/nfs/nfs4proc.c633
-rw-r--r--fs/nfs/nfs4state.c241
-rw-r--r--fs/nfs/nfs4xdr.c135
-rw-r--r--fs/nfs/read.c12
-rw-r--r--fs/nfs/super.c104
-rw-r--r--fs/nfs/sysctl.c22
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c12
-rw-r--r--fs/nfsctl.c2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c65
-rw-r--r--fs/nfsd/lockd.c10
-rw-r--r--fs/nfsd/nfs2acl.c27
-rw-r--r--fs/nfsd/nfs3acl.c15
-rw-r--r--fs/nfsd/nfs3proc.c20
-rw-r--r--fs/nfsd/nfs3xdr.c15
-rw-r--r--fs/nfsd/nfs4acl.c12
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c17
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/nfscache.c14
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsd.h338
-rw-r--r--fs/nfsd/nfsfh.c102
-rw-r--r--fs/nfsd/nfsfh.h208
-rw-r--r--fs/nfsd/nfsproc.c22
-rw-r--r--fs/nfsd/nfssvc.c22
-rw-r--r--fs/nfsd/nfsxdr.c12
-rw-r--r--fs/nfsd/state.h408
-rw-r--r--fs/nfsd/stats.c11
-rw-r--r--fs/nfsd/vfs.c144
-rw-r--r--fs/nfsd/vfs.h101
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h562
-rw-r--r--fs/nilfs2/alloc.c108
-rw-r--r--fs/nilfs2/alloc.h21
-rw-r--r--fs/nilfs2/bmap.c12
-rw-r--r--fs/nilfs2/btnode.c76
-rw-r--r--fs/nilfs2/btnode.h6
-rw-r--r--fs/nilfs2/btree.c106
-rw-r--r--fs/nilfs2/btree.h22
-rw-r--r--fs/nilfs2/cpfile.c57
-rw-r--r--fs/nilfs2/cpfile.h3
-rw-r--r--fs/nilfs2/dat.c47
-rw-r--r--fs/nilfs2/dat.h3
-rw-r--r--fs/nilfs2/dir.c24
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/gcdat.c3
-rw-r--r--fs/nilfs2/gcinode.c6
-rw-r--r--fs/nilfs2/ifile.c35
-rw-r--r--fs/nilfs2/ifile.h2
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/mdt.c56
-rw-r--r--fs/nilfs2/mdt.h25
-rw-r--r--fs/nilfs2/namei.c83
-rw-r--r--fs/nilfs2/recovery.c34
-rw-r--r--fs/nilfs2/segbuf.c185
-rw-r--r--fs/nilfs2/segbuf.h54
-rw-r--r--fs/nilfs2/segment.c369
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c203
-rw-r--r--fs/nilfs2/sufile.h14
-rw-r--r--fs/nilfs2/super.c91
-rw-r--r--fs/nilfs2/the_nilfs.c155
-rw-r--r--fs/nilfs2/the_nilfs.h10
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c51
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ntfs/logfile.c2
-rw-r--r--fs/ntfs/sysctl.c4
-rw-r--r--fs/ocfs2/Kconfig10
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c91
-rw-r--r--fs/ocfs2/acl.h22
-rw-r--r--fs/ocfs2/alloc.c16
-rw-r--r--fs/ocfs2/alloc.h5
-rw-r--r--fs/ocfs2/aops.c34
-rw-r--r--fs/ocfs2/blockcheck.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/netdebug.c8
-rw-r--r--fs/ocfs2/cluster/nodemanager.c51
-rw-r--r--fs/ocfs2/cluster/nodemanager.h7
-rw-r--r--fs/ocfs2/cluster/quorum.c16
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c18
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/extent_map.c25
-rw-r--r--fs/ocfs2/file.c21
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h8
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/quota.h4
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/ocfs2/refcounttree.c152
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/stackglue.c15
-rw-r--r--fs/ocfs2/super.c95
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c78
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/omfs/bitmap.c2
-rw-r--r--fs/open.c44
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/partitions/efi.c30
-rw-r--r--fs/partitions/efi.h8
-rw-r--r--fs/pipe.c45
-rw-r--r--fs/proc/array.c125
-rw-r--r--fs/proc/base.c73
-rw-r--r--fs/proc/generic.c21
-rw-r--r--fs/proc/inode.c31
-rw-r--r--fs/proc/internal.h10
-rw-r--r--fs/proc/page.c45
-rw-r--r--fs/proc/proc_devtree.c41
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/proc/stat.c19
-rw-r--r--fs/proc/task_mmu.c48
-rw-r--r--fs/proc/task_nommu.c8
-rw-r--r--fs/qnx4/bitmap.c26
-rw-r--r--fs/qnx4/dir.c6
-rw-r--r--fs/qnx4/inode.c48
-rw-r--r--fs/qnx4/namei.c6
-rw-r--r--fs/quota/Kconfig10
-rw-r--r--fs/quota/dquot.c421
-rw-r--r--fs/quota/quota.c93
-rw-r--r--fs/quota/quota_v1.c2
-rw-r--r--fs/quota/quota_v2.c170
-rw-r--r--fs/quota/quotaio_v2.h19
-rw-r--r--fs/ramfs/file-nommu.c28
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/Makefile6
-rw-r--r--fs/reiserfs/bitmap.c7
-rw-r--r--fs/reiserfs/dir.c10
-rw-r--r--fs/reiserfs/do_balan.c17
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/fix_node.c21
-rw-r--r--fs/reiserfs/inode.c137
-rw-r--r--fs/reiserfs/ioctl.c80
-rw-r--r--fs/reiserfs/journal.c146
-rw-r--r--fs/reiserfs/lock.c97
-rw-r--r--fs/reiserfs/namei.c27
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c65
-rw-r--r--fs/reiserfs/resize.c2
-rw-r--r--fs/reiserfs/stree.c53
-rw-r--r--fs/reiserfs/super.c56
-rw-r--r--fs/reiserfs/xattr.c80
-rw-r--r--fs/reiserfs/xattr_acl.c71
-rw-r--r--fs/reiserfs/xattr_security.c21
-rw-r--r--fs/reiserfs/xattr_trusted.c21
-rw-r--r--fs/reiserfs/xattr_user.c21
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/splice.c24
-rw-r--r--fs/stack.c71
-rw-r--r--fs/stat.c10
-rw-r--r--fs/super.c3
-rw-r--r--fs/sync.c68
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c402
-rw-r--r--fs/sysfs/file.c41
-rw-r--r--fs/sysfs/inode.c176
-rw-r--r--fs/sysfs/symlink.c11
-rw-r--r--fs/sysfs/sysfs.h24
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/ubifs/debug.c11
-rw-r--r--fs/ubifs/file.c15
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/ubifs/recovery.c2
-rw-r--r--fs/ubifs/super.c27
-rw-r--r--fs/udf/balloc.c2
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c24
-rw-r--r--fs/udf/namei.c38
-rw-r--r--fs/udf/super.c32
-rw-r--r--fs/ufs/dir.c10
-rw-r--r--fs/ufs/namei.c8
-rw-r--r--fs/ufs/super.c52
-rw-r--r--fs/ufs/ufs.h4
-rw-r--r--fs/xattr.c28
-rw-r--r--fs/xattr_acl.c2
-rw-r--r--fs/xfs/Makefile8
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c61
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c195
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c141
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h43
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c94
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h45
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c173
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c191
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c62
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1422
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c71
-rw-r--r--fs/xfs/quota/xfs_dquot.c110
-rw-r--r--fs/xfs/quota/xfs_dquot.h23
-rw-r--r--fs/xfs/quota/xfs_qm.c40
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/support/debug.h18
-rw-r--r--fs/xfs/support/ktrace.c323
-rw-r--r--fs/xfs/support/ktrace.h85
-rw-r--r--fs/xfs/xfs.h16
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_ag.h14
-rw-r--r--fs/xfs/xfs_alloc.c270
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/xfs_attr.c123
-rw-r--r--fs/xfs/xfs_attr.h11
-rw-r--r--fs/xfs/xfs_attr_leaf.c16
-rw-r--r--fs/xfs/xfs_attr_sf.h40
-rw-r--r--fs/xfs/xfs_bmap.c942
-rw-r--r--fs/xfs/xfs_bmap.h58
-rw-r--r--fs/xfs/xfs_bmap_btree.c9
-rw-r--r--fs/xfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_btree_trace.h17
-rw-r--r--fs/xfs/xfs_buf_item.c87
-rw-r--r--fs/xfs/xfs_buf_item.h20
-rw-r--r--fs/xfs/xfs_da_btree.c3
-rw-r--r--fs/xfs/xfs_da_btree.h7
-rw-r--r--fs/xfs/xfs_dfrag.c108
-rw-r--r--fs/xfs/xfs_dir2.c8
-rw-r--r--fs/xfs/xfs_dir2_block.c20
-rw-r--r--fs/xfs/xfs_dir2_leaf.c21
-rw-r--r--fs/xfs/xfs_dir2_node.c27
-rw-r--r--fs/xfs/xfs_dir2_sf.c26
-rw-r--r--fs/xfs/xfs_dir2_trace.c216
-rw-r--r--fs/xfs/xfs_dir2_trace.h72
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_filestream.h8
-rw-r--r--fs/xfs/xfs_fsops.c27
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_iget.c131
-rw-r--r--fs/xfs/xfs_inode.c96
-rw-r--r--fs/xfs/xfs_inode.h82
-rw-r--r--fs/xfs/xfs_inode_item.c5
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_iomap.c94
-rw-r--r--fs/xfs/xfs_iomap.h8
-rw-r--r--fs/xfs/xfs_log.c183
-rw-r--r--fs/xfs/xfs_log_priv.h20
-rw-r--r--fs/xfs/xfs_log_recover.c55
-rw-r--r--fs/xfs/xfs_mount.c32
-rw-r--r--fs/xfs/xfs_mount.h27
-rw-r--r--fs/xfs/xfs_quota.h8
-rw-r--r--fs/xfs/xfs_rename.c1
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_rw.c33
-rw-r--r--fs/xfs/xfs_rw.h29
-rw-r--r--fs/xfs/xfs_trans.c7
-rw-r--r--fs/xfs/xfs_trans.h49
-rw-r--r--fs/xfs/xfs_trans_buf.c75
-rw-r--r--fs/xfs/xfs_vnodeops.c180
-rw-r--r--fs/xfs/xfs_vnodeops.h1
467 files changed, 15622 insertions, 12432 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce..9d03d1ebca6 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c63a3c8beb7..5e15a21dbf9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -671,7 +671,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
671 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); 671 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
672 ssize_t result; 672 ssize_t result;
673 size_t count = iov_length(iov, nr_segs); 673 size_t count = iov_length(iov, nr_segs);
674 int ret;
675 674
676 _enter("{%x.%u},{%zu},%lu,", 675 _enter("{%x.%u},{%zu},%lu,",
677 vnode->fid.vid, vnode->fid.vnode, count, nr_segs); 676 vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
@@ -691,13 +690,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
691 return result; 690 return result;
692 } 691 }
693 692
694 /* return error values for O_SYNC and IS_SYNC() */
695 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
696 ret = afs_fsync(iocb->ki_filp, dentry, 1);
697 if (ret < 0)
698 result = ret;
699 }
700
701 _leave(" = %zd", result); 693 _leave(" = %zd", result);
702 return result; 694 return result;
703} 695}
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c934057..1cf12b3dd83 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
15#include <linux/aio_abi.h> 15#include <linux/aio_abi.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/backing-dev.h>
18#include <linux/uio.h> 19#include <linux/uio.h>
19 20
20#define DEBUG 0 21#define DEBUG 0
@@ -32,6 +33,9 @@
32#include <linux/workqueue.h> 33#include <linux/workqueue.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/eventfd.h> 35#include <linux/eventfd.h>
36#include <linux/blkdev.h>
37#include <linux/mempool.h>
38#include <linux/hash.h>
35 39
36#include <asm/kmap_types.h> 40#include <asm/kmap_types.h>
37#include <asm/uaccess.h> 41#include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
60static DEFINE_SPINLOCK(fput_lock); 64static DEFINE_SPINLOCK(fput_lock);
61static LIST_HEAD(fput_head); 65static LIST_HEAD(fput_head);
62 66
67#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
68#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
69struct aio_batch_entry {
70 struct hlist_node list;
71 struct address_space *mapping;
72};
73mempool_t *abe_pool;
74
63static void aio_kick_handler(struct work_struct *); 75static void aio_kick_handler(struct work_struct *);
64static void aio_queue_work(struct kioctx *); 76static void aio_queue_work(struct kioctx *);
65 77
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
73 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
74 86
75 aio_wq = create_workqueue("aio"); 87 aio_wq = create_workqueue("aio");
88 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
89 BUG_ON(!abe_pool);
76 90
77 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); 91 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
78 92
@@ -697,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
697 */ 711 */
698 ret = retry(iocb); 712 ret = retry(iocb);
699 713
700 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { 714 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
701 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
702 aio_complete(iocb, ret, 0); 715 aio_complete(iocb, ret, 0);
703 }
704out: 716out:
705 spin_lock_irq(&ctx->ctx_lock); 717 spin_lock_irq(&ctx->ctx_lock);
706 718
@@ -852,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
852 unsigned long flags; 864 unsigned long flags;
853 int run = 0; 865 int run = 0;
854 866
855 /* We're supposed to be the only path putting the iocb back on the run
856 * list. If we find that the iocb is *back* on a wait queue already
857 * than retry has happened before we could queue the iocb. This also
858 * means that the retry could have completed and freed our iocb, no
859 * good. */
860 BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
861
862 spin_lock_irqsave(&ctx->ctx_lock, flags); 867 spin_lock_irqsave(&ctx->ctx_lock, flags);
863 /* set this inside the lock so that we can't race with aio_run_iocb() 868 /* set this inside the lock so that we can't race with aio_run_iocb()
864 * testing it and putting the iocb on the run list under the lock */ 869 * testing it and putting the iocb on the run list under the lock */
@@ -872,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
872/* 877/*
873 * kick_iocb: 878 * kick_iocb:
874 * Called typically from a wait queue callback context 879 * Called typically from a wait queue callback context
875 * (aio_wake_function) to trigger a retry of the iocb. 880 * to trigger a retry of the iocb.
876 * The retry is usually executed by aio workqueue 881 * The retry is usually executed by aio workqueue
877 * threads (See aio_kick_handler). 882 * threads (See aio_kick_handler).
878 */ 883 */
@@ -1506,33 +1511,44 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
1506 return 0; 1511 return 0;
1507} 1512}
1508 1513
1509/* 1514static void aio_batch_add(struct address_space *mapping,
1510 * aio_wake_function: 1515 struct hlist_head *batch_hash)
1511 * wait queue callback function for aio notification, 1516{
1512 * Simply triggers a retry of the operation via kick_iocb. 1517 struct aio_batch_entry *abe;
1513 * 1518 struct hlist_node *pos;
1514 * This callback is specified in the wait queue entry in 1519 unsigned bucket;
1515 * a kiocb. 1520
1516 * 1521 bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
1517 * Note: 1522 hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
1518 * This routine is executed with the wait queue lock held. 1523 if (abe->mapping == mapping)
1519 * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests 1524 return;
1520 * the ioctx lock inside the wait queue lock. This is safe 1525 }
1521 * because this callback isn't used for wait queues which 1526
1522 * are nested inside ioctx lock (i.e. ctx->wait) 1527 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1523 */ 1528 BUG_ON(!igrab(mapping->host));
1524static int aio_wake_function(wait_queue_t *wait, unsigned mode, 1529 abe->mapping = mapping;
1525 int sync, void *key) 1530 hlist_add_head(&abe->list, &batch_hash[bucket]);
1531 return;
1532}
1533
1534static void aio_batch_free(struct hlist_head *batch_hash)
1526{ 1535{
1527 struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait); 1536 struct aio_batch_entry *abe;
1537 struct hlist_node *pos, *n;
1538 int i;
1528 1539
1529 list_del_init(&wait->task_list); 1540 for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
1530 kick_iocb(iocb); 1541 hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
1531 return 1; 1542 blk_run_address_space(abe->mapping);
1543 iput(abe->mapping->host);
1544 hlist_del(&abe->list);
1545 mempool_free(abe, abe_pool);
1546 }
1547 }
1532} 1548}
1533 1549
1534static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1550static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1535 struct iocb *iocb) 1551 struct iocb *iocb, struct hlist_head *batch_hash)
1536{ 1552{
1537 struct kiocb *req; 1553 struct kiocb *req;
1538 struct file *file; 1554 struct file *file;
@@ -1592,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1592 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; 1608 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
1593 req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 1609 req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
1594 req->ki_opcode = iocb->aio_lio_opcode; 1610 req->ki_opcode = iocb->aio_lio_opcode;
1595 init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
1596 INIT_LIST_HEAD(&req->ki_wait.task_list);
1597 1611
1598 ret = aio_setup_iocb(req); 1612 ret = aio_setup_iocb(req);
1599 1613
@@ -1608,6 +1622,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1608 ; 1622 ;
1609 } 1623 }
1610 spin_unlock_irq(&ctx->ctx_lock); 1624 spin_unlock_irq(&ctx->ctx_lock);
1625 if (req->ki_opcode == IOCB_CMD_PREAD ||
1626 req->ki_opcode == IOCB_CMD_PREADV ||
1627 req->ki_opcode == IOCB_CMD_PWRITE ||
1628 req->ki_opcode == IOCB_CMD_PWRITEV)
1629 aio_batch_add(file->f_mapping, batch_hash);
1630
1611 aio_put_req(req); /* drop extra ref to req */ 1631 aio_put_req(req); /* drop extra ref to req */
1612 return 0; 1632 return 0;
1613 1633
@@ -1635,6 +1655,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1635 struct kioctx *ctx; 1655 struct kioctx *ctx;
1636 long ret = 0; 1656 long ret = 0;
1637 int i; 1657 int i;
1658 struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
1638 1659
1639 if (unlikely(nr < 0)) 1660 if (unlikely(nr < 0))
1640 return -EINVAL; 1661 return -EINVAL;
@@ -1666,10 +1687,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1666 break; 1687 break;
1667 } 1688 }
1668 1689
1669 ret = io_submit_one(ctx, user_iocb, &tmp); 1690 ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
1670 if (ret) 1691 if (ret)
1671 break; 1692 break;
1672 } 1693 }
1694 aio_batch_free(batch_hash);
1673 1695
1674 put_ioctx(ctx); 1696 put_ioctx(ctx);
1675 return i ? i : ret; 1697 return i ? i : ret;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdb..9f0bf13291e 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
35 mnt); 35 mnt);
36} 36}
37 37
38static int anon_inodefs_delete_dentry(struct dentry *dentry) 38/*
39 * anon_inodefs_dname() is called from d_path().
40 */
41static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
39{ 42{
40 /* 43 return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
41 * We faked vfs to believe the dentry was hashed when we created it. 44 dentry->d_name.name);
42 * Now we restore the flag so that dput() will work correctly.
43 */
44 dentry->d_flags |= DCACHE_UNHASHED;
45 return 1;
46} 45}
47 46
48static struct file_system_type anon_inode_fs_type = { 47static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
51 .kill_sb = kill_anon_super, 50 .kill_sb = kill_anon_super,
52}; 51};
53static const struct dentry_operations anon_inodefs_dentry_operations = { 52static const struct dentry_operations anon_inodefs_dentry_operations = {
54 .d_delete = anon_inodefs_delete_dentry, 53 .d_dname = anon_inodefs_dname,
55}; 54};
56 55
57/* 56/*
@@ -88,7 +87,7 @@ struct file *anon_inode_getfile(const char *name,
88 void *priv, int flags) 87 void *priv, int flags)
89{ 88{
90 struct qstr this; 89 struct qstr this;
91 struct dentry *dentry; 90 struct path path;
92 struct file *file; 91 struct file *file;
93 int error; 92 int error;
94 93
@@ -106,10 +105,11 @@ struct file *anon_inode_getfile(const char *name,
106 this.name = name; 105 this.name = name;
107 this.len = strlen(name); 106 this.len = strlen(name);
108 this.hash = 0; 107 this.hash = 0;
109 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 108 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
110 if (!dentry) 109 if (!path.dentry)
111 goto err_module; 110 goto err_module;
112 111
112 path.mnt = mntget(anon_inode_mnt);
113 /* 113 /*
114 * We know the anon_inode inode count is always greater than zero, 114 * We know the anon_inode inode count is always greater than zero,
115 * so we can avoid doing an igrab() and we can use an open-coded 115 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,27 +117,24 @@ struct file *anon_inode_getfile(const char *name,
117 */ 117 */
118 atomic_inc(&anon_inode_inode->i_count); 118 atomic_inc(&anon_inode_inode->i_count);
119 119
120 dentry->d_op = &anon_inodefs_dentry_operations; 120 path.dentry->d_op = &anon_inodefs_dentry_operations;
121 /* Do not publish this dentry inside the global dentry hash table */ 121 d_instantiate(path.dentry, anon_inode_inode);
122 dentry->d_flags &= ~DCACHE_UNHASHED;
123 d_instantiate(dentry, anon_inode_inode);
124 122
125 error = -ENFILE; 123 error = -ENFILE;
126 file = alloc_file(anon_inode_mnt, dentry, 124 file = alloc_file(&path, OPEN_FMODE(flags), fops);
127 FMODE_READ | FMODE_WRITE, fops);
128 if (!file) 125 if (!file)
129 goto err_dput; 126 goto err_dput;
130 file->f_mapping = anon_inode_inode->i_mapping; 127 file->f_mapping = anon_inode_inode->i_mapping;
131 128
132 file->f_pos = 0; 129 file->f_pos = 0;
133 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 130 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
134 file->f_version = 0; 131 file->f_version = 0;
135 file->private_data = priv; 132 file->private_data = priv;
136 133
137 return file; 134 return file;
138 135
139err_dput: 136err_dput:
140 dput(dentry); 137 path_put(&path);
141err_module: 138err_module:
142 module_put(fops->owner); 139 module_put(fops->owner);
143 return ERR_PTR(error); 140 return ERR_PTR(error);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8f7cdde4173..0118d67221b 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -60,6 +60,11 @@ do { \
60 current->pid, __func__, ##args); \ 60 current->pid, __func__, ##args); \
61} while (0) 61} while (0)
62 62
63struct rehash_entry {
64 struct task_struct *task;
65 struct list_head list;
66};
67
63/* Unified info structure. This is pointed to by both the dentry and 68/* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 69 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 70 structure. It holds a reference to the dentry, so dentries are never
@@ -75,6 +80,9 @@ struct autofs_info {
75 struct completion expire_complete; 80 struct completion expire_complete;
76 81
77 struct list_head active; 82 struct list_head active;
83 int active_count;
84 struct list_head rehash_list;
85
78 struct list_head expiring; 86 struct list_head expiring;
79 87
80 struct autofs_sb_info *sbi; 88 struct autofs_sb_info *sbi;
@@ -95,6 +103,8 @@ struct autofs_info {
95 103
96#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 104#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
97#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ 105#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
106#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
107#define AUTOFS_INF_REHASH (1<<3) /* dentry in transit to ->lookup() */
98 108
99struct autofs_wait_queue { 109struct autofs_wait_queue {
100 wait_queue_head_t queue; 110 wait_queue_head_t queue;
@@ -161,7 +171,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
161{ 171{
162 struct autofs_info *inf = autofs4_dentry_ino(dentry); 172 struct autofs_info *inf = autofs4_dentry_ino(dentry);
163 173
164 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 174 if (inf->flags & AUTOFS_INF_PENDING)
165 return 1; 175 return 1;
166 176
167 if (inf->flags & AUTOFS_INF_EXPIRING) 177 if (inf->flags & AUTOFS_INF_EXPIRING)
@@ -264,5 +274,31 @@ out:
264 return ret; 274 return ret;
265} 275}
266 276
277static inline void autofs4_add_expiring(struct dentry *dentry)
278{
279 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
280 struct autofs_info *ino = autofs4_dentry_ino(dentry);
281 if (ino) {
282 spin_lock(&sbi->lookup_lock);
283 if (list_empty(&ino->expiring))
284 list_add(&ino->expiring, &sbi->expiring_list);
285 spin_unlock(&sbi->lookup_lock);
286 }
287 return;
288}
289
290static inline void autofs4_del_expiring(struct dentry *dentry)
291{
292 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
293 struct autofs_info *ino = autofs4_dentry_ino(dentry);
294 if (ino) {
295 spin_lock(&sbi->lookup_lock);
296 if (!list_empty(&ino->expiring))
297 list_del_init(&ino->expiring);
298 spin_unlock(&sbi->lookup_lock);
299 }
300 return;
301}
302
267void autofs4_dentry_release(struct dentry *); 303void autofs4_dentry_release(struct dentry *);
268extern void autofs4_kill_sb(struct super_block *); 304extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3da18d45348..74bc9aa6df3 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -27,7 +27,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
27 return 0; 27 return 0;
28 28
29 /* No point expiring a pending mount */ 29 /* No point expiring a pending mount */
30 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 30 if (ino->flags & AUTOFS_INF_PENDING)
31 return 0; 31 return 0;
32 32
33 if (!do_now) { 33 if (!do_now) {
@@ -279,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
279 root->d_mounted--; 279 root->d_mounted--;
280 } 280 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 281 ino->flags |= AUTOFS_INF_EXPIRING;
282 autofs4_add_expiring(root);
282 init_completion(&ino->expire_complete); 283 init_completion(&ino->expire_complete);
283 spin_unlock(&sbi->fs_lock); 284 spin_unlock(&sbi->fs_lock);
284 return root; 285 return root;
@@ -406,6 +407,7 @@ found:
406 expired, (int)expired->d_name.len, expired->d_name.name); 407 expired, (int)expired->d_name.len, expired->d_name.name);
407 ino = autofs4_dentry_ino(expired); 408 ino = autofs4_dentry_ino(expired);
408 ino->flags |= AUTOFS_INF_EXPIRING; 409 ino->flags |= AUTOFS_INF_EXPIRING;
410 autofs4_add_expiring(expired);
409 init_completion(&ino->expire_complete); 411 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 412 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 413 spin_lock(&dcache_lock);
@@ -433,7 +435,7 @@ int autofs4_expire_wait(struct dentry *dentry)
433 435
434 DPRINTK("expire done status=%d", status); 436 DPRINTK("expire done status=%d", status);
435 437
436 if (d_unhashed(dentry)) 438 if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode))
437 return -EAGAIN; 439 return -EAGAIN;
438 440
439 return status; 441 return status;
@@ -473,6 +475,7 @@ int autofs4_expire_run(struct super_block *sb,
473 spin_lock(&sbi->fs_lock); 475 spin_lock(&sbi->fs_lock);
474 ino = autofs4_dentry_ino(dentry); 476 ino = autofs4_dentry_ino(dentry);
475 ino->flags &= ~AUTOFS_INF_EXPIRING; 477 ino->flags &= ~AUTOFS_INF_EXPIRING;
478 autofs4_del_expiring(dentry);
476 complete_all(&ino->expire_complete); 479 complete_all(&ino->expire_complete);
477 spin_unlock(&sbi->fs_lock); 480 spin_unlock(&sbi->fs_lock);
478 481
@@ -503,6 +506,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 506 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 507 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 508 ino->flags &= ~AUTOFS_INF_EXPIRING;
509 autofs4_del_expiring(dentry);
506 complete_all(&ino->expire_complete); 510 complete_all(&ino->expire_complete);
507 spin_unlock(&sbi->fs_lock); 511 spin_unlock(&sbi->fs_lock);
508 dput(dentry); 512 dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 69c8142da83..d0a3de24745 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -49,6 +49,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
49 ino->dentry = NULL; 49 ino->dentry = NULL;
50 ino->size = 0; 50 ino->size = 0;
51 INIT_LIST_HEAD(&ino->active); 51 INIT_LIST_HEAD(&ino->active);
52 INIT_LIST_HEAD(&ino->rehash_list);
53 ino->active_count = 0;
52 INIT_LIST_HEAD(&ino->expiring); 54 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0); 55 atomic_set(&ino->count, 0);
54 } 56 }
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index b96a3c57359..30cc9ddf4b7 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -72,6 +72,139 @@ const struct inode_operations autofs4_dir_inode_operations = {
72 .rmdir = autofs4_dir_rmdir, 72 .rmdir = autofs4_dir_rmdir,
73}; 73};
74 74
75static void autofs4_add_active(struct dentry *dentry)
76{
77 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
78 struct autofs_info *ino = autofs4_dentry_ino(dentry);
79 if (ino) {
80 spin_lock(&sbi->lookup_lock);
81 if (!ino->active_count) {
82 if (list_empty(&ino->active))
83 list_add(&ino->active, &sbi->active_list);
84 }
85 ino->active_count++;
86 spin_unlock(&sbi->lookup_lock);
87 }
88 return;
89}
90
91static void autofs4_del_active(struct dentry *dentry)
92{
93 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
94 struct autofs_info *ino = autofs4_dentry_ino(dentry);
95 if (ino) {
96 spin_lock(&sbi->lookup_lock);
97 ino->active_count--;
98 if (!ino->active_count) {
99 if (!list_empty(&ino->active))
100 list_del_init(&ino->active);
101 }
102 spin_unlock(&sbi->lookup_lock);
103 }
104 return;
105}
106
107static void autofs4_add_rehash_entry(struct autofs_info *ino,
108 struct rehash_entry *entry)
109{
110 entry->task = current;
111 INIT_LIST_HEAD(&entry->list);
112 list_add(&entry->list, &ino->rehash_list);
113 return;
114}
115
116static void autofs4_remove_rehash_entry(struct autofs_info *ino)
117{
118 struct list_head *head = &ino->rehash_list;
119 struct rehash_entry *entry;
120 list_for_each_entry(entry, head, list) {
121 if (entry->task == current) {
122 list_del(&entry->list);
123 kfree(entry);
124 break;
125 }
126 }
127 return;
128}
129
130static void autofs4_remove_rehash_entrys(struct autofs_info *ino)
131{
132 struct autofs_sb_info *sbi = ino->sbi;
133 struct rehash_entry *entry, *next;
134 struct list_head *head;
135
136 spin_lock(&sbi->fs_lock);
137 spin_lock(&sbi->lookup_lock);
138 if (!(ino->flags & AUTOFS_INF_REHASH)) {
139 spin_unlock(&sbi->lookup_lock);
140 spin_unlock(&sbi->fs_lock);
141 return;
142 }
143 ino->flags &= ~AUTOFS_INF_REHASH;
144 head = &ino->rehash_list;
145 list_for_each_entry_safe(entry, next, head, list) {
146 list_del(&entry->list);
147 kfree(entry);
148 }
149 spin_unlock(&sbi->lookup_lock);
150 spin_unlock(&sbi->fs_lock);
151 dput(ino->dentry);
152
153 return;
154}
155
156static void autofs4_revalidate_drop(struct dentry *dentry,
157 struct rehash_entry *entry)
158{
159 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
160 struct autofs_info *ino = autofs4_dentry_ino(dentry);
161 /*
162 * Add to the active list so we can pick this up in
163 * ->lookup(). Also add an entry to a rehash list so
164 * we know when there are no dentrys in flight so we
165 * know when we can rehash the dentry.
166 */
167 spin_lock(&sbi->lookup_lock);
168 if (list_empty(&ino->active))
169 list_add(&ino->active, &sbi->active_list);
170 autofs4_add_rehash_entry(ino, entry);
171 spin_unlock(&sbi->lookup_lock);
172 if (!(ino->flags & AUTOFS_INF_REHASH)) {
173 ino->flags |= AUTOFS_INF_REHASH;
174 dget(dentry);
175 spin_lock(&dentry->d_lock);
176 __d_drop(dentry);
177 spin_unlock(&dentry->d_lock);
178 }
179 return;
180}
181
182static void autofs4_revalidate_rehash(struct dentry *dentry)
183{
184 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
185 struct autofs_info *ino = autofs4_dentry_ino(dentry);
186 if (ino->flags & AUTOFS_INF_REHASH) {
187 spin_lock(&sbi->lookup_lock);
188 autofs4_remove_rehash_entry(ino);
189 if (list_empty(&ino->rehash_list)) {
190 spin_unlock(&sbi->lookup_lock);
191 ino->flags &= ~AUTOFS_INF_REHASH;
192 d_rehash(dentry);
193 dput(ino->dentry);
194 } else
195 spin_unlock(&sbi->lookup_lock);
196 }
197 return;
198}
199
200static unsigned int autofs4_need_mount(unsigned int flags)
201{
202 unsigned int res = 0;
203 if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS))
204 res = 1;
205 return res;
206}
207
75static int autofs4_dir_open(struct inode *inode, struct file *file) 208static int autofs4_dir_open(struct inode *inode, struct file *file)
76{ 209{
77 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -93,7 +226,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
93 * it. 226 * it.
94 */ 227 */
95 spin_lock(&dcache_lock); 228 spin_lock(&dcache_lock);
96 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 229 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
97 spin_unlock(&dcache_lock); 230 spin_unlock(&dcache_lock);
98 return -ENOENT; 231 return -ENOENT;
99 } 232 }
@@ -103,7 +236,7 @@ out:
103 return dcache_dir_open(inode, file); 236 return dcache_dir_open(inode, file);
104} 237}
105 238
106static int try_to_fill_dentry(struct dentry *dentry, int flags) 239static int try_to_fill_dentry(struct dentry *dentry)
107{ 240{
108 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 241 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
109 struct autofs_info *ino = autofs4_dentry_ino(dentry); 242 struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -116,55 +249,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
116 * Wait for a pending mount, triggering one if there 249 * Wait for a pending mount, triggering one if there
117 * isn't one already 250 * isn't one already
118 */ 251 */
119 if (dentry->d_inode == NULL) { 252 DPRINTK("waiting for mount name=%.*s",
120 DPRINTK("waiting for mount name=%.*s", 253 dentry->d_name.len, dentry->d_name.name);
121 dentry->d_name.len, dentry->d_name.name);
122
123 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
124
125 DPRINTK("mount done status=%d", status);
126
127 /* Turn this into a real negative dentry? */
128 if (status == -ENOENT) {
129 spin_lock(&dentry->d_lock);
130 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
131 spin_unlock(&dentry->d_lock);
132 return status;
133 } else if (status) {
134 /* Return a negative dentry, but leave it "pending" */
135 return status;
136 }
137 /* Trigger mount for path component or follow link */
138 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
139 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
140 current->link_count) {
141 DPRINTK("waiting for mount name=%.*s",
142 dentry->d_name.len, dentry->d_name.name);
143
144 spin_lock(&dentry->d_lock);
145 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
146 spin_unlock(&dentry->d_lock);
147 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
148 254
149 DPRINTK("mount done status=%d", status); 255 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
150 256
151 if (status) { 257 DPRINTK("mount done status=%d", status);
152 spin_lock(&dentry->d_lock);
153 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
154 spin_unlock(&dentry->d_lock);
155 return status;
156 }
157 }
158
159 /* Initialize expiry counter after successful mount */
160 if (ino)
161 ino->last_used = jiffies;
162 258
163 spin_lock(&dentry->d_lock); 259 /* Update expiry counter */
164 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 260 ino->last_used = jiffies;
165 spin_unlock(&dentry->d_lock);
166 261
167 return 0; 262 return status;
168} 263}
169 264
170/* For autofs direct mounts the follow link triggers the mount */ 265/* For autofs direct mounts the follow link triggers the mount */
@@ -202,27 +297,39 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
202 autofs4_expire_wait(dentry); 297 autofs4_expire_wait(dentry);
203 298
204 /* We trigger a mount for almost all flags */ 299 /* We trigger a mount for almost all flags */
205 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS); 300 lookup_type = autofs4_need_mount(nd->flags);
206 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) 301 spin_lock(&sbi->fs_lock);
302 spin_lock(&dcache_lock);
303 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
304 spin_unlock(&dcache_lock);
305 spin_unlock(&sbi->fs_lock);
207 goto follow; 306 goto follow;
307 }
208 308
209 /* 309 /*
210 * If the dentry contains directories then it is an autofs 310 * If the dentry contains directories then it is an autofs
211 * multi-mount with no root mount offset. So don't try to 311 * multi-mount with no root mount offset. So don't try to
212 * mount it again. 312 * mount it again.
213 */ 313 */
214 spin_lock(&dcache_lock); 314 if (ino->flags & AUTOFS_INF_PENDING ||
215 if (dentry->d_flags & DCACHE_AUTOFS_PENDING || 315 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
216 (!d_mountpoint(dentry) && __simple_empty(dentry))) { 316 ino->flags |= AUTOFS_INF_PENDING;
217 spin_unlock(&dcache_lock); 317 spin_unlock(&dcache_lock);
318 spin_unlock(&sbi->fs_lock);
319
320 status = try_to_fill_dentry(dentry);
321
322 spin_lock(&sbi->fs_lock);
323 ino->flags &= ~AUTOFS_INF_PENDING;
324 spin_unlock(&sbi->fs_lock);
218 325
219 status = try_to_fill_dentry(dentry, 0);
220 if (status) 326 if (status)
221 goto out_error; 327 goto out_error;
222 328
223 goto follow; 329 goto follow;
224 } 330 }
225 spin_unlock(&dcache_lock); 331 spin_unlock(&dcache_lock);
332 spin_unlock(&sbi->fs_lock);
226follow: 333follow:
227 /* 334 /*
228 * If there is no root mount it must be an autofs 335 * If there is no root mount it must be an autofs
@@ -254,18 +361,47 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
254{ 361{
255 struct inode *dir = dentry->d_parent->d_inode; 362 struct inode *dir = dentry->d_parent->d_inode;
256 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 363 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
257 int oz_mode = autofs4_oz_mode(sbi); 364 struct autofs_info *ino = autofs4_dentry_ino(dentry);
365 struct rehash_entry *entry;
258 int flags = nd ? nd->flags : 0; 366 int flags = nd ? nd->flags : 0;
259 int status = 1; 367 unsigned int mutex_aquired;
368
369 DPRINTK("name = %.*s oz_mode = %d",
370 dentry->d_name.len, dentry->d_name.name, oz_mode);
371
372 /* Daemon never causes a mount to trigger */
373 if (autofs4_oz_mode(sbi))
374 return 1;
375
376 entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL);
377 if (!entry)
378 return -ENOMEM;
379
380 mutex_aquired = mutex_trylock(&dir->i_mutex);
260 381
261 /* Pending dentry */
262 spin_lock(&sbi->fs_lock); 382 spin_lock(&sbi->fs_lock);
383 spin_lock(&dcache_lock);
384 /* Pending dentry */
263 if (autofs4_ispending(dentry)) { 385 if (autofs4_ispending(dentry)) {
264 /* The daemon never causes a mount to trigger */ 386 int status;
265 spin_unlock(&sbi->fs_lock);
266 387
267 if (oz_mode) 388 /*
268 return 1; 389 * We can only unhash and send this to ->lookup() if
390 * the directory mutex is held over d_revalidate() and
391 * ->lookup(). This prevents the VFS from incorrectly
392 * seeing the dentry as non-existent.
393 */
394 ino->flags |= AUTOFS_INF_PENDING;
395 if (!mutex_aquired) {
396 autofs4_revalidate_drop(dentry, entry);
397 spin_unlock(&dcache_lock);
398 spin_unlock(&sbi->fs_lock);
399 return 0;
400 }
401 spin_unlock(&dcache_lock);
402 spin_unlock(&sbi->fs_lock);
403 mutex_unlock(&dir->i_mutex);
404 kfree(entry);
269 405
270 /* 406 /*
271 * If the directory has gone away due to an expire 407 * If the directory has gone away due to an expire
@@ -279,46 +415,82 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
279 * A zero status is success otherwise we have a 415 * A zero status is success otherwise we have a
280 * negative error code. 416 * negative error code.
281 */ 417 */
282 status = try_to_fill_dentry(dentry, flags); 418 status = try_to_fill_dentry(dentry);
419
420 spin_lock(&sbi->fs_lock);
421 ino->flags &= ~AUTOFS_INF_PENDING;
422 spin_unlock(&sbi->fs_lock);
423
283 if (status == 0) 424 if (status == 0)
284 return 1; 425 return 1;
285 426
286 return status; 427 return status;
287 } 428 }
288 spin_unlock(&sbi->fs_lock);
289
290 /* Negative dentry.. invalidate if "old" */
291 if (dentry->d_inode == NULL)
292 return 0;
293 429
294 /* Check for a non-mountpoint directory with no contents */ 430 /* Check for a non-mountpoint directory with no contents */
295 spin_lock(&dcache_lock);
296 if (S_ISDIR(dentry->d_inode->i_mode) && 431 if (S_ISDIR(dentry->d_inode->i_mode) &&
297 !d_mountpoint(dentry) && 432 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
298 __simple_empty(dentry)) {
299 DPRINTK("dentry=%p %.*s, emptydir", 433 DPRINTK("dentry=%p %.*s, emptydir",
300 dentry, dentry->d_name.len, dentry->d_name.name); 434 dentry, dentry->d_name.len, dentry->d_name.name);
301 spin_unlock(&dcache_lock);
302 435
303 /* The daemon never causes a mount to trigger */ 436 if (autofs4_need_mount(flags) || current->link_count) {
304 if (oz_mode) 437 int status;
305 return 1;
306 438
307 /* 439 /*
308 * A zero status is success otherwise we have a 440 * We can only unhash and send this to ->lookup() if
309 * negative error code. 441 * the directory mutex is held over d_revalidate() and
310 */ 442 * ->lookup(). This prevents the VFS from incorrectly
311 status = try_to_fill_dentry(dentry, flags); 443 * seeing the dentry as non-existent.
312 if (status == 0) 444 */
313 return 1; 445 ino->flags |= AUTOFS_INF_PENDING;
446 if (!mutex_aquired) {
447 autofs4_revalidate_drop(dentry, entry);
448 spin_unlock(&dcache_lock);
449 spin_unlock(&sbi->fs_lock);
450 return 0;
451 }
452 spin_unlock(&dcache_lock);
453 spin_unlock(&sbi->fs_lock);
454 mutex_unlock(&dir->i_mutex);
455 kfree(entry);
314 456
315 return status; 457 /*
458 * A zero status is success otherwise we have a
459 * negative error code.
460 */
461 status = try_to_fill_dentry(dentry);
462
463 spin_lock(&sbi->fs_lock);
464 ino->flags &= ~AUTOFS_INF_PENDING;
465 spin_unlock(&sbi->fs_lock);
466
467 if (status == 0)
468 return 1;
469
470 return status;
471 }
316 } 472 }
317 spin_unlock(&dcache_lock); 473 spin_unlock(&dcache_lock);
474 spin_unlock(&sbi->fs_lock);
475
476 if (mutex_aquired)
477 mutex_unlock(&dir->i_mutex);
478
479 kfree(entry);
318 480
319 return 1; 481 return 1;
320} 482}
321 483
484static void autofs4_free_rehash_entrys(struct autofs_info *inf)
485{
486 struct list_head *head = &inf->rehash_list;
487 struct rehash_entry *entry, *next;
488 list_for_each_entry_safe(entry, next, head, list) {
489 list_del(&entry->list);
490 kfree(entry);
491 }
492}
493
322void autofs4_dentry_release(struct dentry *de) 494void autofs4_dentry_release(struct dentry *de)
323{ 495{
324 struct autofs_info *inf; 496 struct autofs_info *inf;
@@ -337,6 +509,8 @@ void autofs4_dentry_release(struct dentry *de)
337 list_del(&inf->active); 509 list_del(&inf->active);
338 if (!list_empty(&inf->expiring)) 510 if (!list_empty(&inf->expiring))
339 list_del(&inf->expiring); 511 list_del(&inf->expiring);
512 if (!list_empty(&inf->rehash_list))
513 autofs4_free_rehash_entrys(inf);
340 spin_unlock(&sbi->lookup_lock); 514 spin_unlock(&sbi->lookup_lock);
341 } 515 }
342 516
@@ -359,35 +533,52 @@ static const struct dentry_operations autofs4_dentry_operations = {
359 .d_release = autofs4_dentry_release, 533 .d_release = autofs4_dentry_release,
360}; 534};
361 535
362static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 536static struct dentry *autofs4_lookup_active(struct dentry *dentry)
363{ 537{
538 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
539 struct dentry *parent = dentry->d_parent;
540 struct qstr *name = &dentry->d_name;
364 unsigned int len = name->len; 541 unsigned int len = name->len;
365 unsigned int hash = name->hash; 542 unsigned int hash = name->hash;
366 const unsigned char *str = name->name; 543 const unsigned char *str = name->name;
367 struct list_head *p, *head; 544 struct list_head *p, *head;
368 545
546restart:
369 spin_lock(&dcache_lock); 547 spin_lock(&dcache_lock);
370 spin_lock(&sbi->lookup_lock); 548 spin_lock(&sbi->lookup_lock);
371 head = &sbi->active_list; 549 head = &sbi->active_list;
372 list_for_each(p, head) { 550 list_for_each(p, head) {
373 struct autofs_info *ino; 551 struct autofs_info *ino;
374 struct dentry *dentry; 552 struct dentry *active;
375 struct qstr *qstr; 553 struct qstr *qstr;
376 554
377 ino = list_entry(p, struct autofs_info, active); 555 ino = list_entry(p, struct autofs_info, active);
378 dentry = ino->dentry; 556 active = ino->dentry;
379 557
380 spin_lock(&dentry->d_lock); 558 spin_lock(&active->d_lock);
381 559
382 /* Already gone? */ 560 /* Already gone? */
383 if (atomic_read(&dentry->d_count) == 0) 561 if (atomic_read(&active->d_count) == 0)
384 goto next; 562 goto next;
385 563
386 qstr = &dentry->d_name; 564 if (active->d_inode && IS_DEADDIR(active->d_inode)) {
565 if (!list_empty(&ino->rehash_list)) {
566 dget(active);
567 spin_unlock(&active->d_lock);
568 spin_unlock(&sbi->lookup_lock);
569 spin_unlock(&dcache_lock);
570 autofs4_remove_rehash_entrys(ino);
571 dput(active);
572 goto restart;
573 }
574 goto next;
575 }
576
577 qstr = &active->d_name;
387 578
388 if (dentry->d_name.hash != hash) 579 if (active->d_name.hash != hash)
389 goto next; 580 goto next;
390 if (dentry->d_parent != parent) 581 if (active->d_parent != parent)
391 goto next; 582 goto next;
392 583
393 if (qstr->len != len) 584 if (qstr->len != len)
@@ -395,15 +586,13 @@ static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct d
395 if (memcmp(qstr->name, str, len)) 586 if (memcmp(qstr->name, str, len))
396 goto next; 587 goto next;
397 588
398 if (d_unhashed(dentry)) { 589 dget(active);
399 dget(dentry); 590 spin_unlock(&active->d_lock);
400 spin_unlock(&dentry->d_lock); 591 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&sbi->lookup_lock); 592 spin_unlock(&dcache_lock);
402 spin_unlock(&dcache_lock); 593 return active;
403 return dentry;
404 }
405next: 594next:
406 spin_unlock(&dentry->d_lock); 595 spin_unlock(&active->d_lock);
407 } 596 }
408 spin_unlock(&sbi->lookup_lock); 597 spin_unlock(&sbi->lookup_lock);
409 spin_unlock(&dcache_lock); 598 spin_unlock(&dcache_lock);
@@ -411,8 +600,11 @@ next:
411 return NULL; 600 return NULL;
412} 601}
413 602
414static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 603static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
415{ 604{
605 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
606 struct dentry *parent = dentry->d_parent;
607 struct qstr *name = &dentry->d_name;
416 unsigned int len = name->len; 608 unsigned int len = name->len;
417 unsigned int hash = name->hash; 609 unsigned int hash = name->hash;
418 const unsigned char *str = name->name; 610 const unsigned char *str = name->name;
@@ -423,23 +615,23 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
423 head = &sbi->expiring_list; 615 head = &sbi->expiring_list;
424 list_for_each(p, head) { 616 list_for_each(p, head) {
425 struct autofs_info *ino; 617 struct autofs_info *ino;
426 struct dentry *dentry; 618 struct dentry *expiring;
427 struct qstr *qstr; 619 struct qstr *qstr;
428 620
429 ino = list_entry(p, struct autofs_info, expiring); 621 ino = list_entry(p, struct autofs_info, expiring);
430 dentry = ino->dentry; 622 expiring = ino->dentry;
431 623
432 spin_lock(&dentry->d_lock); 624 spin_lock(&expiring->d_lock);
433 625
434 /* Bad luck, we've already been dentry_iput */ 626 /* Bad luck, we've already been dentry_iput */
435 if (!dentry->d_inode) 627 if (!expiring->d_inode)
436 goto next; 628 goto next;
437 629
438 qstr = &dentry->d_name; 630 qstr = &expiring->d_name;
439 631
440 if (dentry->d_name.hash != hash) 632 if (expiring->d_name.hash != hash)
441 goto next; 633 goto next;
442 if (dentry->d_parent != parent) 634 if (expiring->d_parent != parent)
443 goto next; 635 goto next;
444 636
445 if (qstr->len != len) 637 if (qstr->len != len)
@@ -447,15 +639,13 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
447 if (memcmp(qstr->name, str, len)) 639 if (memcmp(qstr->name, str, len))
448 goto next; 640 goto next;
449 641
450 if (d_unhashed(dentry)) { 642 dget(expiring);
451 dget(dentry); 643 spin_unlock(&expiring->d_lock);
452 spin_unlock(&dentry->d_lock); 644 spin_unlock(&sbi->lookup_lock);
453 spin_unlock(&sbi->lookup_lock); 645 spin_unlock(&dcache_lock);
454 spin_unlock(&dcache_lock); 646 return expiring;
455 return dentry;
456 }
457next: 647next:
458 spin_unlock(&dentry->d_lock); 648 spin_unlock(&expiring->d_lock);
459 } 649 }
460 spin_unlock(&sbi->lookup_lock); 650 spin_unlock(&sbi->lookup_lock);
461 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
@@ -463,13 +653,56 @@ next:
463 return NULL; 653 return NULL;
464} 654}
465 655
656static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi,
657 struct dentry *dentry, int oz_mode)
658{
659 struct autofs_info *ino;
660
661 /*
662 * Mark the dentry incomplete but don't hash it. We do this
663 * to serialize our inode creation operations (symlink and
664 * mkdir) which prevents deadlock during the callback to
665 * the daemon. Subsequent user space lookups for the same
666 * dentry are placed on the wait queue while the daemon
667 * itself is allowed passage unresticted so the create
668 * operation itself can then hash the dentry. Finally,
669 * we check for the hashed dentry and return the newly
670 * hashed dentry.
671 */
672 dentry->d_op = &autofs4_root_dentry_operations;
673
674 /*
675 * And we need to ensure that the same dentry is used for
676 * all following lookup calls until it is hashed so that
677 * the dentry flags are persistent throughout the request.
678 */
679 ino = autofs4_init_ino(NULL, sbi, 0555);
680 if (!ino)
681 return ERR_PTR(-ENOMEM);
682
683 dentry->d_fsdata = ino;
684 ino->dentry = dentry;
685
686 /*
687 * Only set the mount pending flag for new dentrys not created
688 * by the daemon.
689 */
690 if (!oz_mode)
691 ino->flags |= AUTOFS_INF_PENDING;
692
693 d_instantiate(dentry, NULL);
694
695 return ino;
696}
697
466/* Lookups in the root directory */ 698/* Lookups in the root directory */
467static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 699static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
468{ 700{
469 struct autofs_sb_info *sbi; 701 struct autofs_sb_info *sbi;
470 struct autofs_info *ino; 702 struct autofs_info *ino;
471 struct dentry *expiring, *unhashed; 703 struct dentry *expiring, *active;
472 int oz_mode; 704 int oz_mode;
705 int status = 0;
473 706
474 DPRINTK("name = %.*s", 707 DPRINTK("name = %.*s",
475 dentry->d_name.len, dentry->d_name.name); 708 dentry->d_name.len, dentry->d_name.name);
@@ -484,123 +717,100 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
484 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 717 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
485 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 718 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
486 719
487 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); 720 spin_lock(&sbi->fs_lock);
488 if (unhashed) 721 active = autofs4_lookup_active(dentry);
489 dentry = unhashed; 722 if (active) {
490 else { 723 dentry = active;
491 /* 724 ino = autofs4_dentry_ino(dentry);
492 * Mark the dentry incomplete but don't hash it. We do this 725 /* If this came from revalidate, rehash it */
493 * to serialize our inode creation operations (symlink and 726 autofs4_revalidate_rehash(dentry);
494 * mkdir) which prevents deadlock during the callback to 727 spin_unlock(&sbi->fs_lock);
495 * the daemon. Subsequent user space lookups for the same 728 } else {
496 * dentry are placed on the wait queue while the daemon 729 spin_unlock(&sbi->fs_lock);
497 * itself is allowed passage unresticted so the create 730 ino = init_new_dentry(sbi, dentry, oz_mode);
498 * operation itself can then hash the dentry. Finally, 731 if (IS_ERR(ino))
499 * we check for the hashed dentry and return the newly 732 return (struct dentry *) ino;
500 * hashed dentry.
501 */
502 dentry->d_op = &autofs4_root_dentry_operations;
503
504 /*
505 * And we need to ensure that the same dentry is used for
506 * all following lookup calls until it is hashed so that
507 * the dentry flags are persistent throughout the request.
508 */
509 ino = autofs4_init_ino(NULL, sbi, 0555);
510 if (!ino)
511 return ERR_PTR(-ENOMEM);
512
513 dentry->d_fsdata = ino;
514 ino->dentry = dentry;
515
516 spin_lock(&sbi->lookup_lock);
517 list_add(&ino->active, &sbi->active_list);
518 spin_unlock(&sbi->lookup_lock);
519
520 d_instantiate(dentry, NULL);
521 } 733 }
522 734
735 autofs4_add_active(dentry);
736
523 if (!oz_mode) { 737 if (!oz_mode) {
738 expiring = autofs4_lookup_expiring(dentry);
524 mutex_unlock(&dir->i_mutex); 739 mutex_unlock(&dir->i_mutex);
525 expiring = autofs4_lookup_expiring(sbi,
526 dentry->d_parent,
527 &dentry->d_name);
528 if (expiring) { 740 if (expiring) {
529 /* 741 /*
530 * If we are racing with expire the request might not 742 * If we are racing with expire the request might not
531 * be quite complete but the directory has been removed 743 * be quite complete but the directory has been removed
532 * so it must have been successful, so just wait for it. 744 * so it must have been successful, so just wait for it.
533 */ 745 */
534 ino = autofs4_dentry_ino(expiring);
535 autofs4_expire_wait(expiring); 746 autofs4_expire_wait(expiring);
536 spin_lock(&sbi->lookup_lock);
537 if (!list_empty(&ino->expiring))
538 list_del_init(&ino->expiring);
539 spin_unlock(&sbi->lookup_lock);
540 dput(expiring); 747 dput(expiring);
541 } 748 }
542 749 status = try_to_fill_dentry(dentry);
543 spin_lock(&dentry->d_lock);
544 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
545 spin_unlock(&dentry->d_lock);
546 if (dentry->d_op && dentry->d_op->d_revalidate)
547 (dentry->d_op->d_revalidate)(dentry, nd);
548 mutex_lock(&dir->i_mutex); 750 mutex_lock(&dir->i_mutex);
751 spin_lock(&sbi->fs_lock);
752 ino->flags &= ~AUTOFS_INF_PENDING;
753 spin_unlock(&sbi->fs_lock);
549 } 754 }
550 755
756 autofs4_del_active(dentry);
757
551 /* 758 /*
552 * If we are still pending, check if we had to handle 759 * If we had a mount fail, check if we had to handle
553 * a signal. If so we can force a restart.. 760 * a signal. If so we can force a restart..
554 */ 761 */
555 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { 762 if (status) {
556 /* See if we were interrupted */ 763 /* See if we were interrupted */
557 if (signal_pending(current)) { 764 if (signal_pending(current)) {
558 sigset_t *sigset = &current->pending.signal; 765 sigset_t *sigset = &current->pending.signal;
559 if (sigismember (sigset, SIGKILL) || 766 if (sigismember (sigset, SIGKILL) ||
560 sigismember (sigset, SIGQUIT) || 767 sigismember (sigset, SIGQUIT) ||
561 sigismember (sigset, SIGINT)) { 768 sigismember (sigset, SIGINT)) {
562 if (unhashed) 769 if (active)
563 dput(unhashed); 770 dput(active);
564 return ERR_PTR(-ERESTARTNOINTR); 771 return ERR_PTR(-ERESTARTNOINTR);
565 } 772 }
566 } 773 }
567 if (!oz_mode) { 774 }
568 spin_lock(&dentry->d_lock); 775
569 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 776 /*
570 spin_unlock(&dentry->d_lock); 777 * User space can (and has done in the past) remove and re-create
778 * this directory during the callback. This can leave us with an
779 * unhashed dentry, but a successful mount! So we need to
780 * perform another cached lookup in case the dentry now exists.
781 */
782 if (!oz_mode && !have_submounts(dentry)) {
783 struct dentry *new;
784 new = d_lookup(dentry->d_parent, &dentry->d_name);
785 if (new) {
786 if (active)
787 dput(active);
788 return new;
789 } else {
790 if (!status)
791 status = -ENOENT;
571 } 792 }
572 } 793 }
573 794
574 /* 795 /*
575 * If this dentry is unhashed, then we shouldn't honour this 796 * If we had a mount failure, return status to user space.
576 * lookup. Returning ENOENT here doesn't do the right thing 797 * If the mount succeeded and we used a dentry from the active queue
577 * for all system calls, but it should be OK for the operations 798 * return it.
578 * we permit from an autofs.
579 */ 799 */
580 if (!oz_mode && d_unhashed(dentry)) { 800 if (status) {
801 dentry = ERR_PTR(status);
802 if (active)
803 dput(active);
804 return dentry;
805 } else {
581 /* 806 /*
582 * A user space application can (and has done in the past) 807 * Valid successful mount, return active dentry or NULL
583 * remove and re-create this directory during the callback. 808 * for a new dentry.
584 * This can leave us with an unhashed dentry, but a
585 * successful mount! So we need to perform another
586 * cached lookup in case the dentry now exists.
587 */ 809 */
588 struct dentry *parent = dentry->d_parent; 810 if (active)
589 struct dentry *new = d_lookup(parent, &dentry->d_name); 811 return active;
590 if (new != NULL)
591 dentry = new;
592 else
593 dentry = ERR_PTR(-ENOENT);
594
595 if (unhashed)
596 dput(unhashed);
597
598 return dentry;
599 } 812 }
600 813
601 if (unhashed)
602 return unhashed;
603
604 return NULL; 814 return NULL;
605} 815}
606 816
@@ -624,11 +834,6 @@ static int autofs4_dir_symlink(struct inode *dir,
624 if (!ino) 834 if (!ino)
625 return -ENOMEM; 835 return -ENOMEM;
626 836
627 spin_lock(&sbi->lookup_lock);
628 if (!list_empty(&ino->active))
629 list_del_init(&ino->active);
630 spin_unlock(&sbi->lookup_lock);
631
632 ino->size = strlen(symname); 837 ino->size = strlen(symname);
633 cp = kmalloc(ino->size + 1, GFP_KERNEL); 838 cp = kmalloc(ino->size + 1, GFP_KERNEL);
634 if (!cp) { 839 if (!cp) {
@@ -705,10 +910,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
705 dir->i_mtime = CURRENT_TIME; 910 dir->i_mtime = CURRENT_TIME;
706 911
707 spin_lock(&dcache_lock); 912 spin_lock(&dcache_lock);
708 spin_lock(&sbi->lookup_lock);
709 if (list_empty(&ino->expiring))
710 list_add(&ino->expiring, &sbi->expiring_list);
711 spin_unlock(&sbi->lookup_lock);
712 spin_lock(&dentry->d_lock); 913 spin_lock(&dentry->d_lock);
713 __d_drop(dentry); 914 __d_drop(dentry);
714 spin_unlock(&dentry->d_lock); 915 spin_unlock(&dentry->d_lock);
@@ -734,10 +935,6 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
734 spin_unlock(&dcache_lock); 935 spin_unlock(&dcache_lock);
735 return -ENOTEMPTY; 936 return -ENOTEMPTY;
736 } 937 }
737 spin_lock(&sbi->lookup_lock);
738 if (list_empty(&ino->expiring))
739 list_add(&ino->expiring, &sbi->expiring_list);
740 spin_unlock(&sbi->lookup_lock);
741 spin_lock(&dentry->d_lock); 938 spin_lock(&dentry->d_lock);
742 __d_drop(dentry); 939 __d_drop(dentry);
743 spin_unlock(&dentry->d_lock); 940 spin_unlock(&dentry->d_lock);
@@ -775,11 +972,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
775 if (!ino) 972 if (!ino)
776 return -ENOMEM; 973 return -ENOMEM;
777 974
778 spin_lock(&sbi->lookup_lock);
779 if (!list_empty(&ino->active))
780 list_del_init(&ino->active);
781 spin_unlock(&sbi->lookup_lock);
782
783 inode = autofs4_get_inode(dir->i_sb, ino); 975 inode = autofs4_get_inode(dir->i_sb, ino);
784 if (!inode) { 976 if (!inode) {
785 if (!dentry->d_fsdata) 977 if (!dentry->d_fsdata)
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c77..346b6940536 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
32 32
33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34static int load_aout_library(struct file*); 34static int load_aout_library(struct file*);
35static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 35static int aout_core_dump(struct coredump_params *cprm);
36 36
37static struct linux_binfmt aout_format = { 37static struct linux_binfmt aout_format = {
38 .module = THIS_MODULE, 38 .module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
89 * dumping of the process results in another error.. 89 * dumping of the process results in another error..
90 */ 90 */
91 91
92static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 92static int aout_core_dump(struct coredump_params *cprm)
93{ 93{
94 struct file *file = cprm->file;
94 mm_segment_t fs; 95 mm_segment_t fs;
95 int has_dumped = 0; 96 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 97 unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
108 current->flags |= PF_DUMPCORE; 109 current->flags |= PF_DUMPCORE;
109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 110 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
110 dump.u_ar0 = offsetof(struct user, regs); 111 dump.u_ar0 = offsetof(struct user, regs);
111 dump.signal = signr; 112 dump.signal = cprm->signr;
112 aout_dump_thread(regs, &dump); 113 aout_dump_thread(cprm->regs, &dump);
113 114
114/* If the size of the dump file exceeds the rlimit, then see what would happen 115/* If the size of the dump file exceeds the rlimit, then see what would happen
115 if we wrote the stack, but not the data area. */ 116 if we wrote the stack, but not the data area. */
116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 117 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
117 dump.u_dsize = 0; 118 dump.u_dsize = 0;
118 119
119/* Make sure we have enough room to write the stack and data areas. */ 120/* Make sure we have enough room to write the stack and data areas. */
120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 121 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
121 dump.u_ssize = 0; 122 dump.u_ssize = 0;
122 123
123/* make sure we actually have a data and stack area to dump */ 124/* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb51b1e..edd90c49003 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,8 +44,8 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
44 * If we don't support core dumping, then supply a NULL so we 44 * If we don't support core dumping, then supply a NULL so we
45 * don't even try. 45 * don't even try.
46 */ 46 */
47#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 47#ifdef CONFIG_ELF_CORE
48static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 48static int elf_core_dump(struct coredump_params *cprm);
49#else 49#else
50#define elf_core_dump NULL 50#define elf_core_dump NULL
51#endif 51#endif
@@ -767,7 +767,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
767 767
768 current->mm->start_stack = bprm->p; 768 current->mm->start_stack = bprm->p;
769 769
770 /* Now we do a little grungy work by mmaping the ELF image into 770 /* Now we do a little grungy work by mmapping the ELF image into
771 the correct location in memory. */ 771 the correct location in memory. */
772 for(i = 0, elf_ppnt = elf_phdata; 772 for(i = 0, elf_ppnt = elf_phdata;
773 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 773 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
@@ -1101,12 +1101,7 @@ out:
1101 return error; 1101 return error;
1102} 1102}
1103 1103
1104/* 1104#ifdef CONFIG_ELF_CORE
1105 * Note that some platforms still use traditional core dumps and not
1106 * the ELF core dump. Each platform can select it as appropriate.
1107 */
1108#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110/* 1105/*
1111 * ELF core dumper 1106 * ELF core dumper
1112 * 1107 *
@@ -1277,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
1277} 1272}
1278#undef DUMP_WRITE 1273#undef DUMP_WRITE
1279 1274
1280#define DUMP_WRITE(addr, nr) \ 1275#define DUMP_WRITE(addr, nr) \
1281 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1276 if ((size += (nr)) > cprm->limit || \
1277 !dump_write(cprm->file, (addr), (nr))) \
1282 goto end_coredump; 1278 goto end_coredump;
1283 1279
1284static void fill_elf_header(struct elfhdr *elf, int segs, 1280static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1906 * and then they are actually written out. If we run out of core limit 1902 * and then they are actually written out. If we run out of core limit
1907 * we just truncate. 1903 * we just truncate.
1908 */ 1904 */
1909static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 1905static int elf_core_dump(struct coredump_params *cprm)
1910{ 1906{
1911 int has_dumped = 0; 1907 int has_dumped = 0;
1912 mm_segment_t fs; 1908 mm_segment_t fs;
@@ -1952,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1952 * notes. This also sets up the file header. 1948 * notes. This also sets up the file header.
1953 */ 1949 */
1954 if (!fill_note_info(elf, segs + 1, /* including notes section */ 1950 if (!fill_note_info(elf, segs + 1, /* including notes section */
1955 &info, signr, regs)) 1951 &info, cprm->signr, cprm->regs))
1956 goto cleanup; 1952 goto cleanup;
1957 1953
1958 has_dumped = 1; 1954 has_dumped = 1;
@@ -2014,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2014#endif 2010#endif
2015 2011
2016 /* write out the notes section */ 2012 /* write out the notes section */
2017 if (!write_note_info(&info, file, &foffset)) 2013 if (!write_note_info(&info, cprm->file, &foffset))
2018 goto end_coredump; 2014 goto end_coredump;
2019 2015
2020 if (elf_coredump_extra_notes_write(file, &foffset)) 2016 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2021 goto end_coredump; 2017 goto end_coredump;
2022 2018
2023 /* Align to page */ 2019 /* Align to page */
2024 if (!dump_seek(file, dataoff - foffset)) 2020 if (!dump_seek(cprm->file, dataoff - foffset))
2025 goto end_coredump; 2021 goto end_coredump;
2026 2022
2027 for (vma = first_vma(current, gate_vma); vma != NULL; 2023 for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2038 page = get_dump_page(addr); 2034 page = get_dump_page(addr);
2039 if (page) { 2035 if (page) {
2040 void *kaddr = kmap(page); 2036 void *kaddr = kmap(page);
2041 stop = ((size += PAGE_SIZE) > limit) || 2037 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2042 !dump_write(file, kaddr, PAGE_SIZE); 2038 !dump_write(cprm->file, kaddr,
2039 PAGE_SIZE);
2043 kunmap(page); 2040 kunmap(page);
2044 page_cache_release(page); 2041 page_cache_release(page);
2045 } else 2042 } else
2046 stop = !dump_seek(file, PAGE_SIZE); 2043 stop = !dump_seek(cprm->file, PAGE_SIZE);
2047 if (stop) 2044 if (stop)
2048 goto end_coredump; 2045 goto end_coredump;
2049 } 2046 }
@@ -2063,7 +2060,7 @@ out:
2063 return has_dumped; 2060 return has_dumped;
2064} 2061}
2065 2062
2066#endif /* USE_ELF_CORE_DUMP */ 2063#endif /* CONFIG_ELF_CORE */
2067 2064
2068static int __init init_elf_binfmt(void) 2065static int __init init_elf_binfmt(void)
2069{ 2066{
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 38502c67987..c57d9ce5ff7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -75,14 +75,14 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, 75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
76 struct file *, struct mm_struct *); 76 struct file *, struct mm_struct *);
77 77
78#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 78#ifdef CONFIG_ELF_CORE
79static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); 79static int elf_fdpic_core_dump(struct coredump_params *cprm);
80#endif 80#endif
81 81
82static struct linux_binfmt elf_fdpic_format = { 82static struct linux_binfmt elf_fdpic_format = {
83 .module = THIS_MODULE, 83 .module = THIS_MODULE,
84 .load_binary = load_elf_fdpic_binary, 84 .load_binary = load_elf_fdpic_binary,
85#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 85#ifdef CONFIG_ELF_CORE
86 .core_dump = elf_fdpic_core_dump, 86 .core_dump = elf_fdpic_core_dump,
87#endif 87#endif
88 .min_coredump = ELF_EXEC_PAGESIZE, 88 .min_coredump = ELF_EXEC_PAGESIZE,
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
171#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
172 unsigned long dynaddr; 172 unsigned long dynaddr;
173#endif 173#endif
174#ifndef CONFIG_MMU
175 unsigned long stack_prot;
176#endif
174 struct file *interpreter = NULL; /* to shut gcc up */ 177 struct file *interpreter = NULL; /* to shut gcc up */
175 char *interpreter_name = NULL; 178 char *interpreter_name = NULL;
176 int executable_stack; 179 int executable_stack;
@@ -316,6 +319,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
316 * defunct, deceased, etc. after this point we have to exit via 319 * defunct, deceased, etc. after this point we have to exit via
317 * error_kill */ 320 * error_kill */
318 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC;
319 set_binfmt(&elf_fdpic_format); 324 set_binfmt(&elf_fdpic_format);
320 325
321 current->mm->start_code = 0; 326 current->mm->start_code = 0;
@@ -377,10 +382,15 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
377 if (stack_size < PAGE_SIZE * 2) 382 if (stack_size < PAGE_SIZE * 2)
378 stack_size = PAGE_SIZE * 2; 383 stack_size = PAGE_SIZE * 2;
379 384
385 stack_prot = PROT_READ | PROT_WRITE;
386 if (executable_stack == EXSTACK_ENABLE_X ||
387 (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
388 stack_prot |= PROT_EXEC;
389
380 down_write(&current->mm->mmap_sem); 390 down_write(&current->mm->mmap_sem);
381 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 391 current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
382 PROT_READ | PROT_WRITE | PROT_EXEC, 392 MAP_PRIVATE | MAP_ANONYMOUS |
383 MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, 393 MAP_UNINITIALIZED | MAP_GROWSDOWN,
384 0); 394 0);
385 395
386 if (IS_ERR_VALUE(current->mm->start_brk)) { 396 if (IS_ERR_VALUE(current->mm->start_brk)) {
@@ -1200,7 +1210,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1200 * 1210 *
1201 * Modelled on fs/binfmt_elf.c core dumper 1211 * Modelled on fs/binfmt_elf.c core dumper
1202 */ 1212 */
1203#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 1213#ifdef CONFIG_ELF_CORE
1204 1214
1205/* 1215/*
1206 * These are the only things you should do on a core-file: use only these 1216 * These are the only things you should do on a core-file: use only these
@@ -1325,8 +1335,9 @@ static int writenote(struct memelfnote *men, struct file *file)
1325#undef DUMP_WRITE 1335#undef DUMP_WRITE
1326#undef DUMP_SEEK 1336#undef DUMP_SEEK
1327 1337
1328#define DUMP_WRITE(addr, nr) \ 1338#define DUMP_WRITE(addr, nr) \
1329 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1339 if ((size += (nr)) > cprm->limit || \
1340 !dump_write(cprm->file, (addr), (nr))) \
1330 goto end_coredump; 1341 goto end_coredump;
1331 1342
1332static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1343static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1581,8 +1592,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1581 * and then they are actually written out. If we run out of core limit 1592 * and then they are actually written out. If we run out of core limit
1582 * we just truncate. 1593 * we just truncate.
1583 */ 1594 */
1584static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, 1595static int elf_fdpic_core_dump(struct coredump_params *cprm)
1585 struct file *file, unsigned long limit)
1586{ 1596{
1587#define NUM_NOTES 6 1597#define NUM_NOTES 6
1588 int has_dumped = 0; 1598 int has_dumped = 0;
@@ -1641,7 +1651,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1641 goto cleanup; 1651 goto cleanup;
1642#endif 1652#endif
1643 1653
1644 if (signr) { 1654 if (cprm->signr) {
1645 struct core_thread *ct; 1655 struct core_thread *ct;
1646 struct elf_thread_status *tmp; 1656 struct elf_thread_status *tmp;
1647 1657
@@ -1660,14 +1670,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1660 int sz; 1670 int sz;
1661 1671
1662 tmp = list_entry(t, struct elf_thread_status, list); 1672 tmp = list_entry(t, struct elf_thread_status, list);
1663 sz = elf_dump_thread_status(signr, tmp); 1673 sz = elf_dump_thread_status(cprm->signr, tmp);
1664 thread_status_size += sz; 1674 thread_status_size += sz;
1665 } 1675 }
1666 } 1676 }
1667 1677
1668 /* now collect the dump for the current */ 1678 /* now collect the dump for the current */
1669 fill_prstatus(prstatus, current, signr); 1679 fill_prstatus(prstatus, current, cprm->signr);
1670 elf_core_copy_regs(&prstatus->pr_reg, regs); 1680 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1671 1681
1672 segs = current->mm->map_count; 1682 segs = current->mm->map_count;
1673#ifdef ELF_CORE_EXTRA_PHDRS 1683#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1702,7 +1712,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1702 1712
1703 /* Try to dump the FPU. */ 1713 /* Try to dump the FPU. */
1704 if ((prstatus->pr_fpvalid = 1714 if ((prstatus->pr_fpvalid =
1705 elf_core_copy_task_fpregs(current, regs, fpu))) 1715 elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
1706 fill_note(notes + numnote++, 1716 fill_note(notes + numnote++,
1707 "CORE", NT_PRFPREG, sizeof(*fpu), fpu); 1717 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1708#ifdef ELF_CORE_COPY_XFPREGS 1718#ifdef ELF_CORE_COPY_XFPREGS
@@ -1773,7 +1783,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1773 1783
1774 /* write out the notes section */ 1784 /* write out the notes section */
1775 for (i = 0; i < numnote; i++) 1785 for (i = 0; i < numnote; i++)
1776 if (!writenote(notes + i, file)) 1786 if (!writenote(notes + i, cprm->file))
1777 goto end_coredump; 1787 goto end_coredump;
1778 1788
1779 /* write out the thread status notes section */ 1789 /* write out the thread status notes section */
@@ -1782,25 +1792,26 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1782 list_entry(t, struct elf_thread_status, list); 1792 list_entry(t, struct elf_thread_status, list);
1783 1793
1784 for (i = 0; i < tmp->num_notes; i++) 1794 for (i = 0; i < tmp->num_notes; i++)
1785 if (!writenote(&tmp->notes[i], file)) 1795 if (!writenote(&tmp->notes[i], cprm->file))
1786 goto end_coredump; 1796 goto end_coredump;
1787 } 1797 }
1788 1798
1789 if (!dump_seek(file, dataoff)) 1799 if (!dump_seek(cprm->file, dataoff))
1790 goto end_coredump; 1800 goto end_coredump;
1791 1801
1792 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) 1802 if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
1803 mm_flags) < 0)
1793 goto end_coredump; 1804 goto end_coredump;
1794 1805
1795#ifdef ELF_CORE_WRITE_EXTRA_DATA 1806#ifdef ELF_CORE_WRITE_EXTRA_DATA
1796 ELF_CORE_WRITE_EXTRA_DATA; 1807 ELF_CORE_WRITE_EXTRA_DATA;
1797#endif 1808#endif
1798 1809
1799 if (file->f_pos != offset) { 1810 if (cprm->file->f_pos != offset) {
1800 /* Sanity check */ 1811 /* Sanity check */
1801 printk(KERN_WARNING 1812 printk(KERN_WARNING
1802 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", 1813 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1803 file->f_pos, offset); 1814 cprm->file->f_pos, offset);
1804 } 1815 }
1805 1816
1806end_coredump: 1817end_coredump:
@@ -1825,4 +1836,4 @@ cleanup:
1825#undef NUM_NOTES 1836#undef NUM_NOTES
1826} 1837}
1827 1838
1828#endif /* USE_ELF_CORE_DUMP */ 1839#endif /* CONFIG_ELF_CORE */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e75..d4a00ea1054 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
87#endif 87#endif
88 88
89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); 89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
90static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 90static int flat_core_dump(struct coredump_params *cprm);
91 91
92static struct linux_binfmt flat_format = { 92static struct linux_binfmt flat_format = {
93 .module = THIS_MODULE, 93 .module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
102 * Currently only a stub-function. 102 * Currently only a stub-function.
103 */ 103 */
104 104
105static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 105static int flat_core_dump(struct coredump_params *cprm)
106{ 106{
107 printk("Process %s:%d received signr %d and should have core dumped\n", 107 printk("Process %s:%d received signr %d and should have core dumped\n",
108 current->comm, current->pid, (int) signr); 108 current->comm, current->pid, (int) cprm->signr);
109 return(1); 109 return(1);
110} 110}
111 111
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e7..2a9b5330cc5 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
43 * don't even try. 43 * don't even try.
44 */ 44 */
45#if 0 45#if 0
46static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit); 46static int som_core_dump(struct coredump_params *cprm);
47#else 47#else
48#define som_core_dump NULL 48#define som_core_dump NULL
49#endif 49#endif
diff --git a/fs/bio.c b/fs/bio.c
index 12da5db8682..76e6713abf9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(bio_init);
272 * for a &struct bio to become free. If a %NULL @bs is passed in, we will 272 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
273 * fall back to just using @kmalloc to allocate the required memory. 273 * fall back to just using @kmalloc to allocate the required memory.
274 * 274 *
275 * Note that the caller must set ->bi_destructor on succesful return 275 * Note that the caller must set ->bi_destructor on successful return
276 * of a bio, to do the appropriate freeing of the bio once the reference 276 * of a bio, to do the appropriate freeing of the bio once the reference
277 * count drops to zero. 277 * count drops to zero.
278 **/ 278 **/
@@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio)
1393 } 1393 }
1394} 1394}
1395 1395
1396#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1397void bio_flush_dcache_pages(struct bio *bi)
1398{
1399 int i;
1400 struct bio_vec *bvec;
1401
1402 bio_for_each_segment(bvec, bi, i)
1403 flush_dcache_page(bvec->bv_page);
1404}
1405EXPORT_SYMBOL(bio_flush_dcache_pages);
1406#endif
1407
1396/** 1408/**
1397 * bio_endio - end I/O on a bio 1409 * bio_endio - end I/O on a bio
1398 * @bio: bio 1410 * @bio: bio
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8bed0557d88..73d6a735b8f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -405,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
405 405
406static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) 406static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
407{ 407{
408 return sync_blockdev(I_BDEV(filp->f_mapping->host)); 408 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
409 int error;
410
411 error = sync_blockdev(bdev);
412 if (error)
413 return error;
414
415 error = blkdev_issue_flush(bdev, NULL);
416 if (error == -EOPNOTSUPP)
417 error = 0;
418 return error;
409} 419}
410 420
411/* 421/*
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index fa44e92e9b8..54f4798ab46 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -153,8 +153,8 @@ out:
153 return ret; 153 return ret;
154} 154}
155 155
156static int btrfs_xattr_set_acl(struct inode *inode, int type, 156static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
157 const void *value, size_t size) 157 const void *value, size_t size, int flags, int type)
158{ 158{
159 int ret; 159 int ret;
160 struct posix_acl *acl = NULL; 160 struct posix_acl *acl = NULL;
@@ -169,38 +169,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
169 } 169 }
170 } 170 }
171 171
172 ret = btrfs_set_acl(NULL, inode, acl, type); 172 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
173 173
174 posix_acl_release(acl); 174 posix_acl_release(acl);
175 175
176 return ret; 176 return ret;
177} 177}
178 178
179
180static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
181 void *value, size_t size)
182{
183 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
184}
185
186static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
187 const void *value, size_t size, int flags)
188{
189 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
190}
191
192static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
193 void *value, size_t size)
194{
195 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
196}
197
198static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
199 const void *value, size_t size, int flags)
200{
201 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
202}
203
204int btrfs_check_acl(struct inode *inode, int mask) 179int btrfs_check_acl(struct inode *inode, int mask)
205{ 180{
206 struct posix_acl *acl; 181 struct posix_acl *acl;
@@ -307,14 +282,16 @@ int btrfs_acl_chmod(struct inode *inode)
307 282
308struct xattr_handler btrfs_xattr_acl_default_handler = { 283struct xattr_handler btrfs_xattr_acl_default_handler = {
309 .prefix = POSIX_ACL_XATTR_DEFAULT, 284 .prefix = POSIX_ACL_XATTR_DEFAULT,
310 .get = btrfs_xattr_acl_default_get, 285 .flags = ACL_TYPE_DEFAULT,
311 .set = btrfs_xattr_acl_default_set, 286 .get = btrfs_xattr_acl_get,
287 .set = btrfs_xattr_acl_set,
312}; 288};
313 289
314struct xattr_handler btrfs_xattr_acl_access_handler = { 290struct xattr_handler btrfs_xattr_acl_access_handler = {
315 .prefix = POSIX_ACL_XATTR_ACCESS, 291 .prefix = POSIX_ACL_XATTR_ACCESS,
316 .get = btrfs_xattr_acl_access_get, 292 .flags = ACL_TYPE_ACCESS,
317 .set = btrfs_xattr_acl_access_set, 293 .get = btrfs_xattr_acl_get,
294 .set = btrfs_xattr_acl_set,
318}; 295};
319 296
320#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 297#else /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ccbdcb54ec5..46bea0f4dc7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -256,7 +256,7 @@ out:
256 * Insert @em into @tree or perform a simple forward/backward merge with 256 * Insert @em into @tree or perform a simple forward/backward merge with
257 * existing mappings. The extent_map struct passed in will be inserted 257 * existing mappings. The extent_map struct passed in will be inserted
258 * into the tree directly, with an additional reference taken, or a 258 * into the tree directly, with an additional reference taken, or a
259 * reference dropped if the merge attempt was sucessfull. 259 * reference dropped if the merge attempt was successful.
260 */ 260 */
261int add_extent_mapping(struct extent_map_tree *tree, 261int add_extent_mapping(struct extent_map_tree *tree,
262 struct extent_map *em) 262 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae96fdae1f7..c02033596f0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -832,7 +832,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
832 unsigned long last_index; 832 unsigned long last_index;
833 int will_write; 833 int will_write;
834 834
835 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 835 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
836 (file->f_flags & O_DIRECT)); 836 (file->f_flags & O_DIRECT));
837 837
838 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 838 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -999,7 +999,7 @@ out_nolock:
999 if (err) 999 if (err)
1000 num_written = err; 1000 num_written = err;
1001 1001
1002 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 1002 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1003 trans = btrfs_start_transaction(root, 1); 1003 trans = btrfs_start_transaction(root, 1);
1004 ret = btrfs_log_dentry_safe(trans, root, 1004 ret = btrfs_log_dentry_safe(trans, root,
1005 file->f_dentry); 1005 file->f_dentry);
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 3797e0077b3..2906077ac79 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -84,7 +84,7 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) 84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
85{ 85{
86 struct cachefiles_object *fsdef; 86 struct cachefiles_object *fsdef;
87 struct nameidata nd; 87 struct path path;
88 struct kstatfs stats; 88 struct kstatfs stats;
89 struct dentry *graveyard, *cachedir, *root; 89 struct dentry *graveyard, *cachedir, *root;
90 const struct cred *saved_cred; 90 const struct cred *saved_cred;
@@ -114,15 +114,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
114 _debug("- fsdef %p", fsdef); 114 _debug("- fsdef %p", fsdef);
115 115
116 /* look up the directory at the root of the cache */ 116 /* look up the directory at the root of the cache */
117 memset(&nd, 0, sizeof(nd)); 117 ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
118
119 ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
120 if (ret < 0) 118 if (ret < 0)
121 goto error_open_root; 119 goto error_open_root;
122 120
123 cache->mnt = mntget(nd.path.mnt); 121 cache->mnt = path.mnt;
124 root = dget(nd.path.dentry); 122 root = path.dentry;
125 path_put(&nd.path);
126 123
127 /* check parameters */ 124 /* check parameters */
128 ret = -EOPNOTSUPP; 125 ret = -EOPNOTSUPP;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 4618516dd99..c2413561ea7 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -21,6 +21,7 @@
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/statfs.h> 22#include <linux/statfs.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/string.h>
24#include <linux/fs_struct.h> 25#include <linux/fs_struct.h>
25#include "internal.h" 26#include "internal.h"
26 27
@@ -257,8 +258,7 @@ static ssize_t cachefiles_daemon_write(struct file *file,
257 if (args == data) 258 if (args == data)
258 goto error; 259 goto error;
259 *args = '\0'; 260 *args = '\0';
260 for (args++; isspace(*args); args++) 261 args = skip_spaces(++args);
261 continue;
262 } 262 }
263 263
264 /* run the appropriate command handler */ 264 /* run the appropriate command handler */
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a6c8c6fe8df..1d833256386 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/ima.h>
15#include "internal.h" 14#include "internal.h"
16 15
17/* 16/*
@@ -923,7 +922,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
923 if (IS_ERR(file)) { 922 if (IS_ERR(file)) {
924 ret = PTR_ERR(file); 923 ret = PTR_ERR(file);
925 } else { 924 } else {
926 ima_counts_get(file);
927 ret = -EIO; 925 ret = -EIO;
928 if (file->f_op->write) { 926 if (file->f_op->write) {
929 pos = (loff_t) page->index << PAGE_SHIFT; 927 pos = (loff_t) page->index << PAGE_SHIFT;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc8..7b2600b380d 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. 5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
6Disable use of server inode numbers when server only 6Disable use of server inode numbers when server only
7partially supports them (e.g. for one server querying inode numbers on 7partially supports them (e.g. for one server querying inode numbers on
8FindFirst fails but QPathInfo queries works). 8FindFirst fails but QPathInfo queries works). Fix oops with dfs in
9cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
10for OpenOffice when on forcedirectio mount e.g.)
9 11
10Version 1.60 12Version 1.60
11------------- 13-------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 79c1a93400b..a727b7cb075 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -423,7 +423,7 @@ A partial list of the supported mount options follows:
423 source name to use to represent the client netbios machine 423 source name to use to represent the client netbios machine
424 name when doing the RFC1001 netbios session initialize. 424 name when doing the RFC1001 netbios session initialize.
425 direct Do not do inode data caching on files opened on this mount. 425 direct Do not do inode data caching on files opened on this mount.
426 This precludes mmaping files on this mount. In some cases 426 This precludes mmapping files on this mount. In some cases
427 with fast networks and little or no caching benefits on the 427 with fast networks and little or no caching benefits on the
428 client (e.g. when the application is doing large sequential 428 client (e.g. when the application is doing large sequential
429 reads bigger than page size without rereading the same data) 429 reads bigger than page size without rereading the same data)
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4b..b44ce0a0711 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bb..8c6a0362717 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
758}; 758};
759 759
760const struct file_operations cifs_file_direct_ops = { 760const struct file_operations cifs_file_direct_ops = {
761 /* no mmap, no aio, no readv - 761 /* no aio, no readv -
762 BB reevaluate whether they can be done with directio, no cache */ 762 BB reevaluate whether they can be done with directio, no cache */
763 .read = cifs_user_read, 763 .read = cifs_user_read,
764 .write = cifs_user_write, 764 .write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
767 .lock = cifs_lock, 767 .lock = cifs_lock,
768 .fsync = cifs_fsync, 768 .fsync = cifs_fsync,
769 .flush = cifs_flush, 769 .flush = cifs_flush,
770 .mmap = cifs_file_mmap,
770 .splice_read = generic_file_splice_read, 771 .splice_read = generic_file_splice_read,
771#ifdef CONFIG_CIFS_POSIX 772#ifdef CONFIG_CIFS_POSIX
772 .unlocked_ioctl = cifs_ioctl, 773 .unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5d0fde18039..4b35f7ec058 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -39,7 +39,7 @@
39 39
40/* 40/*
41 * MAX_REQ is the maximum number of requests that WE will send 41 * MAX_REQ is the maximum number of requests that WE will send
42 * on one socket concurently. It also matches the most common 42 * on one socket concurrently. It also matches the most common
43 * value of max multiplex returned by servers. We may 43 * value of max multiplex returned by servers. We may
44 * eventually want to use the negotiated value (in case 44 * eventually want to use the negotiated value (in case
45 * future servers can handle more) when we are more confident that 45 * future servers can handle more) when we are more confident that
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 2d07f890a84..3877737f96a 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1227,7 +1227,7 @@ typedef struct smb_com_setattr_rsp {
1227/* empty wct response to setattr */ 1227/* empty wct response to setattr */
1228 1228
1229/*******************************************************/ 1229/*******************************************************/
1230/* NT Transact structure defintions follow */ 1230/* NT Transact structure definitions follow */
1231/* Currently only ioctl, acl (get security descriptor) */ 1231/* Currently only ioctl, acl (get security descriptor) */
1232/* and notify are implemented */ 1232/* and notify are implemented */
1233/*******************************************************/ 1233/*******************************************************/
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687..3bbcaa716b3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2287,12 +2287,12 @@ int
2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2288 char *mount_data_global, const char *devname) 2288 char *mount_data_global, const char *devname)
2289{ 2289{
2290 int rc = 0; 2290 int rc;
2291 int xid; 2291 int xid;
2292 struct smb_vol *volume_info; 2292 struct smb_vol *volume_info;
2293 struct cifsSesInfo *pSesInfo = NULL; 2293 struct cifsSesInfo *pSesInfo;
2294 struct cifsTconInfo *tcon = NULL; 2294 struct cifsTconInfo *tcon;
2295 struct TCP_Server_Info *srvTcp = NULL; 2295 struct TCP_Server_Info *srvTcp;
2296 char *full_path; 2296 char *full_path;
2297 char *mount_data = mount_data_global; 2297 char *mount_data = mount_data_global;
2298#ifdef CONFIG_CIFS_DFS_UPCALL 2298#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2301 int referral_walks_count = 0; 2301 int referral_walks_count = 0;
2302try_mount_again: 2302try_mount_again:
2303#endif 2303#endif
2304 rc = 0;
2305 tcon = NULL;
2306 pSesInfo = NULL;
2307 srvTcp = NULL;
2304 full_path = NULL; 2308 full_path = NULL;
2305 2309
2306 xid = GetXid(); 2310 xid = GetXid();
@@ -2597,6 +2601,7 @@ remote_path_check:
2597 2601
2598 cleanup_volume_info(&volume_info); 2602 cleanup_volume_info(&volume_info);
2599 referral_walks_count++; 2603 referral_walks_count++;
2604 FreeXid(xid);
2600 goto try_mount_again; 2605 goto try_mount_again;
2601 } 2606 }
2602#else /* No DFS support, return error on mount */ 2607#else /* No DFS support, return error on mount */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1f42f772865..6ccf7262d1b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
214 posix_flags |= SMB_O_EXCL; 214 posix_flags |= SMB_O_EXCL;
215 if (oflags & O_TRUNC) 215 if (oflags & O_TRUNC)
216 posix_flags |= SMB_O_TRUNC; 216 posix_flags |= SMB_O_TRUNC;
217 if (oflags & O_SYNC) 217 /* be safe and imply O_SYNC for O_DSYNC */
218 if (oflags & O_DSYNC)
218 posix_flags |= SMB_O_SYNC; 219 posix_flags |= SMB_O_SYNC;
219 if (oflags & O_DIRECTORY) 220 if (oflags & O_DIRECTORY)
220 posix_flags |= SMB_O_DIRECTORY; 221 posix_flags |= SMB_O_DIRECTORY;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1..6177f7cca16 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26 /* 26 /*
27 * See Documentation/filesystems/Exporting 27 * See Documentation/filesystems/nfs/Exporting
28 * and examples in fs/exportfs 28 * and examples in fs/exportfs
29 * 29 *
30 * Since cifs is a network file system, an "fsid" must be included for 30 * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 429337eb7af..057e1dae12a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
76 reopening a file. They had their effect on the original open */ 76 reopening a file. They had their effect on the original open */
77 if (flags & O_APPEND) 77 if (flags & O_APPEND)
78 posix_flags |= (fmode_t)O_APPEND; 78 posix_flags |= (fmode_t)O_APPEND;
79 if (flags & O_SYNC) 79 if (flags & O_DSYNC)
80 posix_flags |= (fmode_t)O_SYNC; 80 posix_flags |= (fmode_t)O_DSYNC;
81 if (flags & __O_SYNC)
82 posix_flags |= (fmode_t)__O_SYNC;
81 if (flags & O_DIRECTORY) 83 if (flags & O_DIRECTORY)
82 posix_flags |= (fmode_t)O_DIRECTORY; 84 posix_flags |= (fmode_t)O_DIRECTORY;
83 if (flags & O_NOFOLLOW) 85 if (flags & O_NOFOLLOW)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cababd8a52d..cf18ee76559 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -914,8 +914,8 @@ undo_setattr:
914/* 914/*
915 * If dentry->d_inode is null (usually meaning the cached dentry 915 * If dentry->d_inode is null (usually meaning the cached dentry
916 * is a negative dentry) then we would attempt a standard SMB delete, but 916 * is a negative dentry) then we would attempt a standard SMB delete, but
917 * if that fails we can not attempt the fall back mechanisms on EACESS 917 * if that fails we can not attempt the fall back mechanisms on EACCESS
918 * but will return the EACESS to the caller. Note that the VFS does not call 918 * but will return the EACCESS to the caller. Note that the VFS does not call
919 * unlink on negative dentries currently. 919 * unlink on negative dentries currently.
920 */ 920 */
921int cifs_unlink(struct inode *dir, struct dentry *dentry) 921int cifs_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 224a1f47896..b6b6dcb500b 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -371,7 +371,7 @@ E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
371 smbhash(p24 + 16, c8, p21 + 14, 1); 371 smbhash(p24 + 16, c8, p21 + 14, 1);
372} 372}
373 373
374#if 0 /* currently unsued */ 374#if 0 /* currently unused */
375static void 375static void
376D_P16(unsigned char *p14, unsigned char *in, unsigned char *out) 376D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
377{ 377{
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 43c96ce2961..c6405ce3c50 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -17,28 +17,25 @@ static struct ctl_table_header *fs_table_header;
17 17
18static ctl_table coda_table[] = { 18static ctl_table coda_table[] = {
19 { 19 {
20 .ctl_name = CTL_UNNUMBERED,
21 .procname = "timeout", 20 .procname = "timeout",
22 .data = &coda_timeout, 21 .data = &coda_timeout,
23 .maxlen = sizeof(int), 22 .maxlen = sizeof(int),
24 .mode = 0644, 23 .mode = 0644,
25 .proc_handler = &proc_dointvec 24 .proc_handler = proc_dointvec
26 }, 25 },
27 { 26 {
28 .ctl_name = CTL_UNNUMBERED,
29 .procname = "hard", 27 .procname = "hard",
30 .data = &coda_hard, 28 .data = &coda_hard,
31 .maxlen = sizeof(int), 29 .maxlen = sizeof(int),
32 .mode = 0644, 30 .mode = 0644,
33 .proc_handler = &proc_dointvec 31 .proc_handler = proc_dointvec
34 }, 32 },
35 { 33 {
36 .ctl_name = CTL_UNNUMBERED,
37 .procname = "fake_statfs", 34 .procname = "fake_statfs",
38 .data = &coda_fake_statfs, 35 .data = &coda_fake_statfs,
39 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
40 .mode = 0600, 37 .mode = 0600,
41 .proc_handler = &proc_dointvec 38 .proc_handler = proc_dointvec
42 }, 39 },
43 {} 40 {}
44}; 41};
@@ -46,7 +43,6 @@ static ctl_table coda_table[] = {
46#ifdef CONFIG_SYSCTL 43#ifdef CONFIG_SYSCTL
47static ctl_table fs_table[] = { 44static ctl_table fs_table[] = {
48 { 45 {
49 .ctl_name = CTL_UNNUMBERED,
50 .procname = "coda", 46 .procname = "coda",
51 .mode = 0555, 47 .mode = 0555,
52 .child = coda_table 48 .child = coda_table
diff --git a/fs/compat.c b/fs/compat.c
index 6c19040ffee..00d90c2e66f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -38,8 +38,6 @@
38#include <linux/dirent.h> 38#include <linux/dirent.h>
39#include <linux/fsnotify.h> 39#include <linux/fsnotify.h>
40#include <linux/highuid.h> 40#include <linux/highuid.h>
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/syscall.h> 41#include <linux/nfsd/syscall.h>
44#include <linux/personality.h> 42#include <linux/personality.h>
45#include <linux/rwsem.h> 43#include <linux/rwsem.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d84e7058c29..332dd00f089 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -111,43 +111,40 @@
111#include <linux/dvb/frontend.h> 111#include <linux/dvb/frontend.h>
112#include <linux/dvb/video.h> 112#include <linux/dvb/video.h>
113 113
114#include <linux/sort.h>
115
114#ifdef CONFIG_SPARC 116#ifdef CONFIG_SPARC
115#include <asm/fbio.h> 117#include <asm/fbio.h>
116#endif 118#endif
117 119
118static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, 120static int w_long(unsigned int fd, unsigned int cmd,
119 unsigned long arg, struct file *f) 121 compat_ulong_t __user *argp)
120{
121 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
122}
123
124static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
125{ 122{
126 mm_segment_t old_fs = get_fs(); 123 mm_segment_t old_fs = get_fs();
127 int err; 124 int err;
128 unsigned long val; 125 unsigned long val;
129 126
130 set_fs (KERNEL_DS); 127 set_fs (KERNEL_DS);
131 err = sys_ioctl(fd, cmd, (unsigned long)&val); 128 err = sys_ioctl(fd, cmd, (unsigned long)&val);
132 set_fs (old_fs); 129 set_fs (old_fs);
133 if (!err && put_user(val, (u32 __user *)compat_ptr(arg))) 130 if (!err && put_user(val, argp))
134 return -EFAULT; 131 return -EFAULT;
135 return err; 132 return err;
136} 133}
137 134
138static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) 135static int rw_long(unsigned int fd, unsigned int cmd,
136 compat_ulong_t __user *argp)
139{ 137{
140 mm_segment_t old_fs = get_fs(); 138 mm_segment_t old_fs = get_fs();
141 u32 __user *argptr = compat_ptr(arg);
142 int err; 139 int err;
143 unsigned long val; 140 unsigned long val;
144 141
145 if(get_user(val, argptr)) 142 if(get_user(val, argp))
146 return -EFAULT; 143 return -EFAULT;
147 set_fs (KERNEL_DS); 144 set_fs (KERNEL_DS);
148 err = sys_ioctl(fd, cmd, (unsigned long)&val); 145 err = sys_ioctl(fd, cmd, (unsigned long)&val);
149 set_fs (old_fs); 146 set_fs (old_fs);
150 if (!err && put_user(val, argptr)) 147 if (!err && put_user(val, argp))
151 return -EFAULT; 148 return -EFAULT;
152 return err; 149 return err;
153} 150}
@@ -161,7 +158,8 @@ struct compat_video_event {
161 } u; 158 } u;
162}; 159};
163 160
164static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long arg) 161static int do_video_get_event(unsigned int fd, unsigned int cmd,
162 struct compat_video_event __user *up)
165{ 163{
166 struct video_event kevent; 164 struct video_event kevent;
167 mm_segment_t old_fs = get_fs(); 165 mm_segment_t old_fs = get_fs();
@@ -172,8 +170,6 @@ static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long a
172 set_fs(old_fs); 170 set_fs(old_fs);
173 171
174 if (!err) { 172 if (!err) {
175 struct compat_video_event __user *up = compat_ptr(arg);
176
177 err = put_user(kevent.type, &up->type); 173 err = put_user(kevent.type, &up->type);
178 err |= put_user(kevent.timestamp, &up->timestamp); 174 err |= put_user(kevent.timestamp, &up->timestamp);
179 err |= put_user(kevent.u.size.w, &up->u.size.w); 175 err |= put_user(kevent.u.size.w, &up->u.size.w);
@@ -192,15 +188,14 @@ struct compat_video_still_picture {
192 int32_t size; 188 int32_t size;
193}; 189};
194 190
195static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned long arg) 191static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
192 struct compat_video_still_picture __user *up)
196{ 193{
197 struct compat_video_still_picture __user *up;
198 struct video_still_picture __user *up_native; 194 struct video_still_picture __user *up_native;
199 compat_uptr_t fp; 195 compat_uptr_t fp;
200 int32_t size; 196 int32_t size;
201 int err; 197 int err;
202 198
203 up = (struct compat_video_still_picture __user *) arg;
204 err = get_user(fp, &up->iFrame); 199 err = get_user(fp, &up->iFrame);
205 err |= get_user(size, &up->size); 200 err |= get_user(size, &up->size);
206 if (err) 201 if (err)
@@ -224,14 +219,13 @@ struct compat_video_spu_palette {
224 compat_uptr_t palette; 219 compat_uptr_t palette;
225}; 220};
226 221
227static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned long arg) 222static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
223 struct compat_video_spu_palette __user *up)
228{ 224{
229 struct compat_video_spu_palette __user *up;
230 struct video_spu_palette __user *up_native; 225 struct video_spu_palette __user *up_native;
231 compat_uptr_t palp; 226 compat_uptr_t palp;
232 int length, err; 227 int length, err;
233 228
234 up = (struct compat_video_spu_palette __user *) arg;
235 err = get_user(palp, &up->palette); 229 err = get_user(palp, &up->palette);
236 err |= get_user(length, &up->length); 230 err |= get_user(length, &up->length);
237 231
@@ -246,428 +240,6 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned
246 return err; 240 return err;
247} 241}
248 242
249#ifdef CONFIG_NET
250static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
251{
252 struct compat_timeval __user *up = compat_ptr(arg);
253 struct timeval ktv;
254 mm_segment_t old_fs = get_fs();
255 int err;
256
257 set_fs(KERNEL_DS);
258 err = sys_ioctl(fd, cmd, (unsigned long)&ktv);
259 set_fs(old_fs);
260 if(!err) {
261 err = put_user(ktv.tv_sec, &up->tv_sec);
262 err |= __put_user(ktv.tv_usec, &up->tv_usec);
263 }
264 return err;
265}
266
267static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
268{
269 struct compat_timespec __user *up = compat_ptr(arg);
270 struct timespec kts;
271 mm_segment_t old_fs = get_fs();
272 int err;
273
274 set_fs(KERNEL_DS);
275 err = sys_ioctl(fd, cmd, (unsigned long)&kts);
276 set_fs(old_fs);
277 if (!err) {
278 err = put_user(kts.tv_sec, &up->tv_sec);
279 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
280 }
281 return err;
282}
283
284struct ifmap32 {
285 compat_ulong_t mem_start;
286 compat_ulong_t mem_end;
287 unsigned short base_addr;
288 unsigned char irq;
289 unsigned char dma;
290 unsigned char port;
291};
292
293struct ifreq32 {
294#define IFHWADDRLEN 6
295#define IFNAMSIZ 16
296 union {
297 char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
298 } ifr_ifrn;
299 union {
300 struct sockaddr ifru_addr;
301 struct sockaddr ifru_dstaddr;
302 struct sockaddr ifru_broadaddr;
303 struct sockaddr ifru_netmask;
304 struct sockaddr ifru_hwaddr;
305 short ifru_flags;
306 compat_int_t ifru_ivalue;
307 compat_int_t ifru_mtu;
308 struct ifmap32 ifru_map;
309 char ifru_slave[IFNAMSIZ]; /* Just fits the size */
310 char ifru_newname[IFNAMSIZ];
311 compat_caddr_t ifru_data;
312 /* XXXX? ifru_settings should be here */
313 } ifr_ifru;
314};
315
316struct ifconf32 {
317 compat_int_t ifc_len; /* size of buffer */
318 compat_caddr_t ifcbuf;
319};
320
321static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
322{
323 struct ifreq __user *uifr;
324 int err;
325
326 uifr = compat_alloc_user_space(sizeof(struct ifreq));
327 if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
328 return -EFAULT;
329
330 err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
331 if (err)
332 return err;
333
334 if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
335 return -EFAULT;
336
337 return 0;
338}
339
340static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
341{
342 struct ifconf32 ifc32;
343 struct ifconf ifc;
344 struct ifconf __user *uifc;
345 struct ifreq32 __user *ifr32;
346 struct ifreq __user *ifr;
347 unsigned int i, j;
348 int err;
349
350 if (copy_from_user(&ifc32, compat_ptr(arg), sizeof(struct ifconf32)))
351 return -EFAULT;
352
353 if (ifc32.ifcbuf == 0) {
354 ifc32.ifc_len = 0;
355 ifc.ifc_len = 0;
356 ifc.ifc_req = NULL;
357 uifc = compat_alloc_user_space(sizeof(struct ifconf));
358 } else {
359 size_t len =((ifc32.ifc_len / sizeof (struct ifreq32)) + 1) *
360 sizeof (struct ifreq);
361 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
362 ifc.ifc_len = len;
363 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
364 ifr32 = compat_ptr(ifc32.ifcbuf);
365 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) {
366 if (copy_in_user(ifr, ifr32, sizeof(struct ifreq32)))
367 return -EFAULT;
368 ifr++;
369 ifr32++;
370 }
371 }
372 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
373 return -EFAULT;
374
375 err = sys_ioctl (fd, SIOCGIFCONF, (unsigned long)uifc);
376 if (err)
377 return err;
378
379 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
380 return -EFAULT;
381
382 ifr = ifc.ifc_req;
383 ifr32 = compat_ptr(ifc32.ifcbuf);
384 for (i = 0, j = 0;
385 i + sizeof (struct ifreq32) <= ifc32.ifc_len && j < ifc.ifc_len;
386 i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) {
387 if (copy_in_user(ifr32, ifr, sizeof (struct ifreq32)))
388 return -EFAULT;
389 ifr32++;
390 ifr++;
391 }
392
393 if (ifc32.ifcbuf == 0) {
394 /* Translate from 64-bit structure multiple to
395 * a 32-bit one.
396 */
397 i = ifc.ifc_len;
398 i = ((i / sizeof(struct ifreq)) * sizeof(struct ifreq32));
399 ifc32.ifc_len = i;
400 } else {
401 ifc32.ifc_len = i;
402 }
403 if (copy_to_user(compat_ptr(arg), &ifc32, sizeof(struct ifconf32)))
404 return -EFAULT;
405
406 return 0;
407}
408
409static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
410{
411 struct ifreq __user *ifr;
412 struct ifreq32 __user *ifr32;
413 u32 data;
414 void __user *datap;
415
416 ifr = compat_alloc_user_space(sizeof(*ifr));
417 ifr32 = compat_ptr(arg);
418
419 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
420 return -EFAULT;
421
422 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
423 return -EFAULT;
424
425 datap = compat_ptr(data);
426 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
427 return -EFAULT;
428
429 return sys_ioctl(fd, cmd, (unsigned long) ifr);
430}
431
432static int bond_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
433{
434 struct ifreq kifr;
435 struct ifreq __user *uifr;
436 struct ifreq32 __user *ifr32 = compat_ptr(arg);
437 mm_segment_t old_fs;
438 int err;
439 u32 data;
440 void __user *datap;
441
442 switch (cmd) {
443 case SIOCBONDENSLAVE:
444 case SIOCBONDRELEASE:
445 case SIOCBONDSETHWADDR:
446 case SIOCBONDCHANGEACTIVE:
447 if (copy_from_user(&kifr, ifr32, sizeof(struct ifreq32)))
448 return -EFAULT;
449
450 old_fs = get_fs();
451 set_fs (KERNEL_DS);
452 err = sys_ioctl (fd, cmd, (unsigned long)&kifr);
453 set_fs (old_fs);
454
455 return err;
456 case SIOCBONDSLAVEINFOQUERY:
457 case SIOCBONDINFOQUERY:
458 uifr = compat_alloc_user_space(sizeof(*uifr));
459 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
460 return -EFAULT;
461
462 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
463 return -EFAULT;
464
465 datap = compat_ptr(data);
466 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
467 return -EFAULT;
468
469 return sys_ioctl (fd, cmd, (unsigned long)uifr);
470 default:
471 return -EINVAL;
472 };
473}
474
475static int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
476{
477 struct ifreq __user *u_ifreq64;
478 struct ifreq32 __user *u_ifreq32 = compat_ptr(arg);
479 char tmp_buf[IFNAMSIZ];
480 void __user *data64;
481 u32 data32;
482
483 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
484 IFNAMSIZ))
485 return -EFAULT;
486 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
487 return -EFAULT;
488 data64 = compat_ptr(data32);
489
490 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
491
492 /* Don't check these user accesses, just let that get trapped
493 * in the ioctl handler instead.
494 */
495 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
496 IFNAMSIZ))
497 return -EFAULT;
498 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
499 return -EFAULT;
500
501 return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64);
502}
503
504static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
505{
506 struct ifreq ifr;
507 struct ifreq32 __user *uifr32;
508 struct ifmap32 __user *uifmap32;
509 mm_segment_t old_fs;
510 int err;
511
512 uifr32 = compat_ptr(arg);
513 uifmap32 = &uifr32->ifr_ifru.ifru_map;
514 switch (cmd) {
515 case SIOCSIFMAP:
516 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
517 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
518 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
519 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
520 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
521 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
522 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
523 if (err)
524 return -EFAULT;
525 break;
526 case SIOCSHWTSTAMP:
527 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
528 return -EFAULT;
529 ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
530 break;
531 default:
532 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
533 return -EFAULT;
534 break;
535 }
536 old_fs = get_fs();
537 set_fs (KERNEL_DS);
538 err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
539 set_fs (old_fs);
540 if (!err) {
541 switch (cmd) {
542 /* TUNSETIFF is defined as _IOW, it should be _IORW
543 * as the data is copied back to user space, but that
544 * cannot be fixed without breaking all existing apps.
545 */
546 case TUNSETIFF:
547 case TUNGETIFF:
548 case SIOCGIFFLAGS:
549 case SIOCGIFMETRIC:
550 case SIOCGIFMTU:
551 case SIOCGIFMEM:
552 case SIOCGIFHWADDR:
553 case SIOCGIFINDEX:
554 case SIOCGIFADDR:
555 case SIOCGIFBRDADDR:
556 case SIOCGIFDSTADDR:
557 case SIOCGIFNETMASK:
558 case SIOCGIFTXQLEN:
559 if (copy_to_user(uifr32, &ifr, sizeof(*uifr32)))
560 return -EFAULT;
561 break;
562 case SIOCGIFMAP:
563 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
564 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
565 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
566 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
567 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
568 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
569 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
570 if (err)
571 err = -EFAULT;
572 break;
573 }
574 }
575 return err;
576}
577
578struct rtentry32 {
579 u32 rt_pad1;
580 struct sockaddr rt_dst; /* target address */
581 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
582 struct sockaddr rt_genmask; /* target network mask (IP) */
583 unsigned short rt_flags;
584 short rt_pad2;
585 u32 rt_pad3;
586 unsigned char rt_tos;
587 unsigned char rt_class;
588 short rt_pad4;
589 short rt_metric; /* +1 for binary compatibility! */
590 /* char * */ u32 rt_dev; /* forcing the device at add */
591 u32 rt_mtu; /* per route MTU/Window */
592 u32 rt_window; /* Window clamping */
593 unsigned short rt_irtt; /* Initial RTT */
594
595};
596
597struct in6_rtmsg32 {
598 struct in6_addr rtmsg_dst;
599 struct in6_addr rtmsg_src;
600 struct in6_addr rtmsg_gateway;
601 u32 rtmsg_type;
602 u16 rtmsg_dst_len;
603 u16 rtmsg_src_len;
604 u32 rtmsg_metric;
605 u32 rtmsg_info;
606 u32 rtmsg_flags;
607 s32 rtmsg_ifindex;
608};
609
610static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
611{
612 int ret;
613 void *r = NULL;
614 struct in6_rtmsg r6;
615 struct rtentry r4;
616 char devname[16];
617 u32 rtdev;
618 mm_segment_t old_fs = get_fs();
619
620 struct socket *mysock = sockfd_lookup(fd, &ret);
621
622 if (mysock && mysock->sk && mysock->sk->sk_family == AF_INET6) { /* ipv6 */
623 struct in6_rtmsg32 __user *ur6 = compat_ptr(arg);
624 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
625 3 * sizeof(struct in6_addr));
626 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
627 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
628 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
629 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
630 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
631 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
632 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
633
634 r = (void *) &r6;
635 } else { /* ipv4 */
636 struct rtentry32 __user *ur4 = compat_ptr(arg);
637 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
638 3 * sizeof(struct sockaddr));
639 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
640 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
641 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
642 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
643 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
644 ret |= __get_user (rtdev, &(ur4->rt_dev));
645 if (rtdev) {
646 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
647 r4.rt_dev = devname; devname[15] = 0;
648 } else
649 r4.rt_dev = NULL;
650
651 r = (void *) &r4;
652 }
653
654 if (ret) {
655 ret = -EFAULT;
656 goto out;
657 }
658
659 set_fs (KERNEL_DS);
660 ret = sys_ioctl (fd, cmd, (unsigned long) r);
661 set_fs (old_fs);
662
663out:
664 if (mysock)
665 sockfd_put(mysock);
666
667 return ret;
668}
669#endif
670
671#ifdef CONFIG_BLOCK 243#ifdef CONFIG_BLOCK
672typedef struct sg_io_hdr32 { 244typedef struct sg_io_hdr32 {
673 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ 245 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
@@ -721,16 +293,15 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
721 return 0; 293 return 0;
722} 294}
723 295
724static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 296static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
297 sg_io_hdr32_t __user *sgio32)
725{ 298{
726 sg_io_hdr_t __user *sgio; 299 sg_io_hdr_t __user *sgio;
727 sg_io_hdr32_t __user *sgio32;
728 u16 iovec_count; 300 u16 iovec_count;
729 u32 data; 301 u32 data;
730 void __user *dxferp; 302 void __user *dxferp;
731 int err; 303 int err;
732 304
733 sgio32 = compat_ptr(arg);
734 if (get_user(iovec_count, &sgio32->iovec_count)) 305 if (get_user(iovec_count, &sgio32->iovec_count))
735 return -EFAULT; 306 return -EFAULT;
736 307
@@ -820,11 +391,11 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
820 int unused; 391 int unused;
821}; 392};
822 393
823static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 394static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
395 compat_sg_req_info __user *o)
824{ 396{
825 int err, i; 397 int err, i;
826 sg_req_info_t __user *r; 398 sg_req_info_t __user *r;
827 struct compat_sg_req_info __user *o = (void __user *)arg;
828 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE); 399 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
829 err = sys_ioctl(fd,cmd,(unsigned long)r); 400 err = sys_ioctl(fd,cmd,(unsigned long)r);
830 if (err < 0) 401 if (err < 0)
@@ -852,9 +423,9 @@ struct sock_fprog32 {
852#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) 423#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
853#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32) 424#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
854 425
855static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 426static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
427 struct sock_fprog32 __user *u_fprog32)
856{ 428{
857 struct sock_fprog32 __user *u_fprog32 = compat_ptr(arg);
858 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog)); 429 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
859 void __user *fptr64; 430 void __user *fptr64;
860 u32 fptr32; 431 u32 fptr32;
@@ -891,15 +462,14 @@ struct ppp_idle32 {
891}; 462};
892#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) 463#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
893 464
894static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg) 465static int ppp_gidle(unsigned int fd, unsigned int cmd,
466 struct ppp_idle32 __user *idle32)
895{ 467{
896 struct ppp_idle __user *idle; 468 struct ppp_idle __user *idle;
897 struct ppp_idle32 __user *idle32;
898 __kernel_time_t xmit, recv; 469 __kernel_time_t xmit, recv;
899 int err; 470 int err;
900 471
901 idle = compat_alloc_user_space(sizeof(*idle)); 472 idle = compat_alloc_user_space(sizeof(*idle));
902 idle32 = compat_ptr(arg);
903 473
904 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle); 474 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
905 475
@@ -913,15 +483,14 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg)
913 return err; 483 return err;
914} 484}
915 485
916static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg) 486static int ppp_scompress(unsigned int fd, unsigned int cmd,
487 struct ppp_option_data32 __user *odata32)
917{ 488{
918 struct ppp_option_data __user *odata; 489 struct ppp_option_data __user *odata;
919 struct ppp_option_data32 __user *odata32;
920 __u32 data; 490 __u32 data;
921 void __user *datap; 491 void __user *datap;
922 492
923 odata = compat_alloc_user_space(sizeof(*odata)); 493 odata = compat_alloc_user_space(sizeof(*odata));
924 odata32 = compat_ptr(arg);
925 494
926 if (get_user(data, &odata32->ptr)) 495 if (get_user(data, &odata32->ptr))
927 return -EFAULT; 496 return -EFAULT;
@@ -937,35 +506,6 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg)
937 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata); 506 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
938} 507}
939 508
940static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
941{
942 int err;
943
944 switch (cmd) {
945 case PPPIOCGIDLE32:
946 err = ppp_gidle(fd, cmd, arg);
947 break;
948
949 case PPPIOCSCOMPRESS32:
950 err = ppp_scompress(fd, cmd, arg);
951 break;
952
953 default:
954 do {
955 static int count;
956 if (++count <= 20)
957 printk("ppp_ioctl: Unknown cmd fd(%d) "
958 "cmd(%08x) arg(%08x)\n",
959 (int)fd, (unsigned int)cmd, (unsigned int)arg);
960 } while(0);
961 err = -EINVAL;
962 break;
963 };
964
965 return err;
966}
967
968
969#ifdef CONFIG_BLOCK 509#ifdef CONFIG_BLOCK
970struct mtget32 { 510struct mtget32 {
971 compat_long_t mt_type; 511 compat_long_t mt_type;
@@ -983,7 +523,7 @@ struct mtpos32 {
983}; 523};
984#define MTIOCPOS32 _IOR('m', 3, struct mtpos32) 524#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
985 525
986static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 526static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
987{ 527{
988 mm_segment_t old_fs = get_fs(); 528 mm_segment_t old_fs = get_fs();
989 struct mtget get; 529 struct mtget get;
@@ -1003,15 +543,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1003 kcmd = MTIOCGET; 543 kcmd = MTIOCGET;
1004 karg = &get; 544 karg = &get;
1005 break; 545 break;
1006 default:
1007 do {
1008 static int count;
1009 if (++count <= 20)
1010 printk("mt_ioctl: Unknown cmd fd(%d) "
1011 "cmd(%08x) arg(%08x)\n",
1012 (int)fd, (unsigned int)cmd, (unsigned int)arg);
1013 } while(0);
1014 return -EINVAL;
1015 } 546 }
1016 set_fs (KERNEL_DS); 547 set_fs (KERNEL_DS);
1017 err = sys_ioctl (fd, kcmd, (unsigned long)karg); 548 err = sys_ioctl (fd, kcmd, (unsigned long)karg);
@@ -1020,11 +551,11 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1020 return err; 551 return err;
1021 switch (cmd) { 552 switch (cmd) {
1022 case MTIOCPOS32: 553 case MTIOCPOS32:
1023 upos32 = compat_ptr(arg); 554 upos32 = argp;
1024 err = __put_user(pos.mt_blkno, &upos32->mt_blkno); 555 err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
1025 break; 556 break;
1026 case MTIOCGET32: 557 case MTIOCGET32:
1027 umget32 = compat_ptr(arg); 558 umget32 = argp;
1028 err = __put_user(get.mt_type, &umget32->mt_type); 559 err = __put_user(get.mt_type, &umget32->mt_type);
1029 err |= __put_user(get.mt_resid, &umget32->mt_resid); 560 err |= __put_user(get.mt_resid, &umget32->mt_resid);
1030 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg); 561 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
@@ -1039,162 +570,8 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1039 570
1040#endif /* CONFIG_BLOCK */ 571#endif /* CONFIG_BLOCK */
1041 572
1042#ifdef CONFIG_VT 573static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
1043 574 compat_uid_t __user *argp)
1044static int vt_check(struct file *file)
1045{
1046 struct tty_struct *tty;
1047 struct inode *inode = file->f_path.dentry->d_inode;
1048 struct vc_data *vc;
1049
1050 if (file->f_op->unlocked_ioctl != tty_ioctl)
1051 return -EINVAL;
1052
1053 tty = (struct tty_struct *)file->private_data;
1054 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1055 return -EINVAL;
1056
1057 if (tty->ops->ioctl != vt_ioctl)
1058 return -EINVAL;
1059
1060 vc = (struct vc_data *)tty->driver_data;
1061 if (!vc_cons_allocated(vc->vc_num)) /* impossible? */
1062 return -ENOIOCTLCMD;
1063
1064 /*
1065 * To have permissions to do most of the vt ioctls, we either have
1066 * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG.
1067 */
1068 if (current->signal->tty == tty || capable(CAP_SYS_TTY_CONFIG))
1069 return 1;
1070 return 0;
1071}
1072
1073struct consolefontdesc32 {
1074 unsigned short charcount; /* characters in font (256 or 512) */
1075 unsigned short charheight; /* scan lines per character (1-32) */
1076 compat_caddr_t chardata; /* font data in expanded form */
1077};
1078
1079static int do_fontx_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1080{
1081 struct consolefontdesc32 __user *user_cfd = compat_ptr(arg);
1082 struct console_font_op op;
1083 compat_caddr_t data;
1084 int i, perm;
1085
1086 perm = vt_check(file);
1087 if (perm < 0) return perm;
1088
1089 switch (cmd) {
1090 case PIO_FONTX:
1091 if (!perm)
1092 return -EPERM;
1093 op.op = KD_FONT_OP_SET;
1094 op.flags = 0;
1095 op.width = 8;
1096 if (get_user(op.height, &user_cfd->charheight) ||
1097 get_user(op.charcount, &user_cfd->charcount) ||
1098 get_user(data, &user_cfd->chardata))
1099 return -EFAULT;
1100 op.data = compat_ptr(data);
1101 return con_font_op(vc_cons[fg_console].d, &op);
1102 case GIO_FONTX:
1103 op.op = KD_FONT_OP_GET;
1104 op.flags = 0;
1105 op.width = 8;
1106 if (get_user(op.height, &user_cfd->charheight) ||
1107 get_user(op.charcount, &user_cfd->charcount) ||
1108 get_user(data, &user_cfd->chardata))
1109 return -EFAULT;
1110 if (!data)
1111 return 0;
1112 op.data = compat_ptr(data);
1113 i = con_font_op(vc_cons[fg_console].d, &op);
1114 if (i)
1115 return i;
1116 if (put_user(op.height, &user_cfd->charheight) ||
1117 put_user(op.charcount, &user_cfd->charcount) ||
1118 put_user((compat_caddr_t)(unsigned long)op.data,
1119 &user_cfd->chardata))
1120 return -EFAULT;
1121 return 0;
1122 }
1123 return -EINVAL;
1124}
1125
1126struct console_font_op32 {
1127 compat_uint_t op; /* operation code KD_FONT_OP_* */
1128 compat_uint_t flags; /* KD_FONT_FLAG_* */
1129 compat_uint_t width, height; /* font size */
1130 compat_uint_t charcount;
1131 compat_caddr_t data; /* font data with height fixed to 32 */
1132};
1133
1134static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1135{
1136 struct console_font_op op;
1137 struct console_font_op32 __user *fontop = compat_ptr(arg);
1138 int perm = vt_check(file), i;
1139 struct vc_data *vc;
1140
1141 if (perm < 0) return perm;
1142
1143 if (copy_from_user(&op, fontop, sizeof(struct console_font_op32)))
1144 return -EFAULT;
1145 if (!perm && op.op != KD_FONT_OP_GET)
1146 return -EPERM;
1147 op.data = compat_ptr(((struct console_font_op32 *)&op)->data);
1148 op.flags |= KD_FONT_FLAG_OLD;
1149 vc = ((struct tty_struct *)file->private_data)->driver_data;
1150 i = con_font_op(vc, &op);
1151 if (i)
1152 return i;
1153 ((struct console_font_op32 *)&op)->data = (unsigned long)op.data;
1154 if (copy_to_user(fontop, &op, sizeof(struct console_font_op32)))
1155 return -EFAULT;
1156 return 0;
1157}
1158
1159struct unimapdesc32 {
1160 unsigned short entry_ct;
1161 compat_caddr_t entries;
1162};
1163
1164static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1165{
1166 struct unimapdesc32 tmp;
1167 struct unimapdesc32 __user *user_ud = compat_ptr(arg);
1168 int perm = vt_check(file);
1169 struct vc_data *vc;
1170
1171 if (perm < 0)
1172 return perm;
1173 if (copy_from_user(&tmp, user_ud, sizeof tmp))
1174 return -EFAULT;
1175 if (tmp.entries)
1176 if (!access_ok(VERIFY_WRITE, compat_ptr(tmp.entries),
1177 tmp.entry_ct*sizeof(struct unipair)))
1178 return -EFAULT;
1179 vc = ((struct tty_struct *)file->private_data)->driver_data;
1180 switch (cmd) {
1181 case PIO_UNIMAP:
1182 if (!perm)
1183 return -EPERM;
1184 return con_set_unimap(vc, tmp.entry_ct,
1185 compat_ptr(tmp.entries));
1186 case GIO_UNIMAP:
1187 if (!perm && fg_console != vc->vc_num)
1188 return -EPERM;
1189 return con_get_unimap(vc, tmp.entry_ct, &(user_ud->entry_ct),
1190 compat_ptr(tmp.entries));
1191 }
1192 return 0;
1193}
1194
1195#endif /* CONFIG_VT */
1196
1197static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long arg)
1198{ 575{
1199 mm_segment_t old_fs = get_fs(); 576 mm_segment_t old_fs = get_fs();
1200 __kernel_uid_t kuid; 577 __kernel_uid_t kuid;
@@ -1207,184 +584,15 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long a
1207 set_fs(old_fs); 584 set_fs(old_fs);
1208 585
1209 if (err >= 0) 586 if (err >= 0)
1210 err = put_user(kuid, (compat_uid_t __user *)compat_ptr(arg)); 587 err = put_user(kuid, argp);
1211 588
1212 return err; 589 return err;
1213} 590}
1214 591
1215struct atmif_sioc32 { 592static int ioc_settimeout(unsigned int fd, unsigned int cmd,
1216 compat_int_t number; 593 compat_ulong_t __user *argp)
1217 compat_int_t length;
1218 compat_caddr_t arg;
1219};
1220
1221struct atm_iobuf32 {
1222 compat_int_t length;
1223 compat_caddr_t buffer;
1224};
1225
1226#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct atmif_sioc32)
1227#define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct atm_iobuf32)
1228#define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct atmif_sioc32)
1229#define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct atmif_sioc32)
1230#define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct atmif_sioc32)
1231#define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct atmif_sioc32)
1232#define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct atmif_sioc32)
1233#define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct atmif_sioc32)
1234#define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct atmif_sioc32)
1235#define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct atmif_sioc32)
1236#define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct atmif_sioc32)
1237#define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct atmif_sioc32)
1238#define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32)
1239#define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32)
1240#define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32)
1241#define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32)
1242#define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32)
1243
1244static struct {
1245 unsigned int cmd32;
1246 unsigned int cmd;
1247} atm_ioctl_map[] = {
1248 { ATM_GETLINKRATE32, ATM_GETLINKRATE },
1249 { ATM_GETNAMES32, ATM_GETNAMES },
1250 { ATM_GETTYPE32, ATM_GETTYPE },
1251 { ATM_GETESI32, ATM_GETESI },
1252 { ATM_GETADDR32, ATM_GETADDR },
1253 { ATM_RSTADDR32, ATM_RSTADDR },
1254 { ATM_ADDADDR32, ATM_ADDADDR },
1255 { ATM_DELADDR32, ATM_DELADDR },
1256 { ATM_GETCIRANGE32, ATM_GETCIRANGE },
1257 { ATM_SETCIRANGE32, ATM_SETCIRANGE },
1258 { ATM_SETESI32, ATM_SETESI },
1259 { ATM_SETESIF32, ATM_SETESIF },
1260 { ATM_GETSTAT32, ATM_GETSTAT },
1261 { ATM_GETSTATZ32, ATM_GETSTATZ },
1262 { ATM_GETLOOP32, ATM_GETLOOP },
1263 { ATM_SETLOOP32, ATM_SETLOOP },
1264 { ATM_QUERYLOOP32, ATM_QUERYLOOP }
1265};
1266
1267#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map)
1268
1269static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg)
1270{ 594{
1271 struct atm_iobuf __user *iobuf; 595 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
1272 struct atm_iobuf32 __user *iobuf32;
1273 u32 data;
1274 void __user *datap;
1275 int len, err;
1276
1277 iobuf = compat_alloc_user_space(sizeof(*iobuf));
1278 iobuf32 = compat_ptr(arg);
1279
1280 if (get_user(len, &iobuf32->length) ||
1281 get_user(data, &iobuf32->buffer))
1282 return -EFAULT;
1283 datap = compat_ptr(data);
1284 if (put_user(len, &iobuf->length) ||
1285 put_user(datap, &iobuf->buffer))
1286 return -EFAULT;
1287
1288 err = sys_ioctl(fd, cmd, (unsigned long)iobuf);
1289
1290 if (!err) {
1291 if (copy_in_user(&iobuf32->length, &iobuf->length,
1292 sizeof(int)))
1293 err = -EFAULT;
1294 }
1295
1296 return err;
1297}
1298
1299static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg)
1300{
1301 struct atmif_sioc __user *sioc;
1302 struct atmif_sioc32 __user *sioc32;
1303 u32 data;
1304 void __user *datap;
1305 int err;
1306
1307 sioc = compat_alloc_user_space(sizeof(*sioc));
1308 sioc32 = compat_ptr(arg);
1309
1310 if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
1311 get_user(data, &sioc32->arg))
1312 return -EFAULT;
1313 datap = compat_ptr(data);
1314 if (put_user(datap, &sioc->arg))
1315 return -EFAULT;
1316
1317 err = sys_ioctl(fd, cmd, (unsigned long) sioc);
1318
1319 if (!err) {
1320 if (copy_in_user(&sioc32->length, &sioc->length,
1321 sizeof(int)))
1322 err = -EFAULT;
1323 }
1324 return err;
1325}
1326
1327static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
1328{
1329 int i;
1330 unsigned int cmd = 0;
1331
1332 switch (cmd32) {
1333 case SONET_GETSTAT:
1334 case SONET_GETSTATZ:
1335 case SONET_GETDIAG:
1336 case SONET_SETDIAG:
1337 case SONET_CLRDIAG:
1338 case SONET_SETFRAMING:
1339 case SONET_GETFRAMING:
1340 case SONET_GETFRSENSE:
1341 return do_atmif_sioc(fd, cmd32, arg);
1342 }
1343
1344 for (i = 0; i < NR_ATM_IOCTL; i++) {
1345 if (cmd32 == atm_ioctl_map[i].cmd32) {
1346 cmd = atm_ioctl_map[i].cmd;
1347 break;
1348 }
1349 }
1350 if (i == NR_ATM_IOCTL)
1351 return -EINVAL;
1352
1353 switch (cmd) {
1354 case ATM_GETNAMES:
1355 return do_atm_iobuf(fd, cmd, arg);
1356
1357 case ATM_GETLINKRATE:
1358 case ATM_GETTYPE:
1359 case ATM_GETESI:
1360 case ATM_GETADDR:
1361 case ATM_RSTADDR:
1362 case ATM_ADDADDR:
1363 case ATM_DELADDR:
1364 case ATM_GETCIRANGE:
1365 case ATM_SETCIRANGE:
1366 case ATM_SETESI:
1367 case ATM_SETESIF:
1368 case ATM_GETSTAT:
1369 case ATM_GETSTATZ:
1370 case ATM_GETLOOP:
1371 case ATM_SETLOOP:
1372 case ATM_QUERYLOOP:
1373 return do_atmif_sioc(fd, cmd, arg);
1374 }
1375
1376 return -EINVAL;
1377}
1378
1379static __used int
1380ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
1381{
1382 return -EINVAL;
1383}
1384
1385static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
1386{
1387 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
1388} 596}
1389 597
1390/* Bluetooth ioctls */ 598/* Bluetooth ioctls */
@@ -1442,7 +650,8 @@ static int set_raw32_request(struct raw_config_request *req, struct raw32_config
1442 return ret ? -EFAULT : 0; 650 return ret ? -EFAULT : 0;
1443} 651}
1444 652
1445static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 653static int raw_ioctl(unsigned fd, unsigned cmd,
654 struct raw32_config_request __user *user_req)
1446{ 655{
1447 int ret; 656 int ret;
1448 657
@@ -1450,7 +659,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1450 case RAW_SETBIND: 659 case RAW_SETBIND:
1451 case RAW_GETBIND: { 660 case RAW_GETBIND: {
1452 struct raw_config_request req; 661 struct raw_config_request req;
1453 struct raw32_config_request __user *user_req = compat_ptr(arg);
1454 mm_segment_t oldfs = get_fs(); 662 mm_segment_t oldfs = get_fs();
1455 663
1456 if ((ret = get_raw32_request(&req, user_req))) 664 if ((ret = get_raw32_request(&req, user_req)))
@@ -1465,9 +673,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1465 } 673 }
1466 break; 674 break;
1467 } 675 }
1468 default:
1469 ret = sys_ioctl(fd, cmd, arg);
1470 break;
1471 } 676 }
1472 return ret; 677 return ret;
1473} 678}
@@ -1495,11 +700,11 @@ struct serial_struct32 {
1495 compat_int_t reserved[1]; 700 compat_int_t reserved[1];
1496}; 701};
1497 702
1498static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 703static int serial_struct_ioctl(unsigned fd, unsigned cmd,
704 struct serial_struct32 __user *ss32)
1499{ 705{
1500 typedef struct serial_struct SS; 706 typedef struct serial_struct SS;
1501 typedef struct serial_struct32 SS32; 707 typedef struct serial_struct32 SS32;
1502 struct serial_struct32 __user *ss32 = compat_ptr(arg);
1503 int err; 708 int err;
1504 struct serial_struct ss; 709 struct serial_struct ss;
1505 mm_segment_t oldseg = get_fs(); 710 mm_segment_t oldseg = get_fs();
@@ -1537,96 +742,6 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1537 return err; 742 return err;
1538} 743}
1539 744
1540struct usbdevfs_ctrltransfer32 {
1541 u8 bRequestType;
1542 u8 bRequest;
1543 u16 wValue;
1544 u16 wIndex;
1545 u16 wLength;
1546 u32 timeout; /* in milliseconds */
1547 compat_caddr_t data;
1548};
1549
1550#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
1551
1552static int do_usbdevfs_control(unsigned int fd, unsigned int cmd, unsigned long arg)
1553{
1554 struct usbdevfs_ctrltransfer32 __user *p32 = compat_ptr(arg);
1555 struct usbdevfs_ctrltransfer __user *p;
1556 __u32 udata;
1557 p = compat_alloc_user_space(sizeof(*p));
1558 if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
1559 get_user(udata, &p32->data) ||
1560 put_user(compat_ptr(udata), &p->data))
1561 return -EFAULT;
1562 return sys_ioctl(fd, USBDEVFS_CONTROL, (unsigned long)p);
1563}
1564
1565
1566struct usbdevfs_bulktransfer32 {
1567 compat_uint_t ep;
1568 compat_uint_t len;
1569 compat_uint_t timeout; /* in milliseconds */
1570 compat_caddr_t data;
1571};
1572
1573#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
1574
1575static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd, unsigned long arg)
1576{
1577 struct usbdevfs_bulktransfer32 __user *p32 = compat_ptr(arg);
1578 struct usbdevfs_bulktransfer __user *p;
1579 compat_uint_t n;
1580 compat_caddr_t addr;
1581
1582 p = compat_alloc_user_space(sizeof(*p));
1583
1584 if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
1585 get_user(n, &p32->len) || put_user(n, &p->len) ||
1586 get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
1587 get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
1588 return -EFAULT;
1589
1590 return sys_ioctl(fd, USBDEVFS_BULK, (unsigned long)p);
1591}
1592
1593
1594/*
1595 * USBDEVFS_SUBMITURB, USBDEVFS_REAPURB and USBDEVFS_REAPURBNDELAY
1596 * are handled in usbdevfs core. -Christopher Li
1597 */
1598
1599struct usbdevfs_disconnectsignal32 {
1600 compat_int_t signr;
1601 compat_caddr_t context;
1602};
1603
1604#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
1605
1606static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd, unsigned long arg)
1607{
1608 struct usbdevfs_disconnectsignal kdis;
1609 struct usbdevfs_disconnectsignal32 __user *udis;
1610 mm_segment_t old_fs;
1611 u32 uctx;
1612 int err;
1613
1614 udis = compat_ptr(arg);
1615
1616 if (get_user(kdis.signr, &udis->signr) ||
1617 __get_user(uctx, &udis->context))
1618 return -EFAULT;
1619
1620 kdis.context = compat_ptr(uctx);
1621
1622 old_fs = get_fs();
1623 set_fs(KERNEL_DS);
1624 err = sys_ioctl(fd, USBDEVFS_DISCSIGNAL, (unsigned long) &kdis);
1625 set_fs(old_fs);
1626
1627 return err;
1628}
1629
1630/* 745/*
1631 * I2C layer ioctls 746 * I2C layer ioctls
1632 */ 747 */
@@ -1655,9 +770,9 @@ struct i2c_rdwr_aligned {
1655 struct i2c_msg msgs[0]; 770 struct i2c_msg msgs[0];
1656}; 771};
1657 772
1658static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 773static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
774 struct i2c_rdwr_ioctl_data32 __user *udata)
1659{ 775{
1660 struct i2c_rdwr_ioctl_data32 __user *udata = compat_ptr(arg);
1661 struct i2c_rdwr_aligned __user *tdata; 776 struct i2c_rdwr_aligned __user *tdata;
1662 struct i2c_msg __user *tmsgs; 777 struct i2c_msg __user *tmsgs;
1663 struct i2c_msg32 __user *umsgs; 778 struct i2c_msg32 __user *umsgs;
@@ -1691,10 +806,10 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
1691 return sys_ioctl(fd, cmd, (unsigned long)tdata); 806 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1692} 807}
1693 808
1694static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 809static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
810 struct i2c_smbus_ioctl_data32 __user *udata)
1695{ 811{
1696 struct i2c_smbus_ioctl_data __user *tdata; 812 struct i2c_smbus_ioctl_data __user *tdata;
1697 struct i2c_smbus_ioctl_data32 __user *udata;
1698 compat_caddr_t datap; 813 compat_caddr_t datap;
1699 814
1700 tdata = compat_alloc_user_space(sizeof(*tdata)); 815 tdata = compat_alloc_user_space(sizeof(*tdata));
@@ -1703,7 +818,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1703 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata))) 818 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata)))
1704 return -EFAULT; 819 return -EFAULT;
1705 820
1706 udata = compat_ptr(arg);
1707 if (!access_ok(VERIFY_READ, udata, sizeof(*udata))) 821 if (!access_ok(VERIFY_READ, udata, sizeof(*udata)))
1708 return -EFAULT; 822 return -EFAULT;
1709 823
@@ -1718,27 +832,12 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1718 return sys_ioctl(fd, cmd, (unsigned long)tdata); 832 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1719} 833}
1720 834
1721/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
1722 * for some operations; this forces use of the newer bridge-utils that
1723 * use compatible ioctls
1724 */
1725static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
1726{
1727 u32 tmp;
1728
1729 if (get_user(tmp, (u32 __user *) arg))
1730 return -EFAULT;
1731 if (tmp == BRCTL_GET_VERSION)
1732 return BRCTL_VERSION + 1;
1733 return -EINVAL;
1734}
1735
1736#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) 835#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
1737#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t) 836#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
1738#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) 837#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
1739#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) 838#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
1740 839
1741static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 840static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
1742{ 841{
1743 mm_segment_t oldfs = get_fs(); 842 mm_segment_t oldfs = get_fs();
1744 compat_ulong_t val32; 843 compat_ulong_t val32;
@@ -1756,29 +855,14 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1756 if (ret) 855 if (ret)
1757 return ret; 856 return ret;
1758 val32 = kval; 857 val32 = kval;
1759 return put_user(val32, (unsigned int __user *)arg); 858 return put_user(val32, (unsigned int __user *)argp);
1760 case RTC_IRQP_SET32: 859 case RTC_IRQP_SET32:
1761 return sys_ioctl(fd, RTC_IRQP_SET, arg); 860 return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
1762 case RTC_EPOCH_SET32: 861 case RTC_EPOCH_SET32:
1763 return sys_ioctl(fd, RTC_EPOCH_SET, arg); 862 return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
1764 default:
1765 /* unreached */
1766 return -ENOIOCTLCMD;
1767 } 863 }
1768}
1769 864
1770static int 865 return -ENOIOCTLCMD;
1771lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1772{
1773 struct compat_timeval __user *tc = (struct compat_timeval __user *)arg;
1774 struct timeval __user *tn = compat_alloc_user_space(sizeof(struct timeval));
1775 struct timeval ts;
1776 if (get_user(ts.tv_sec, &tc->tv_sec) ||
1777 get_user(ts.tv_usec, &tc->tv_usec) ||
1778 put_user(ts.tv_sec, &tn->tv_sec) ||
1779 put_user(ts.tv_usec, &tn->tv_usec))
1780 return -EFAULT;
1781 return sys_ioctl(fd, cmd, (unsigned long)tn);
1782} 866}
1783 867
1784/* on ia32 l_start is on a 32-bit boundary */ 868/* on ia32 l_start is on a 32-bit boundary */
@@ -1798,9 +882,9 @@ struct space_resv_32 {
1798#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) 882#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1799 883
1800/* just account for different alignment */ 884/* just account for different alignment */
1801static int compat_ioctl_preallocate(struct file *file, unsigned long arg) 885static int compat_ioctl_preallocate(struct file *file,
886 struct space_resv_32 __user *p32)
1802{ 887{
1803 struct space_resv_32 __user *p32 = compat_ptr(arg);
1804 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p)); 888 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1805 889
1806 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || 890 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
@@ -1816,27 +900,13 @@ static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1816} 900}
1817#endif 901#endif
1818 902
903/*
904 * simple reversible transform to make our table more evenly
905 * distributed after sorting.
906 */
907#define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff)
1819 908
1820typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 909#define COMPATIBLE_IOCTL(cmd) XFORM(cmd),
1821 unsigned long, struct file *);
1822
1823struct ioctl_trans {
1824 unsigned long cmd;
1825 ioctl_trans_handler_t handler;
1826 struct ioctl_trans *next;
1827};
1828
1829#define HANDLE_IOCTL(cmd,handler) \
1830 { (cmd), (ioctl_trans_handler_t)(handler) },
1831
1832/* pointer to compatible structure or no argument */
1833#define COMPATIBLE_IOCTL(cmd) \
1834 { (cmd), do_ioctl32_pointer },
1835
1836/* argument is an unsigned long integer, not a pointer */
1837#define ULONG_IOCTL(cmd) \
1838 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
1839
1840/* ioctl should not be warned about even if it's not implemented. 910/* ioctl should not be warned about even if it's not implemented.
1841 Valid reasons to use this: 911 Valid reasons to use this:
1842 - It is implemented with ->compat_ioctl on some device, but programs 912 - It is implemented with ->compat_ioctl on some device, but programs
@@ -1846,7 +916,7 @@ struct ioctl_trans {
1846 Most other reasons are not valid. */ 916 Most other reasons are not valid. */
1847#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd) 917#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd)
1848 918
1849static struct ioctl_trans ioctl_start[] = { 919static unsigned int ioctl_pointer[] = {
1850/* compatible ioctls first */ 920/* compatible ioctls first */
1851COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ 921COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
1852COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ 922COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
@@ -1857,7 +927,6 @@ COMPATIBLE_IOCTL(TCSETA)
1857COMPATIBLE_IOCTL(TCSETAW) 927COMPATIBLE_IOCTL(TCSETAW)
1858COMPATIBLE_IOCTL(TCSETAF) 928COMPATIBLE_IOCTL(TCSETAF)
1859COMPATIBLE_IOCTL(TCSBRK) 929COMPATIBLE_IOCTL(TCSBRK)
1860ULONG_IOCTL(TCSBRKP)
1861COMPATIBLE_IOCTL(TCXONC) 930COMPATIBLE_IOCTL(TCXONC)
1862COMPATIBLE_IOCTL(TCFLSH) 931COMPATIBLE_IOCTL(TCFLSH)
1863COMPATIBLE_IOCTL(TCGETS) 932COMPATIBLE_IOCTL(TCGETS)
@@ -1867,7 +936,6 @@ COMPATIBLE_IOCTL(TCSETSF)
1867COMPATIBLE_IOCTL(TIOCLINUX) 936COMPATIBLE_IOCTL(TIOCLINUX)
1868COMPATIBLE_IOCTL(TIOCSBRK) 937COMPATIBLE_IOCTL(TIOCSBRK)
1869COMPATIBLE_IOCTL(TIOCCBRK) 938COMPATIBLE_IOCTL(TIOCCBRK)
1870ULONG_IOCTL(TIOCMIWAIT)
1871COMPATIBLE_IOCTL(TIOCGICOUNT) 939COMPATIBLE_IOCTL(TIOCGICOUNT)
1872/* Little t */ 940/* Little t */
1873COMPATIBLE_IOCTL(TIOCGETD) 941COMPATIBLE_IOCTL(TIOCGETD)
@@ -1889,7 +957,6 @@ COMPATIBLE_IOCTL(TIOCSTI)
1889COMPATIBLE_IOCTL(TIOCOUTQ) 957COMPATIBLE_IOCTL(TIOCOUTQ)
1890COMPATIBLE_IOCTL(TIOCSPGRP) 958COMPATIBLE_IOCTL(TIOCSPGRP)
1891COMPATIBLE_IOCTL(TIOCGPGRP) 959COMPATIBLE_IOCTL(TIOCGPGRP)
1892ULONG_IOCTL(TIOCSCTTY)
1893COMPATIBLE_IOCTL(TIOCGPTN) 960COMPATIBLE_IOCTL(TIOCGPTN)
1894COMPATIBLE_IOCTL(TIOCSPTLCK) 961COMPATIBLE_IOCTL(TIOCSPTLCK)
1895COMPATIBLE_IOCTL(TIOCSERGETLSR) 962COMPATIBLE_IOCTL(TIOCSERGETLSR)
@@ -1912,44 +979,11 @@ COMPATIBLE_IOCTL(FIGETBSZ)
1912/* 'X' - originally XFS but some now in the VFS */ 979/* 'X' - originally XFS but some now in the VFS */
1913COMPATIBLE_IOCTL(FIFREEZE) 980COMPATIBLE_IOCTL(FIFREEZE)
1914COMPATIBLE_IOCTL(FITHAW) 981COMPATIBLE_IOCTL(FITHAW)
1915/* RAID */
1916COMPATIBLE_IOCTL(RAID_VERSION)
1917COMPATIBLE_IOCTL(GET_ARRAY_INFO)
1918COMPATIBLE_IOCTL(GET_DISK_INFO)
1919COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
1920COMPATIBLE_IOCTL(RAID_AUTORUN)
1921COMPATIBLE_IOCTL(CLEAR_ARRAY)
1922COMPATIBLE_IOCTL(ADD_NEW_DISK)
1923ULONG_IOCTL(HOT_REMOVE_DISK)
1924COMPATIBLE_IOCTL(SET_ARRAY_INFO)
1925COMPATIBLE_IOCTL(SET_DISK_INFO)
1926COMPATIBLE_IOCTL(WRITE_RAID_INFO)
1927COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
1928COMPATIBLE_IOCTL(PROTECT_ARRAY)
1929ULONG_IOCTL(HOT_ADD_DISK)
1930ULONG_IOCTL(SET_DISK_FAULTY)
1931COMPATIBLE_IOCTL(RUN_ARRAY)
1932COMPATIBLE_IOCTL(STOP_ARRAY)
1933COMPATIBLE_IOCTL(STOP_ARRAY_RO)
1934COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
1935COMPATIBLE_IOCTL(GET_BITMAP_FILE)
1936ULONG_IOCTL(SET_BITMAP_FILE)
1937/* Big K */
1938COMPATIBLE_IOCTL(PIO_FONT)
1939COMPATIBLE_IOCTL(GIO_FONT)
1940COMPATIBLE_IOCTL(PIO_CMAP)
1941COMPATIBLE_IOCTL(GIO_CMAP)
1942ULONG_IOCTL(KDSIGACCEPT)
1943COMPATIBLE_IOCTL(KDGETKEYCODE) 982COMPATIBLE_IOCTL(KDGETKEYCODE)
1944COMPATIBLE_IOCTL(KDSETKEYCODE) 983COMPATIBLE_IOCTL(KDSETKEYCODE)
1945ULONG_IOCTL(KIOCSOUND)
1946ULONG_IOCTL(KDMKTONE)
1947COMPATIBLE_IOCTL(KDGKBTYPE) 984COMPATIBLE_IOCTL(KDGKBTYPE)
1948ULONG_IOCTL(KDSETMODE)
1949COMPATIBLE_IOCTL(KDGETMODE) 985COMPATIBLE_IOCTL(KDGETMODE)
1950ULONG_IOCTL(KDSKBMODE)
1951COMPATIBLE_IOCTL(KDGKBMODE) 986COMPATIBLE_IOCTL(KDGKBMODE)
1952ULONG_IOCTL(KDSKBMETA)
1953COMPATIBLE_IOCTL(KDGKBMETA) 987COMPATIBLE_IOCTL(KDGKBMETA)
1954COMPATIBLE_IOCTL(KDGKBENT) 988COMPATIBLE_IOCTL(KDGKBENT)
1955COMPATIBLE_IOCTL(KDSKBENT) 989COMPATIBLE_IOCTL(KDSKBENT)
@@ -1959,15 +993,7 @@ COMPATIBLE_IOCTL(KDGKBDIACR)
1959COMPATIBLE_IOCTL(KDSKBDIACR) 993COMPATIBLE_IOCTL(KDSKBDIACR)
1960COMPATIBLE_IOCTL(KDKBDREP) 994COMPATIBLE_IOCTL(KDKBDREP)
1961COMPATIBLE_IOCTL(KDGKBLED) 995COMPATIBLE_IOCTL(KDGKBLED)
1962ULONG_IOCTL(KDSKBLED)
1963COMPATIBLE_IOCTL(KDGETLED) 996COMPATIBLE_IOCTL(KDGETLED)
1964ULONG_IOCTL(KDSETLED)
1965COMPATIBLE_IOCTL(GIO_SCRNMAP)
1966COMPATIBLE_IOCTL(PIO_SCRNMAP)
1967COMPATIBLE_IOCTL(GIO_UNISCRNMAP)
1968COMPATIBLE_IOCTL(PIO_UNISCRNMAP)
1969COMPATIBLE_IOCTL(PIO_FONTRESET)
1970COMPATIBLE_IOCTL(PIO_UNIMAPCLR)
1971#ifdef CONFIG_BLOCK 997#ifdef CONFIG_BLOCK
1972/* Big S */ 998/* Big S */
1973COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) 999COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN)
@@ -1979,32 +1005,6 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1979COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1980COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1981#endif 1007#endif
1982/* Big T */
1983COMPATIBLE_IOCTL(TUNSETNOCSUM)
1984COMPATIBLE_IOCTL(TUNSETDEBUG)
1985COMPATIBLE_IOCTL(TUNSETPERSIST)
1986COMPATIBLE_IOCTL(TUNSETOWNER)
1987COMPATIBLE_IOCTL(TUNSETLINK)
1988COMPATIBLE_IOCTL(TUNSETGROUP)
1989COMPATIBLE_IOCTL(TUNGETFEATURES)
1990COMPATIBLE_IOCTL(TUNSETOFFLOAD)
1991COMPATIBLE_IOCTL(TUNSETTXFILTER)
1992COMPATIBLE_IOCTL(TUNGETSNDBUF)
1993COMPATIBLE_IOCTL(TUNSETSNDBUF)
1994/* Big V */
1995COMPATIBLE_IOCTL(VT_SETMODE)
1996COMPATIBLE_IOCTL(VT_GETMODE)
1997COMPATIBLE_IOCTL(VT_GETSTATE)
1998COMPATIBLE_IOCTL(VT_OPENQRY)
1999ULONG_IOCTL(VT_ACTIVATE)
2000ULONG_IOCTL(VT_WAITACTIVE)
2001ULONG_IOCTL(VT_RELDISP)
2002ULONG_IOCTL(VT_DISALLOCATE)
2003COMPATIBLE_IOCTL(VT_RESIZE)
2004COMPATIBLE_IOCTL(VT_RESIZEX)
2005COMPATIBLE_IOCTL(VT_LOCKSWITCH)
2006COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
2007COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
2008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1008/* Little p (/dev/rtc, /dev/envctrl, etc.) */
2009COMPATIBLE_IOCTL(RTC_AIE_ON) 1009COMPATIBLE_IOCTL(RTC_AIE_ON)
2010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1010COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -2032,36 +1032,13 @@ COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
2032COMPATIBLE_IOCTL(MTIOCTOP) 1032COMPATIBLE_IOCTL(MTIOCTOP)
2033/* Socket level stuff */ 1033/* Socket level stuff */
2034COMPATIBLE_IOCTL(FIOQSIZE) 1034COMPATIBLE_IOCTL(FIOQSIZE)
2035COMPATIBLE_IOCTL(FIOSETOWN)
2036COMPATIBLE_IOCTL(SIOCSPGRP)
2037COMPATIBLE_IOCTL(FIOGETOWN)
2038COMPATIBLE_IOCTL(SIOCGPGRP)
2039COMPATIBLE_IOCTL(SIOCATMARK)
2040COMPATIBLE_IOCTL(SIOCSIFLINK)
2041COMPATIBLE_IOCTL(SIOCSIFENCAP)
2042COMPATIBLE_IOCTL(SIOCGIFENCAP)
2043COMPATIBLE_IOCTL(SIOCSIFNAME)
2044COMPATIBLE_IOCTL(SIOCSARP)
2045COMPATIBLE_IOCTL(SIOCGARP)
2046COMPATIBLE_IOCTL(SIOCDARP)
2047COMPATIBLE_IOCTL(SIOCSRARP)
2048COMPATIBLE_IOCTL(SIOCGRARP)
2049COMPATIBLE_IOCTL(SIOCDRARP)
2050COMPATIBLE_IOCTL(SIOCADDDLCI)
2051COMPATIBLE_IOCTL(SIOCDELDLCI)
2052COMPATIBLE_IOCTL(SIOCGMIIPHY)
2053COMPATIBLE_IOCTL(SIOCGMIIREG)
2054COMPATIBLE_IOCTL(SIOCSMIIREG)
2055COMPATIBLE_IOCTL(SIOCGIFVLAN)
2056COMPATIBLE_IOCTL(SIOCSIFVLAN)
2057COMPATIBLE_IOCTL(SIOCBRADDBR)
2058COMPATIBLE_IOCTL(SIOCBRDELBR)
2059#ifdef CONFIG_BLOCK 1035#ifdef CONFIG_BLOCK
1036/* loop */
1037IGNORE_IOCTL(LOOP_CLR_FD)
2060/* SG stuff */ 1038/* SG stuff */
2061COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 1039COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
2062COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 1040COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
2063COMPATIBLE_IOCTL(SG_EMULATED_HOST) 1041COMPATIBLE_IOCTL(SG_EMULATED_HOST)
2064ULONG_IOCTL(SG_SET_TRANSFORM)
2065COMPATIBLE_IOCTL(SG_GET_TRANSFORM) 1042COMPATIBLE_IOCTL(SG_GET_TRANSFORM)
2066COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE) 1043COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE)
2067COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE) 1044COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE)
@@ -2115,8 +1092,6 @@ COMPATIBLE_IOCTL(PPPIOCGCHAN)
2115/* PPPOX */ 1092/* PPPOX */
2116COMPATIBLE_IOCTL(PPPOEIOCSFWD) 1093COMPATIBLE_IOCTL(PPPOEIOCSFWD)
2117COMPATIBLE_IOCTL(PPPOEIOCDFWD) 1094COMPATIBLE_IOCTL(PPPOEIOCDFWD)
2118/* LP */
2119COMPATIBLE_IOCTL(LPGETSTATUS)
2120/* ppdev */ 1095/* ppdev */
2121COMPATIBLE_IOCTL(PPSETMODE) 1096COMPATIBLE_IOCTL(PPSETMODE)
2122COMPATIBLE_IOCTL(PPRSTATUS) 1097COMPATIBLE_IOCTL(PPRSTATUS)
@@ -2298,8 +1273,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
2298COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1273COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
2299COMPATIBLE_IOCTL(OSS_GETVERSION) 1274COMPATIBLE_IOCTL(OSS_GETVERSION)
2300/* AUTOFS */ 1275/* AUTOFS */
2301ULONG_IOCTL(AUTOFS_IOC_READY)
2302ULONG_IOCTL(AUTOFS_IOC_FAIL)
2303COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) 1276COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
2304COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) 1277COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2305COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 1278COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
@@ -2311,22 +1284,6 @@ COMPATIBLE_IOCTL(RAW_SETBIND)
2311COMPATIBLE_IOCTL(RAW_GETBIND) 1284COMPATIBLE_IOCTL(RAW_GETBIND)
2312/* SMB ioctls which do not need any translations */ 1285/* SMB ioctls which do not need any translations */
2313COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) 1286COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
2314/* Little a */
2315COMPATIBLE_IOCTL(ATMSIGD_CTRL)
2316COMPATIBLE_IOCTL(ATMARPD_CTRL)
2317COMPATIBLE_IOCTL(ATMLEC_CTRL)
2318COMPATIBLE_IOCTL(ATMLEC_MCAST)
2319COMPATIBLE_IOCTL(ATMLEC_DATA)
2320COMPATIBLE_IOCTL(ATM_SETSC)
2321COMPATIBLE_IOCTL(SIOCSIFATMTCP)
2322COMPATIBLE_IOCTL(SIOCMKCLIP)
2323COMPATIBLE_IOCTL(ATMARP_MKIP)
2324COMPATIBLE_IOCTL(ATMARP_SETENTRY)
2325COMPATIBLE_IOCTL(ATMARP_ENCAP)
2326COMPATIBLE_IOCTL(ATMTCP_CREATE)
2327COMPATIBLE_IOCTL(ATMTCP_REMOVE)
2328COMPATIBLE_IOCTL(ATMMPC_CTRL)
2329COMPATIBLE_IOCTL(ATMMPC_DATA)
2330/* Watchdog */ 1287/* Watchdog */
2331COMPATIBLE_IOCTL(WDIOC_GETSUPPORT) 1288COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
2332COMPATIBLE_IOCTL(WDIOC_GETSTATUS) 1289COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -2408,30 +1365,11 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
2408COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO) 1365COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
2409COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM) 1366COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
2410COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE) 1367COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
2411/* USB */
2412COMPATIBLE_IOCTL(USBDEVFS_RESETEP)
2413COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE)
2414COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION)
2415COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER)
2416COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB)
2417COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE)
2418COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE)
2419COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO)
2420COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO)
2421COMPATIBLE_IOCTL(USBDEVFS_RESET)
2422COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
2423COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
2424COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
2425COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
2426/* NBD */ 1368/* NBD */
2427ULONG_IOCTL(NBD_SET_SOCK)
2428ULONG_IOCTL(NBD_SET_BLKSIZE)
2429ULONG_IOCTL(NBD_SET_SIZE)
2430COMPATIBLE_IOCTL(NBD_DO_IT) 1369COMPATIBLE_IOCTL(NBD_DO_IT)
2431COMPATIBLE_IOCTL(NBD_CLEAR_SOCK) 1370COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
2432COMPATIBLE_IOCTL(NBD_CLEAR_QUE) 1371COMPATIBLE_IOCTL(NBD_CLEAR_QUE)
2433COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) 1372COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
2434ULONG_IOCTL(NBD_SET_SIZE_BLOCKS)
2435COMPATIBLE_IOCTL(NBD_DISCONNECT) 1373COMPATIBLE_IOCTL(NBD_DISCONNECT)
2436/* i2c */ 1374/* i2c */
2437COMPATIBLE_IOCTL(I2C_SLAVE) 1375COMPATIBLE_IOCTL(I2C_SLAVE)
@@ -2531,131 +1469,13 @@ COMPATIBLE_IOCTL(JSIOCGAXES)
2531COMPATIBLE_IOCTL(JSIOCGBUTTONS) 1469COMPATIBLE_IOCTL(JSIOCGBUTTONS)
2532COMPATIBLE_IOCTL(JSIOCGNAME(0)) 1470COMPATIBLE_IOCTL(JSIOCGNAME(0))
2533 1471
2534/* now things that need handlers */
2535#ifdef CONFIG_NET
2536HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
2537HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
2538HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc)
2539HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc)
2540HANDLE_IOCTL(SIOCGIFMETRIC, dev_ifsioc)
2541HANDLE_IOCTL(SIOCSIFMETRIC, dev_ifsioc)
2542HANDLE_IOCTL(SIOCGIFMTU, dev_ifsioc)
2543HANDLE_IOCTL(SIOCSIFMTU, dev_ifsioc)
2544HANDLE_IOCTL(SIOCGIFMEM, dev_ifsioc)
2545HANDLE_IOCTL(SIOCSIFMEM, dev_ifsioc)
2546HANDLE_IOCTL(SIOCGIFHWADDR, dev_ifsioc)
2547HANDLE_IOCTL(SIOCSIFHWADDR, dev_ifsioc)
2548HANDLE_IOCTL(SIOCADDMULTI, dev_ifsioc)
2549HANDLE_IOCTL(SIOCDELMULTI, dev_ifsioc)
2550HANDLE_IOCTL(SIOCGIFINDEX, dev_ifsioc)
2551HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
2552HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
2553HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
2554HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
2555HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
2556HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc)
2557
2558/* ioctls used by appletalk ddp.c */
2559HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
2560HANDLE_IOCTL(SIOCDIFADDR, dev_ifsioc)
2561HANDLE_IOCTL(SIOCSARP, dev_ifsioc)
2562HANDLE_IOCTL(SIOCDARP, dev_ifsioc)
2563
2564HANDLE_IOCTL(SIOCGIFBRDADDR, dev_ifsioc)
2565HANDLE_IOCTL(SIOCSIFBRDADDR, dev_ifsioc)
2566HANDLE_IOCTL(SIOCGIFDSTADDR, dev_ifsioc)
2567HANDLE_IOCTL(SIOCSIFDSTADDR, dev_ifsioc)
2568HANDLE_IOCTL(SIOCGIFNETMASK, dev_ifsioc)
2569HANDLE_IOCTL(SIOCSIFNETMASK, dev_ifsioc)
2570HANDLE_IOCTL(SIOCSIFPFLAGS, dev_ifsioc)
2571HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc)
2572HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc)
2573HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc)
2574HANDLE_IOCTL(TUNSETIFF, dev_ifsioc)
2575HANDLE_IOCTL(TUNGETIFF, dev_ifsioc)
2576HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl)
2577HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl)
2578HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl)
2579HANDLE_IOCTL(SIOCBONDSETHWADDR, bond_ioctl)
2580HANDLE_IOCTL(SIOCBONDSLAVEINFOQUERY, bond_ioctl)
2581HANDLE_IOCTL(SIOCBONDINFOQUERY, bond_ioctl)
2582HANDLE_IOCTL(SIOCBONDCHANGEACTIVE, bond_ioctl)
2583HANDLE_IOCTL(SIOCADDRT, routing_ioctl)
2584HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
2585HANDLE_IOCTL(SIOCBRADDIF, dev_ifsioc)
2586HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
2587/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
2588HANDLE_IOCTL(SIOCRTMSG, ret_einval)
2589HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
2590HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
2591#endif
2592#ifdef CONFIG_BLOCK
2593HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
2594HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
2595#endif
2596HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
2597HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
2598HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
2599HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
2600#ifdef CONFIG_BLOCK
2601HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
2602HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
2603#endif
2604#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
2605HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
2606#ifdef CONFIG_VT
2607HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl)
2608HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl)
2609HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
2610HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
2611HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
2612#endif
2613/* One SMB ioctl needs translations. */
2614#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
2615HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
2616HANDLE_IOCTL(ATM_GETLINKRATE32, do_atm_ioctl)
2617HANDLE_IOCTL(ATM_GETNAMES32, do_atm_ioctl)
2618HANDLE_IOCTL(ATM_GETTYPE32, do_atm_ioctl)
2619HANDLE_IOCTL(ATM_GETESI32, do_atm_ioctl)
2620HANDLE_IOCTL(ATM_GETADDR32, do_atm_ioctl)
2621HANDLE_IOCTL(ATM_RSTADDR32, do_atm_ioctl)
2622HANDLE_IOCTL(ATM_ADDADDR32, do_atm_ioctl)
2623HANDLE_IOCTL(ATM_DELADDR32, do_atm_ioctl)
2624HANDLE_IOCTL(ATM_GETCIRANGE32, do_atm_ioctl)
2625HANDLE_IOCTL(ATM_SETCIRANGE32, do_atm_ioctl)
2626HANDLE_IOCTL(ATM_SETESI32, do_atm_ioctl)
2627HANDLE_IOCTL(ATM_SETESIF32, do_atm_ioctl)
2628HANDLE_IOCTL(ATM_GETSTAT32, do_atm_ioctl)
2629HANDLE_IOCTL(ATM_GETSTATZ32, do_atm_ioctl)
2630HANDLE_IOCTL(ATM_GETLOOP32, do_atm_ioctl)
2631HANDLE_IOCTL(ATM_SETLOOP32, do_atm_ioctl)
2632HANDLE_IOCTL(ATM_QUERYLOOP32, do_atm_ioctl)
2633HANDLE_IOCTL(SONET_GETSTAT, do_atm_ioctl)
2634HANDLE_IOCTL(SONET_GETSTATZ, do_atm_ioctl)
2635HANDLE_IOCTL(SONET_GETDIAG, do_atm_ioctl)
2636HANDLE_IOCTL(SONET_SETDIAG, do_atm_ioctl)
2637HANDLE_IOCTL(SONET_CLRDIAG, do_atm_ioctl)
2638HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
2639HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2640HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2641/* block stuff */
2642#ifdef CONFIG_BLOCK
2643/* loop */
2644IGNORE_IOCTL(LOOP_CLR_FD)
2645/* Raw devices */
2646HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2647HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
2648#endif
2649/* Serial */
2650HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
2651HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
2652#ifdef TIOCGLTC 1472#ifdef TIOCGLTC
2653COMPATIBLE_IOCTL(TIOCGLTC) 1473COMPATIBLE_IOCTL(TIOCGLTC)
2654COMPATIBLE_IOCTL(TIOCSLTC) 1474COMPATIBLE_IOCTL(TIOCSLTC)
2655#endif 1475#endif
2656#ifdef TIOCSTART 1476#ifdef TIOCSTART
2657/* 1477/*
2658 * For these two we have defintions in ioctls.h and/or termios.h on 1478 * For these two we have definitions in ioctls.h and/or termios.h on
2659 * some architectures but no actual implementation. Some applications 1479 * like bash call them if they are defined in the headers, so we provide
2660 * like bash call them if they are defined in the headers, so we provide 1480 * like bash call them if they are defined in the headers, so we provide
2661 * entries here to avoid syslog message spew. 1481 * entries here to avoid syslog message spew.
@@ -2663,43 +1483,6 @@ COMPATIBLE_IOCTL(TIOCSLTC)
2663COMPATIBLE_IOCTL(TIOCSTART) 1483COMPATIBLE_IOCTL(TIOCSTART)
2664COMPATIBLE_IOCTL(TIOCSTOP) 1484COMPATIBLE_IOCTL(TIOCSTOP)
2665#endif 1485#endif
2666/* Usbdevfs */
2667HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control)
2668HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk)
2669HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal)
2670COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
2671/* i2c */
2672HANDLE_IOCTL(I2C_FUNCS, w_long)
2673HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
2674HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
2675/* bridge */
2676HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
2677HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
2678/* Not implemented in the native kernel */
2679IGNORE_IOCTL(SIOCGIFCOUNT)
2680HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
2681HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
2682HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
2683HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
2684
2685/* dvb */
2686HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
2687HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
2688HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
2689
2690/* parport */
2691COMPATIBLE_IOCTL(LPTIME)
2692COMPATIBLE_IOCTL(LPCHAR)
2693COMPATIBLE_IOCTL(LPABORTOPEN)
2694COMPATIBLE_IOCTL(LPCAREFUL)
2695COMPATIBLE_IOCTL(LPWAIT)
2696COMPATIBLE_IOCTL(LPSETIRQ)
2697COMPATIBLE_IOCTL(LPGETSTATUS)
2698COMPATIBLE_IOCTL(LPGETSTATUS)
2699COMPATIBLE_IOCTL(LPRESET)
2700/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
2701COMPATIBLE_IOCTL(LPGETFLAGS)
2702HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2703 1486
2704/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl, 1487/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
2705 but we don't want warnings on other file systems. So declare 1488 but we don't want warnings on other file systems. So declare
@@ -2727,12 +1510,108 @@ IGNORE_IOCTL(FBIOGCURSOR32)
2727#endif 1510#endif
2728}; 1511};
2729 1512
2730#define IOCTL_HASHSIZE 256 1513/*
2731static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; 1514 * Convert common ioctl arguments based on their command number
2732 1515 *
2733static inline unsigned long ioctl32_hash(unsigned long cmd) 1516 * Please do not add any code in here. Instead, implement
1517 * a compat_ioctl operation in the place that handleѕ the
1518 * ioctl for the native case.
1519 */
1520static long do_ioctl_trans(int fd, unsigned int cmd,
1521 unsigned long arg, struct file *file)
2734{ 1522{
2735 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; 1523 void __user *argp = compat_ptr(arg);
1524
1525 switch (cmd) {
1526 case PPPIOCGIDLE32:
1527 return ppp_gidle(fd, cmd, argp);
1528 case PPPIOCSCOMPRESS32:
1529 return ppp_scompress(fd, cmd, argp);
1530 case PPPIOCSPASS32:
1531 case PPPIOCSACTIVE32:
1532 return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
1533#ifdef CONFIG_BLOCK
1534 case SG_IO:
1535 return sg_ioctl_trans(fd, cmd, argp);
1536 case SG_GET_REQUEST_TABLE:
1537 return sg_grt_trans(fd, cmd, argp);
1538 case MTIOCGET32:
1539 case MTIOCPOS32:
1540 return mt_ioctl_trans(fd, cmd, argp);
1541 /* Raw devices */
1542 case RAW_SETBIND:
1543 case RAW_GETBIND:
1544 return raw_ioctl(fd, cmd, argp);
1545#endif
1546#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
1547 case AUTOFS_IOC_SETTIMEOUT32:
1548 return ioc_settimeout(fd, cmd, argp);
1549 /* One SMB ioctl needs translations. */
1550#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1551 case SMB_IOC_GETMOUNTUID_32:
1552 return do_smb_getmountuid(fd, cmd, argp);
1553 /* Serial */
1554 case TIOCGSERIAL:
1555 case TIOCSSERIAL:
1556 return serial_struct_ioctl(fd, cmd, argp);
1557 /* i2c */
1558 case I2C_FUNCS:
1559 return w_long(fd, cmd, argp);
1560 case I2C_RDWR:
1561 return do_i2c_rdwr_ioctl(fd, cmd, argp);
1562 case I2C_SMBUS:
1563 return do_i2c_smbus_ioctl(fd, cmd, argp);
1564 /* Not implemented in the native kernel */
1565 case RTC_IRQP_READ32:
1566 case RTC_IRQP_SET32:
1567 case RTC_EPOCH_READ32:
1568 case RTC_EPOCH_SET32:
1569 return rtc_ioctl(fd, cmd, argp);
1570
1571 /* dvb */
1572 case VIDEO_GET_EVENT:
1573 return do_video_get_event(fd, cmd, argp);
1574 case VIDEO_STILLPICTURE:
1575 return do_video_stillpicture(fd, cmd, argp);
1576 case VIDEO_SET_SPU_PALETTE:
1577 return do_video_set_spu_palette(fd, cmd, argp);
1578 }
1579
1580 /*
1581 * These take an integer instead of a pointer as 'arg',
1582 * so we must not do a compat_ptr() translation.
1583 */
1584 switch (cmd) {
1585 /* Big T */
1586 case TCSBRKP:
1587 case TIOCMIWAIT:
1588 case TIOCSCTTY:
1589 /* RAID */
1590 case HOT_REMOVE_DISK:
1591 case HOT_ADD_DISK:
1592 case SET_DISK_FAULTY:
1593 case SET_BITMAP_FILE:
1594 /* Big K */
1595 case KDSIGACCEPT:
1596 case KIOCSOUND:
1597 case KDMKTONE:
1598 case KDSETMODE:
1599 case KDSKBMODE:
1600 case KDSKBMETA:
1601 case KDSKBLED:
1602 case KDSETLED:
1603 /* AUTOFS */
1604 case AUTOFS_IOC_READY:
1605 case AUTOFS_IOC_FAIL:
1606 /* NBD */
1607 case NBD_SET_SOCK:
1608 case NBD_SET_BLKSIZE:
1609 case NBD_SET_SIZE:
1610 case NBD_SET_SIZE_BLOCKS:
1611 return do_vfs_ioctl(file, fd, cmd, arg);
1612 }
1613
1614 return -ENOIOCTLCMD;
2736} 1615}
2737 1616
2738static void compat_ioctl_error(struct file *filp, unsigned int fd, 1617static void compat_ioctl_error(struct file *filp, unsigned int fd,
@@ -2764,12 +1643,33 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
2764 free_page((unsigned long)path); 1643 free_page((unsigned long)path);
2765} 1644}
2766 1645
1646static int compat_ioctl_check_table(unsigned int xcmd)
1647{
1648 int i;
1649 const int max = ARRAY_SIZE(ioctl_pointer) - 1;
1650
1651 BUILD_BUG_ON(max >= (1 << 16));
1652
1653 /* guess initial offset into table, assuming a
1654 normalized distribution */
1655 i = ((xcmd >> 16) * max) >> 16;
1656
1657 /* do linear search up first, until greater or equal */
1658 while (ioctl_pointer[i] < xcmd && i < max)
1659 i++;
1660
1661 /* then do linear search down */
1662 while (ioctl_pointer[i] > xcmd && i > 0)
1663 i--;
1664
1665 return ioctl_pointer[i] == xcmd;
1666}
1667
2767asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1668asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2768 unsigned long arg) 1669 unsigned long arg)
2769{ 1670{
2770 struct file *filp; 1671 struct file *filp;
2771 int error = -EBADF; 1672 int error = -EBADF;
2772 struct ioctl_trans *t;
2773 int fput_needed; 1673 int fput_needed;
2774 1674
2775 filp = fget_light(fd, &fput_needed); 1675 filp = fget_light(fd, &fput_needed);
@@ -2797,7 +1697,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2797#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1697#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2798 case FS_IOC_RESVSP_32: 1698 case FS_IOC_RESVSP_32:
2799 case FS_IOC_RESVSP64_32: 1699 case FS_IOC_RESVSP64_32:
2800 error = compat_ioctl_preallocate(filp, arg); 1700 error = compat_ioctl_preallocate(filp, compat_ptr(arg));
2801 goto out_fput; 1701 goto out_fput;
2802#else 1702#else
2803 case FS_IOC_RESVSP: 1703 case FS_IOC_RESVSP:
@@ -2826,18 +1726,11 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2826 break; 1726 break;
2827 } 1727 }
2828 1728
2829 for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) { 1729 if (compat_ioctl_check_table(XFORM(cmd)))
2830 if (t->cmd == cmd) 1730 goto found_handler;
2831 goto found_handler;
2832 }
2833 1731
2834#ifdef CONFIG_NET 1732 error = do_ioctl_trans(fd, cmd, arg, filp);
2835 if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) && 1733 if (error == -ENOIOCTLCMD) {
2836 cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
2837 error = siocdevprivate_ioctl(fd, cmd, arg);
2838 } else
2839#endif
2840 {
2841 static int count; 1734 static int count;
2842 1735
2843 if (++count <= 50) 1736 if (++count <= 50)
@@ -2848,13 +1741,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2848 goto out_fput; 1741 goto out_fput;
2849 1742
2850 found_handler: 1743 found_handler:
2851 if (t->handler) { 1744 arg = (unsigned long)compat_ptr(arg);
2852 lock_kernel();
2853 error = t->handler(fd, cmd, arg, filp);
2854 unlock_kernel();
2855 goto out_fput;
2856 }
2857
2858 do_ioctl: 1745 do_ioctl:
2859 error = do_vfs_ioctl(filp, fd, cmd, arg); 1746 error = do_vfs_ioctl(filp, fd, cmd, arg);
2860 out_fput: 1747 out_fput:
@@ -2863,35 +1750,22 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2863 return error; 1750 return error;
2864} 1751}
2865 1752
2866static void ioctl32_insert_translation(struct ioctl_trans *trans) 1753static int __init init_sys32_ioctl_cmp(const void *p, const void *q)
2867{ 1754{
2868 unsigned long hash; 1755 unsigned int a, b;
2869 struct ioctl_trans *t; 1756 a = *(unsigned int *)p;
2870 1757 b = *(unsigned int *)q;
2871 hash = ioctl32_hash (trans->cmd); 1758 if (a > b)
2872 if (!ioctl32_hash_table[hash]) 1759 return 1;
2873 ioctl32_hash_table[hash] = trans; 1760 if (a < b)
2874 else { 1761 return -1;
2875 t = ioctl32_hash_table[hash]; 1762 return 0;
2876 while (t->next)
2877 t = t->next;
2878 trans->next = NULL;
2879 t->next = trans;
2880 }
2881} 1763}
2882 1764
2883static int __init init_sys32_ioctl(void) 1765static int __init init_sys32_ioctl(void)
2884{ 1766{
2885 int i; 1767 sort(ioctl_pointer, ARRAY_SIZE(ioctl_pointer), sizeof(*ioctl_pointer),
2886 1768 init_sys32_ioctl_cmp, NULL);
2887 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
2888 if (ioctl_start[i].next) {
2889 printk("ioctl translation %d bad\n",i);
2890 return -1;
2891 }
2892
2893 ioctl32_insert_translation(&ioctl_start[i]);
2894 }
2895 return 0; 1769 return 0;
2896} 1770}
2897__initcall(init_sys32_ioctl); 1771__initcall(init_sys32_ioctl);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91..32a5f46b115 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/dcache.c b/fs/dcache.c
index a100fa35a48..953173a293a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -978,6 +978,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
978 q.hash = full_name_hash(q.name, q.len); 978 q.hash = full_name_hash(q.name, q.len);
979 return d_alloc(parent, &q); 979 return d_alloc(parent, &q);
980} 980}
981EXPORT_SYMBOL(d_alloc_name);
981 982
982/* the caller must hold dcache_lock */ 983/* the caller must hold dcache_lock */
983static void __d_instantiate(struct dentry *dentry, struct inode *inode) 984static void __d_instantiate(struct dentry *dentry, struct inode *inode)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d22438ef767..b486169f42b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -32,7 +32,9 @@ static struct vfsmount *debugfs_mount;
32static int debugfs_mount_count; 32static int debugfs_mount_count;
33static bool debugfs_registered; 33static bool debugfs_registered;
34 34
35static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) 35static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev,
36 void *data, const struct file_operations *fops)
37
36{ 38{
37 struct inode *inode = new_inode(sb); 39 struct inode *inode = new_inode(sb);
38 40
@@ -44,14 +46,18 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
44 init_special_inode(inode, mode, dev); 46 init_special_inode(inode, mode, dev);
45 break; 47 break;
46 case S_IFREG: 48 case S_IFREG:
47 inode->i_fop = &debugfs_file_operations; 49 inode->i_fop = fops ? fops : &debugfs_file_operations;
50 inode->i_private = data;
48 break; 51 break;
49 case S_IFLNK: 52 case S_IFLNK:
50 inode->i_op = &debugfs_link_operations; 53 inode->i_op = &debugfs_link_operations;
54 inode->i_fop = fops;
55 inode->i_private = data;
51 break; 56 break;
52 case S_IFDIR: 57 case S_IFDIR:
53 inode->i_op = &simple_dir_inode_operations; 58 inode->i_op = &simple_dir_inode_operations;
54 inode->i_fop = &simple_dir_operations; 59 inode->i_fop = fops ? fops : &simple_dir_operations;
60 inode->i_private = data;
55 61
56 /* directory inodes start off with i_nlink == 2 62 /* directory inodes start off with i_nlink == 2
57 * (for "." entry) */ 63 * (for "." entry) */
@@ -64,7 +70,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
64 70
65/* SMP-safe */ 71/* SMP-safe */
66static int debugfs_mknod(struct inode *dir, struct dentry *dentry, 72static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
67 int mode, dev_t dev) 73 int mode, dev_t dev, void *data,
74 const struct file_operations *fops)
68{ 75{
69 struct inode *inode; 76 struct inode *inode;
70 int error = -EPERM; 77 int error = -EPERM;
@@ -72,7 +79,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
72 if (dentry->d_inode) 79 if (dentry->d_inode)
73 return -EEXIST; 80 return -EEXIST;
74 81
75 inode = debugfs_get_inode(dir->i_sb, mode, dev); 82 inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops);
76 if (inode) { 83 if (inode) {
77 d_instantiate(dentry, inode); 84 d_instantiate(dentry, inode);
78 dget(dentry); 85 dget(dentry);
@@ -81,12 +88,13 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
81 return error; 88 return error;
82} 89}
83 90
84static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 91static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
92 void *data, const struct file_operations *fops)
85{ 93{
86 int res; 94 int res;
87 95
88 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; 96 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
89 res = debugfs_mknod(dir, dentry, mode, 0); 97 res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
90 if (!res) { 98 if (!res) {
91 inc_nlink(dir); 99 inc_nlink(dir);
92 fsnotify_mkdir(dir, dentry); 100 fsnotify_mkdir(dir, dentry);
@@ -94,18 +102,20 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
94 return res; 102 return res;
95} 103}
96 104
97static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode) 105static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode,
106 void *data, const struct file_operations *fops)
98{ 107{
99 mode = (mode & S_IALLUGO) | S_IFLNK; 108 mode = (mode & S_IALLUGO) | S_IFLNK;
100 return debugfs_mknod(dir, dentry, mode, 0); 109 return debugfs_mknod(dir, dentry, mode, 0, data, fops);
101} 110}
102 111
103static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode) 112static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode,
113 void *data, const struct file_operations *fops)
104{ 114{
105 int res; 115 int res;
106 116
107 mode = (mode & S_IALLUGO) | S_IFREG; 117 mode = (mode & S_IALLUGO) | S_IFREG;
108 res = debugfs_mknod(dir, dentry, mode, 0); 118 res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
109 if (!res) 119 if (!res)
110 fsnotify_create(dir, dentry); 120 fsnotify_create(dir, dentry);
111 return res; 121 return res;
@@ -139,7 +149,9 @@ static struct file_system_type debug_fs_type = {
139 149
140static int debugfs_create_by_name(const char *name, mode_t mode, 150static int debugfs_create_by_name(const char *name, mode_t mode,
141 struct dentry *parent, 151 struct dentry *parent,
142 struct dentry **dentry) 152 struct dentry **dentry,
153 void *data,
154 const struct file_operations *fops)
143{ 155{
144 int error = 0; 156 int error = 0;
145 157
@@ -164,13 +176,16 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
164 if (!IS_ERR(*dentry)) { 176 if (!IS_ERR(*dentry)) {
165 switch (mode & S_IFMT) { 177 switch (mode & S_IFMT) {
166 case S_IFDIR: 178 case S_IFDIR:
167 error = debugfs_mkdir(parent->d_inode, *dentry, mode); 179 error = debugfs_mkdir(parent->d_inode, *dentry, mode,
180 data, fops);
168 break; 181 break;
169 case S_IFLNK: 182 case S_IFLNK:
170 error = debugfs_link(parent->d_inode, *dentry, mode); 183 error = debugfs_link(parent->d_inode, *dentry, mode,
184 data, fops);
171 break; 185 break;
172 default: 186 default:
173 error = debugfs_create(parent->d_inode, *dentry, mode); 187 error = debugfs_create(parent->d_inode, *dentry, mode,
188 data, fops);
174 break; 189 break;
175 } 190 }
176 dput(*dentry); 191 dput(*dentry);
@@ -184,7 +199,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
184/** 199/**
185 * debugfs_create_file - create a file in the debugfs filesystem 200 * debugfs_create_file - create a file in the debugfs filesystem
186 * @name: a pointer to a string containing the name of the file to create. 201 * @name: a pointer to a string containing the name of the file to create.
187 * @mode: the permission that the file should have 202 * @mode: the permission that the file should have.
188 * @parent: a pointer to the parent dentry for this file. This should be a 203 * @parent: a pointer to the parent dentry for this file. This should be a
189 * directory dentry if set. If this paramater is NULL, then the 204 * directory dentry if set. If this paramater is NULL, then the
190 * file will be created in the root of the debugfs filesystem. 205 * file will be created in the root of the debugfs filesystem.
@@ -195,8 +210,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
195 * this file. 210 * this file.
196 * 211 *
197 * This is the basic "create a file" function for debugfs. It allows for a 212 * This is the basic "create a file" function for debugfs. It allows for a
198 * wide range of flexibility in createing a file, or a directory (if you 213 * wide range of flexibility in creating a file, or a directory (if you want
199 * want to create a directory, the debugfs_create_dir() function is 214 * to create a directory, the debugfs_create_dir() function is
200 * recommended to be used instead.) 215 * recommended to be used instead.)
201 * 216 *
202 * This function will return a pointer to a dentry if it succeeds. This 217 * This function will return a pointer to a dentry if it succeeds. This
@@ -221,19 +236,13 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
221 if (error) 236 if (error)
222 goto exit; 237 goto exit;
223 238
224 error = debugfs_create_by_name(name, mode, parent, &dentry); 239 error = debugfs_create_by_name(name, mode, parent, &dentry,
240 data, fops);
225 if (error) { 241 if (error) {
226 dentry = NULL; 242 dentry = NULL;
227 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 243 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
228 goto exit; 244 goto exit;
229 } 245 }
230
231 if (dentry->d_inode) {
232 if (data)
233 dentry->d_inode->i_private = data;
234 if (fops)
235 dentry->d_inode->i_fop = fops;
236 }
237exit: 246exit:
238 return dentry; 247 return dentry;
239} 248}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5f8c96964b..8882ecc0f1b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -517,11 +517,23 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
517 517
518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) 518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
519{ 519{
520 struct dentry *dentry;
521 struct tty_struct *tty;
522
520 BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); 523 BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
521 524
525 /* Ensure dentry has not been deleted by devpts_pty_kill() */
526 dentry = d_find_alias(pts_inode);
527 if (!dentry)
528 return NULL;
529
530 tty = NULL;
522 if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) 531 if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
523 return (struct tty_struct *)pts_inode->i_private; 532 tty = (struct tty_struct *)pts_inode->i_private;
524 return NULL; 533
534 dput(dentry);
535
536 return tty;
525} 537}
526 538
527void devpts_pty_kill(struct tty_struct *tty) 539void devpts_pty_kill(struct tty_struct *tty)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87dc01..e82adc2debb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
53 * 53 *
54 * If blkfactor is zero then the user's request was aligned to the filesystem's 54 * If blkfactor is zero then the user's request was aligned to the filesystem's
55 * blocksize. 55 * blocksize.
56 *
57 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
58 * This determines whether we need to do the fancy locking which prevents
59 * direct-IO from being able to read uninitialised disk blocks. If its zero
60 * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
61 * not held for the entire direct write (taken briefly, initially, during a
62 * direct read though, but its never held for the duration of a direct-IO).
63 */ 56 */
64 57
65struct dio { 58struct dio {
@@ -68,7 +61,7 @@ struct dio {
68 struct inode *inode; 61 struct inode *inode;
69 int rw; 62 int rw;
70 loff_t i_size; /* i_size when submitted */ 63 loff_t i_size; /* i_size when submitted */
71 int lock_type; /* doesn't change */ 64 int flags; /* doesn't change */
72 unsigned blkbits; /* doesn't change */ 65 unsigned blkbits; /* doesn't change */
73 unsigned blkfactor; /* When we're using an alignment which 66 unsigned blkfactor; /* When we're using an alignment which
74 is finer than the filesystem's soft 67 is finer than the filesystem's soft
@@ -104,6 +97,18 @@ struct dio {
104 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 97 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
105 sector_t cur_page_block; /* Where it starts */ 98 sector_t cur_page_block; /* Where it starts */
106 99
100 /* BIO completion state */
101 spinlock_t bio_lock; /* protects BIO fields below */
102 unsigned long refcount; /* direct_io_worker() and bios */
103 struct bio *bio_list; /* singly linked via bi_private */
104 struct task_struct *waiter; /* waiting task (NULL if none) */
105
106 /* AIO related stuff */
107 struct kiocb *iocb; /* kiocb */
108 int is_async; /* is IO async ? */
109 int io_error; /* IO error in completion path */
110 ssize_t result; /* IO result */
111
107 /* 112 /*
108 * Page fetching state. These variables belong to dio_refill_pages(). 113 * Page fetching state. These variables belong to dio_refill_pages().
109 */ 114 */
@@ -115,22 +120,16 @@ struct dio {
115 * Page queue. These variables belong to dio_refill_pages() and 120 * Page queue. These variables belong to dio_refill_pages() and
116 * dio_get_page(). 121 * dio_get_page().
117 */ 122 */
118 struct page *pages[DIO_PAGES]; /* page buffer */
119 unsigned head; /* next page to process */ 123 unsigned head; /* next page to process */
120 unsigned tail; /* last valid page + 1 */ 124 unsigned tail; /* last valid page + 1 */
121 int page_errors; /* errno from get_user_pages() */ 125 int page_errors; /* errno from get_user_pages() */
122 126
123 /* BIO completion state */ 127 /*
124 spinlock_t bio_lock; /* protects BIO fields below */ 128 * pages[] (and any fields placed after it) are not zeroed out at
125 unsigned long refcount; /* direct_io_worker() and bios */ 129 * allocation time. Don't add new fields after pages[] unless you
126 struct bio *bio_list; /* singly linked via bi_private */ 130 * wish that they not be zeroed.
127 struct task_struct *waiter; /* waiting task (NULL if none) */ 131 */
128 132 struct page *pages[DIO_PAGES]; /* page buffer */
129 /* AIO related stuff */
130 struct kiocb *iocb; /* kiocb */
131 int is_async; /* is IO async ? */
132 int io_error; /* IO error in completion path */
133 ssize_t result; /* IO result */
134}; 133};
135 134
136/* 135/*
@@ -240,7 +239,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
240 if (dio->end_io && dio->result) 239 if (dio->end_io && dio->result)
241 dio->end_io(dio->iocb, offset, transferred, 240 dio->end_io(dio->iocb, offset, transferred,
242 dio->map_bh.b_private); 241 dio->map_bh.b_private);
243 if (dio->lock_type == DIO_LOCKING) 242
243 if (dio->flags & DIO_LOCKING)
244 /* lockdep: non-owner release */ 244 /* lockdep: non-owner release */
245 up_read_non_owner(&dio->inode->i_alloc_sem); 245 up_read_non_owner(&dio->inode->i_alloc_sem);
246 246
@@ -515,21 +515,24 @@ static int get_more_blocks(struct dio *dio)
515 map_bh->b_state = 0; 515 map_bh->b_state = 0;
516 map_bh->b_size = fs_count << dio->inode->i_blkbits; 516 map_bh->b_size = fs_count << dio->inode->i_blkbits;
517 517
518 /*
519 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
520 * forbid block creations: only overwrites are permitted.
521 * We will return early to the caller once we see an
522 * unmapped buffer head returned, and the caller will fall
523 * back to buffered I/O.
524 *
525 * Otherwise the decision is left to the get_blocks method,
526 * which may decide to handle it or also return an unmapped
527 * buffer head.
528 */
518 create = dio->rw & WRITE; 529 create = dio->rw & WRITE;
519 if (dio->lock_type == DIO_LOCKING) { 530 if (dio->flags & DIO_SKIP_HOLES) {
520 if (dio->block_in_file < (i_size_read(dio->inode) >> 531 if (dio->block_in_file < (i_size_read(dio->inode) >>
521 dio->blkbits)) 532 dio->blkbits))
522 create = 0; 533 create = 0;
523 } else if (dio->lock_type == DIO_NO_LOCKING) {
524 create = 0;
525 } 534 }
526 535
527 /*
528 * For writes inside i_size we forbid block creations: only
529 * overwrites are permitted. We fall back to buffered writes
530 * at a higher level for inside-i_size block-instantiating
531 * writes.
532 */
533 ret = (*dio->get_block)(dio->inode, fs_startblk, 536 ret = (*dio->get_block)(dio->inode, fs_startblk,
534 map_bh, create); 537 map_bh, create);
535 } 538 }
@@ -1028,9 +1031,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1028 if (dio->bio) 1031 if (dio->bio)
1029 dio_bio_submit(dio); 1032 dio_bio_submit(dio);
1030 1033
1031 /* All IO is now issued, send it on its way */
1032 blk_run_address_space(inode->i_mapping);
1033
1034 /* 1034 /*
1035 * It is possible that, we return short IO due to end of file. 1035 * It is possible that, we return short IO due to end of file.
1036 * In that case, we need to release all the pages we got hold on. 1036 * In that case, we need to release all the pages we got hold on.
@@ -1042,7 +1042,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1042 * we can let i_mutex go now that its achieved its purpose 1042 * we can let i_mutex go now that its achieved its purpose
1043 * of protecting us from looking up uninitialized blocks. 1043 * of protecting us from looking up uninitialized blocks.
1044 */ 1044 */
1045 if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) 1045 if (rw == READ && (dio->flags & DIO_LOCKING))
1046 mutex_unlock(&dio->inode->i_mutex); 1046 mutex_unlock(&dio->inode->i_mutex);
1047 1047
1048 /* 1048 /*
@@ -1057,8 +1057,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1057 ((rw & READ) || (dio->result == dio->size))) 1057 ((rw & READ) || (dio->result == dio->size)))
1058 ret = -EIOCBQUEUED; 1058 ret = -EIOCBQUEUED;
1059 1059
1060 if (ret != -EIOCBQUEUED) 1060 if (ret != -EIOCBQUEUED) {
1061 /* All IO is now issued, send it on its way */
1062 blk_run_address_space(inode->i_mapping);
1061 dio_await_completion(dio); 1063 dio_await_completion(dio);
1064 }
1062 1065
1063 /* 1066 /*
1064 * Sync will always be dropping the final ref and completing the 1067 * Sync will always be dropping the final ref and completing the
@@ -1086,30 +1089,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1086 1089
1087/* 1090/*
1088 * This is a library function for use by filesystem drivers. 1091 * This is a library function for use by filesystem drivers.
1089 * The locking rules are governed by the dio_lock_type parameter.
1090 * 1092 *
1091 * DIO_NO_LOCKING (no locking, for raw block device access) 1093 * The locking rules are governed by the flags parameter:
1092 * For writes, i_mutex is not held on entry; it is never taken. 1094 * - if the flags value contains DIO_LOCKING we use a fancy locking
1095 * scheme for dumb filesystems.
1096 * For writes this function is called under i_mutex and returns with
1097 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1098 * taken and dropped again before returning.
1099 * For reads and writes i_alloc_sem is taken in shared mode and released
1100 * on I/O completion (which may happen asynchronously after returning to
1101 * the caller).
1093 * 1102 *
1094 * DIO_LOCKING (simple locking for regular files) 1103 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1095 * For writes we are called under i_mutex and return with i_mutex held, even 1104 * internal locking but rather rely on the filesystem to synchronize
1096 * though it is internally dropped. 1105 * direct I/O reads/writes versus each other and truncate.
1097 * For reads, i_mutex is not held on entry, but it is taken and dropped before 1106 * For reads and writes both i_mutex and i_alloc_sem are not held on
1098 * returning. 1107 * entry and are never taken.
1099 *
1100 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
1101 * uninitialised data, allowing parallel direct readers and writers)
1102 * For writes we are called without i_mutex, return without it, never touch it.
1103 * For reads we are called under i_mutex and return with i_mutex held, even
1104 * though it may be internally dropped.
1105 *
1106 * Additional i_alloc_sem locking requirements described inline below.
1107 */ 1108 */
1108ssize_t 1109ssize_t
1109__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1110__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1110 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1111 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1111 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1112 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1112 int dio_lock_type) 1113 int flags)
1113{ 1114{
1114 int seg; 1115 int seg;
1115 size_t size; 1116 size_t size;
@@ -1120,11 +1121,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1120 ssize_t retval = -EINVAL; 1121 ssize_t retval = -EINVAL;
1121 loff_t end = offset; 1122 loff_t end = offset;
1122 struct dio *dio; 1123 struct dio *dio;
1123 int release_i_mutex = 0;
1124 int acquire_i_mutex = 0;
1125 1124
1126 if (rw & WRITE) 1125 if (rw & WRITE)
1127 rw = WRITE_ODIRECT; 1126 rw = WRITE_ODIRECT_PLUG;
1128 1127
1129 if (bdev) 1128 if (bdev)
1130 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1129 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
@@ -1151,48 +1150,41 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1151 } 1150 }
1152 } 1151 }
1153 1152
1154 dio = kzalloc(sizeof(*dio), GFP_KERNEL); 1153 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1155 retval = -ENOMEM; 1154 retval = -ENOMEM;
1156 if (!dio) 1155 if (!dio)
1157 goto out; 1156 goto out;
1158
1159 /* 1157 /*
1160 * For block device access DIO_NO_LOCKING is used, 1158 * Believe it or not, zeroing out the page array caused a .5%
1161 * neither readers nor writers do any locking at all 1159 * performance regression in a database benchmark. So, we take
1162 * For regular files using DIO_LOCKING, 1160 * care to only zero out what's needed.
1163 * readers need to grab i_mutex and i_alloc_sem
1164 * writers need to grab i_alloc_sem only (i_mutex is already held)
1165 * For regular files using DIO_OWN_LOCKING,
1166 * neither readers nor writers take any locks here
1167 */ 1161 */
1168 dio->lock_type = dio_lock_type; 1162 memset(dio, 0, offsetof(struct dio, pages));
1169 if (dio_lock_type != DIO_NO_LOCKING) { 1163
1164 dio->flags = flags;
1165 if (dio->flags & DIO_LOCKING) {
1170 /* watch out for a 0 len io from a tricksy fs */ 1166 /* watch out for a 0 len io from a tricksy fs */
1171 if (rw == READ && end > offset) { 1167 if (rw == READ && end > offset) {
1172 struct address_space *mapping; 1168 struct address_space *mapping =
1169 iocb->ki_filp->f_mapping;
1173 1170
1174 mapping = iocb->ki_filp->f_mapping; 1171 /* will be released by direct_io_worker */
1175 if (dio_lock_type != DIO_OWN_LOCKING) { 1172 mutex_lock(&inode->i_mutex);
1176 mutex_lock(&inode->i_mutex);
1177 release_i_mutex = 1;
1178 }
1179 1173
1180 retval = filemap_write_and_wait_range(mapping, offset, 1174 retval = filemap_write_and_wait_range(mapping, offset,
1181 end - 1); 1175 end - 1);
1182 if (retval) { 1176 if (retval) {
1177 mutex_unlock(&inode->i_mutex);
1183 kfree(dio); 1178 kfree(dio);
1184 goto out; 1179 goto out;
1185 } 1180 }
1186
1187 if (dio_lock_type == DIO_OWN_LOCKING) {
1188 mutex_unlock(&inode->i_mutex);
1189 acquire_i_mutex = 1;
1190 }
1191 } 1181 }
1192 1182
1193 if (dio_lock_type == DIO_LOCKING) 1183 /*
1194 /* lockdep: not the owner will release it */ 1184 * Will be released at I/O completion, possibly in a
1195 down_read_non_owner(&inode->i_alloc_sem); 1185 * different thread.
1186 */
1187 down_read_non_owner(&inode->i_alloc_sem);
1196 } 1188 }
1197 1189
1198 /* 1190 /*
@@ -1210,24 +1202,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1210 /* 1202 /*
1211 * In case of error extending write may have instantiated a few 1203 * In case of error extending write may have instantiated a few
1212 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1204 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1213 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by 1205 *
1214 * it's own meaner. 1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own.
1215 */ 1208 */
1216 if (unlikely(retval < 0 && (rw & WRITE))) { 1209 if (flags & DIO_LOCKING) {
1217 loff_t isize = i_size_read(inode); 1210 if (unlikely((rw & WRITE) && retval < 0)) {
1218 1211 loff_t isize = i_size_read(inode);
1219 if (end > isize && dio_lock_type == DIO_LOCKING) 1212 if (end > isize)
1220 vmtruncate(inode, isize); 1213 vmtruncate(inode, isize);
1214 }
1221 } 1215 }
1222 1216
1223 if (rw == READ && dio_lock_type == DIO_LOCKING)
1224 release_i_mutex = 0;
1225
1226out: 1217out:
1227 if (release_i_mutex)
1228 mutex_unlock(&inode->i_mutex);
1229 else if (acquire_i_mutex)
1230 mutex_lock(&inode->i_mutex);
1231 return retval; 1218 return retval;
1232} 1219}
1233EXPORT_SYMBOL(__blockdev_direct_IO); 1220EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index fd9859f92fa..0df24385081 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -410,10 +410,10 @@ static struct config_group *make_cluster(struct config_group *g,
410 struct dlm_comms *cms = NULL; 410 struct dlm_comms *cms = NULL;
411 void *gps = NULL; 411 void *gps = NULL;
412 412
413 cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL); 413 cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
414 gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); 414 gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS);
415 sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL); 415 sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
416 cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL); 416 cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
417 417
418 if (!cl || !gps || !sps || !cms) 418 if (!cl || !gps || !sps || !cms)
419 goto fail; 419 goto fail;
@@ -482,9 +482,9 @@ static struct config_group *make_space(struct config_group *g, const char *name)
482 struct dlm_nodes *nds = NULL; 482 struct dlm_nodes *nds = NULL;
483 void *gps = NULL; 483 void *gps = NULL;
484 484
485 sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL); 485 sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS);
486 gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); 486 gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS);
487 nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL); 487 nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS);
488 488
489 if (!sp || !gps || !nds) 489 if (!sp || !gps || !nds)
490 goto fail; 490 goto fail;
@@ -536,7 +536,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
536{ 536{
537 struct dlm_comm *cm; 537 struct dlm_comm *cm;
538 538
539 cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL); 539 cm = kzalloc(sizeof(struct dlm_comm), GFP_NOFS);
540 if (!cm) 540 if (!cm)
541 return ERR_PTR(-ENOMEM); 541 return ERR_PTR(-ENOMEM);
542 542
@@ -569,7 +569,7 @@ static struct config_item *make_node(struct config_group *g, const char *name)
569 struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); 569 struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
570 struct dlm_node *nd; 570 struct dlm_node *nd;
571 571
572 nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); 572 nd = kzalloc(sizeof(struct dlm_node), GFP_NOFS);
573 if (!nd) 573 if (!nd)
574 return ERR_PTR(-ENOMEM); 574 return ERR_PTR(-ENOMEM);
575 575
@@ -705,7 +705,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
705 if (cm->addr_count >= DLM_MAX_ADDR_COUNT) 705 if (cm->addr_count >= DLM_MAX_ADDR_COUNT)
706 return -ENOSPC; 706 return -ENOSPC;
707 707
708 addr = kzalloc(sizeof(*addr), GFP_KERNEL); 708 addr = kzalloc(sizeof(*addr), GFP_NOFS);
709 if (!addr) 709 if (!addr)
710 return -ENOMEM; 710 return -ENOMEM;
711 711
@@ -868,7 +868,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
868 868
869 ids_count = sp->members_count; 869 ids_count = sp->members_count;
870 870
871 ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); 871 ids = kcalloc(ids_count, sizeof(int), GFP_NOFS);
872 if (!ids) { 872 if (!ids) {
873 rv = -ENOMEM; 873 rv = -ENOMEM;
874 goto out; 874 goto out;
@@ -886,7 +886,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
886 if (!new_count) 886 if (!new_count)
887 goto out_ids; 887 goto out_ids;
888 888
889 new = kcalloc(new_count, sizeof(int), GFP_KERNEL); 889 new = kcalloc(new_count, sizeof(int), GFP_NOFS);
890 if (!new) { 890 if (!new) {
891 kfree(ids); 891 kfree(ids);
892 rv = -ENOMEM; 892 rv = -ENOMEM;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1c8bb8c3a82..375a2359b3b 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -404,7 +404,7 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
404 if (bucket >= ls->ls_rsbtbl_size) 404 if (bucket >= ls->ls_rsbtbl_size)
405 return NULL; 405 return NULL;
406 406
407 ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_KERNEL); 407 ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_NOFS);
408 if (!ri) 408 if (!ri)
409 return NULL; 409 return NULL;
410 if (n == 0) 410 if (n == 0)
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index c4dfa1dcc86..7b84c1dbc82 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,8 +49,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
49 spin_unlock(&ls->ls_recover_list_lock); 49 spin_unlock(&ls->ls_recover_list_lock);
50 50
51 if (!found) 51 if (!found)
52 de = kzalloc(sizeof(struct dlm_direntry) + len, 52 de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
53 ls->ls_allocation);
54 return de; 53 return de;
55} 54}
56 55
@@ -212,7 +211,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
212 211
213 dlm_dir_clear(ls); 212 dlm_dir_clear(ls);
214 213
215 last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); 214 last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
216 if (!last_name) 215 if (!last_name)
217 goto out; 216 goto out;
218 217
@@ -323,7 +322,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
323 if (namelen > DLM_RESNAME_MAXLEN) 322 if (namelen > DLM_RESNAME_MAXLEN)
324 return -EINVAL; 323 return -EINVAL;
325 324
326 de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); 325 de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
327 if (!de) 326 if (!de)
328 return -ENOMEM; 327 return -ENOMEM;
329 328
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d01ca0a711d..826d3dc6e0a 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -473,7 +473,6 @@ struct dlm_ls {
473 int ls_low_nodeid; 473 int ls_low_nodeid;
474 int ls_total_weight; 474 int ls_total_weight;
475 int *ls_node_array; 475 int *ls_node_array;
476 gfp_t ls_allocation;
477 476
478 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 477 struct dlm_rsb ls_stub_rsb; /* for returning errors */
479 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 478 struct dlm_lkb ls_stub_lkb; /* for returning errors */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index eb507c453c5..9c0c1db1e10 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2689,7 +2689,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
2689 pass into lowcomms_commit and a message buffer (mb) that we 2689 pass into lowcomms_commit and a message buffer (mb) that we
2690 write our data into */ 2690 write our data into */
2691 2691
2692 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); 2692 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
2693 if (!mh) 2693 if (!mh)
2694 return -ENOBUFS; 2694 return -ENOBUFS;
2695 2695
@@ -4512,7 +4512,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4512 } 4512 }
4513 4513
4514 if (flags & DLM_LKF_VALBLK) { 4514 if (flags & DLM_LKF_VALBLK) {
4515 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 4515 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
4516 if (!ua->lksb.sb_lvbptr) { 4516 if (!ua->lksb.sb_lvbptr) {
4517 kfree(ua); 4517 kfree(ua);
4518 __put_lkb(ls, lkb); 4518 __put_lkb(ls, lkb);
@@ -4582,7 +4582,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4582 ua = lkb->lkb_ua; 4582 ua = lkb->lkb_ua;
4583 4583
4584 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { 4584 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
4585 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 4585 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
4586 if (!ua->lksb.sb_lvbptr) { 4586 if (!ua->lksb.sb_lvbptr) {
4587 error = -ENOMEM; 4587 error = -ENOMEM;
4588 goto out_put; 4588 goto out_put;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d489fcc8671..c010ecfc0d2 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -430,7 +430,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
430 430
431 error = -ENOMEM; 431 error = -ENOMEM;
432 432
433 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); 433 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
434 if (!ls) 434 if (!ls)
435 goto out; 435 goto out;
436 memcpy(ls->ls_name, name, namelen); 436 memcpy(ls->ls_name, name, namelen);
@@ -443,11 +443,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
443 if (flags & DLM_LSFL_TIMEWARN) 443 if (flags & DLM_LSFL_TIMEWARN)
444 set_bit(LSFL_TIMEWARN, &ls->ls_flags); 444 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
445 445
446 if (flags & DLM_LSFL_FS)
447 ls->ls_allocation = GFP_NOFS;
448 else
449 ls->ls_allocation = GFP_KERNEL;
450
451 /* ls_exflags are forced to match among nodes, and we don't 446 /* ls_exflags are forced to match among nodes, and we don't
452 need to require all nodes to have some flags set */ 447 need to require all nodes to have some flags set */
453 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | 448 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
@@ -456,7 +451,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
456 size = dlm_config.ci_rsbtbl_size; 451 size = dlm_config.ci_rsbtbl_size;
457 ls->ls_rsbtbl_size = size; 452 ls->ls_rsbtbl_size = size;
458 453
459 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); 454 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
460 if (!ls->ls_rsbtbl) 455 if (!ls->ls_rsbtbl)
461 goto out_lsfree; 456 goto out_lsfree;
462 for (i = 0; i < size; i++) { 457 for (i = 0; i < size; i++) {
@@ -468,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
468 size = dlm_config.ci_lkbtbl_size; 463 size = dlm_config.ci_lkbtbl_size;
469 ls->ls_lkbtbl_size = size; 464 ls->ls_lkbtbl_size = size;
470 465
471 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); 466 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
472 if (!ls->ls_lkbtbl) 467 if (!ls->ls_lkbtbl)
473 goto out_rsbfree; 468 goto out_rsbfree;
474 for (i = 0; i < size; i++) { 469 for (i = 0; i < size; i++) {
@@ -480,7 +475,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
480 size = dlm_config.ci_dirtbl_size; 475 size = dlm_config.ci_dirtbl_size;
481 ls->ls_dirtbl_size = size; 476 ls->ls_dirtbl_size = size;
482 477
483 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); 478 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
484 if (!ls->ls_dirtbl) 479 if (!ls->ls_dirtbl)
485 goto out_lkbfree; 480 goto out_lkbfree;
486 for (i = 0; i < size; i++) { 481 for (i = 0; i < size; i++) {
@@ -527,7 +522,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
527 mutex_init(&ls->ls_requestqueue_mutex); 522 mutex_init(&ls->ls_requestqueue_mutex);
528 mutex_init(&ls->ls_clear_proc_locks); 523 mutex_init(&ls->ls_clear_proc_locks);
529 524
530 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); 525 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
531 if (!ls->ls_recover_buf) 526 if (!ls->ls_recover_buf)
532 goto out_dirfree; 527 goto out_dirfree;
533 528
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 70736eb4b51..52cab160893 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1060,7 +1060,7 @@ static void init_local(void)
1060 if (dlm_our_addr(&sas, i)) 1060 if (dlm_our_addr(&sas, i))
1061 break; 1061 break;
1062 1062
1063 addr = kmalloc(sizeof(*addr), GFP_KERNEL); 1063 addr = kmalloc(sizeof(*addr), GFP_NOFS);
1064 if (!addr) 1064 if (!addr)
1065 break; 1065 break;
1066 memcpy(addr, &sas, sizeof(*addr)); 1066 memcpy(addr, &sas, sizeof(*addr));
@@ -1099,7 +1099,7 @@ static int sctp_listen_for_all(void)
1099 struct sockaddr_storage localaddr; 1099 struct sockaddr_storage localaddr;
1100 struct sctp_event_subscribe subscribe; 1100 struct sctp_event_subscribe subscribe;
1101 int result = -EINVAL, num = 1, i, addr_len; 1101 int result = -EINVAL, num = 1, i, addr_len;
1102 struct connection *con = nodeid2con(0, GFP_KERNEL); 1102 struct connection *con = nodeid2con(0, GFP_NOFS);
1103 int bufsize = NEEDED_RMEM; 1103 int bufsize = NEEDED_RMEM;
1104 1104
1105 if (!con) 1105 if (!con)
@@ -1171,7 +1171,7 @@ out:
1171static int tcp_listen_for_all(void) 1171static int tcp_listen_for_all(void)
1172{ 1172{
1173 struct socket *sock = NULL; 1173 struct socket *sock = NULL;
1174 struct connection *con = nodeid2con(0, GFP_KERNEL); 1174 struct connection *con = nodeid2con(0, GFP_NOFS);
1175 int result = -EINVAL; 1175 int result = -EINVAL;
1176 1176
1177 if (!con) 1177 if (!con)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index b128775913b..84f70bfb0ba 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -48,7 +48,7 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
48 struct dlm_member *memb; 48 struct dlm_member *memb;
49 int w, error; 49 int w, error;
50 50
51 memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); 51 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
52 if (!memb) 52 if (!memb)
53 return -ENOMEM; 53 return -ENOMEM;
54 54
@@ -143,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
143 143
144 ls->ls_total_weight = total; 144 ls->ls_total_weight = total;
145 145
146 array = kmalloc(sizeof(int) * total, ls->ls_allocation); 146 array = kmalloc(sizeof(int) * total, GFP_NOFS);
147 if (!array) 147 if (!array)
148 return; 148 return;
149 149
@@ -226,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
226 continue; 226 continue;
227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); 227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
228 228
229 memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); 229 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
230 if (!memb) 230 if (!memb)
231 return -ENOMEM; 231 return -ENOMEM;
232 memb->nodeid = rv->new[i]; 232 memb->nodeid = rv->new[i];
@@ -341,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
341 int *ids = NULL, *new = NULL; 341 int *ids = NULL, *new = NULL;
342 int error, ids_count = 0, new_count = 0; 342 int error, ids_count = 0, new_count = 0;
343 343
344 rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); 344 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
345 if (!rv) 345 if (!rv)
346 return -ENOMEM; 346 return -ENOMEM;
347 347
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index c1775b84eba..8e0d00db004 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -39,7 +39,7 @@ char *dlm_allocate_lvb(struct dlm_ls *ls)
39{ 39{
40 char *p; 40 char *p;
41 41
42 p = kzalloc(ls->ls_lvblen, ls->ls_allocation); 42 p = kzalloc(ls->ls_lvblen, GFP_NOFS);
43 return p; 43 return p;
44} 44}
45 45
@@ -57,7 +57,7 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
57 57
58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); 58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
59 59
60 r = kzalloc(sizeof(*r) + namelen, ls->ls_allocation); 60 r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
61 return r; 61 return r;
62} 62}
63 63
@@ -72,7 +72,7 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
72{ 72{
73 struct dlm_lkb *lkb; 73 struct dlm_lkb *lkb;
74 74
75 lkb = kmem_cache_zalloc(lkb_cache, ls->ls_allocation); 75 lkb = kmem_cache_zalloc(lkb_cache, GFP_NOFS);
76 return lkb; 76 return lkb;
77} 77}
78 78
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 55ea369f43a..052095cd592 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -26,7 +26,7 @@ static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
26 struct sk_buff *skb; 26 struct sk_buff *skb;
27 void *data; 27 void *data;
28 28
29 skb = genlmsg_new(size, GFP_KERNEL); 29 skb = genlmsg_new(size, GFP_NOFS);
30 if (!skb) 30 if (!skb)
31 return -ENOMEM; 31 return -ENOMEM;
32 32
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 16f682e26c0..b5f89aef3b2 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -82,7 +82,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
82 if (!ls) 82 if (!ls)
83 return -EINVAL; 83 return -EINVAL;
84 84
85 xop = kzalloc(sizeof(*xop), GFP_KERNEL); 85 xop = kzalloc(sizeof(*xop), GFP_NOFS);
86 if (!xop) { 86 if (!xop) {
87 rv = -ENOMEM; 87 rv = -ENOMEM;
88 goto out; 88 goto out;
@@ -143,7 +143,7 @@ out:
143} 143}
144EXPORT_SYMBOL_GPL(dlm_posix_lock); 144EXPORT_SYMBOL_GPL(dlm_posix_lock);
145 145
146/* Returns failure iff a succesful lock operation should be canceled */ 146/* Returns failure iff a successful lock operation should be canceled */
147static int dlm_plock_callback(struct plock_op *op) 147static int dlm_plock_callback(struct plock_op *op)
148{ 148{
149 struct file *file; 149 struct file *file;
@@ -211,7 +211,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
211 if (!ls) 211 if (!ls)
212 return -EINVAL; 212 return -EINVAL;
213 213
214 op = kzalloc(sizeof(*op), GFP_KERNEL); 214 op = kzalloc(sizeof(*op), GFP_NOFS);
215 if (!op) { 215 if (!op) {
216 rv = -ENOMEM; 216 rv = -ENOMEM;
217 goto out; 217 goto out;
@@ -266,7 +266,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
266 if (!ls) 266 if (!ls)
267 return -EINVAL; 267 return -EINVAL;
268 268
269 op = kzalloc(sizeof(*op), GFP_KERNEL); 269 op = kzalloc(sizeof(*op), GFP_NOFS);
270 if (!op) { 270 if (!op) {
271 rv = -ENOMEM; 271 rv = -ENOMEM;
272 goto out; 272 goto out;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 67522c268c1..3c83a49a48a 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
38 char *mb; 38 char *mb;
39 int mb_len = sizeof(struct dlm_rcom) + len; 39 int mb_len = sizeof(struct dlm_rcom) + len;
40 40
41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); 41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
42 if (!mh) { 42 if (!mh) {
43 log_print("create_rcom to %d type %d len %d ENOBUFS", 43 log_print("create_rcom to %d type %d len %d ENOBUFS",
44 to_nodeid, type, len); 44 to_nodeid, type, len);
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7a2307c0891..a44fa22890e 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
35 struct rq_entry *e; 35 struct rq_entry *e;
36 int length = ms->m_header.h_length - sizeof(struct dlm_message); 36 int length = ms->m_header.h_length - sizeof(struct dlm_message);
37 37
38 e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); 38 e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
39 if (!e) { 39 if (!e) {
40 log_print("dlm_add_requestqueue: out of memory len %d", length); 40 log_print("dlm_add_requestqueue: out of memory len %d", length);
41 return; 41 return;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index ebce994ab0b..e73a4bb572a 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -267,7 +267,7 @@ static int device_user_lock(struct dlm_user_proc *proc,
267 goto out; 267 goto out;
268 } 268 }
269 269
270 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); 270 ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
271 if (!ua) 271 if (!ua)
272 goto out; 272 goto out;
273 ua->proc = proc; 273 ua->proc = proc;
@@ -307,7 +307,7 @@ static int device_user_unlock(struct dlm_user_proc *proc,
307 if (!ls) 307 if (!ls)
308 return -ENOENT; 308 return -ENOENT;
309 309
310 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); 310 ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
311 if (!ua) 311 if (!ua)
312 goto out; 312 goto out;
313 ua->proc = proc; 313 ua->proc = proc;
@@ -352,7 +352,7 @@ static int dlm_device_register(struct dlm_ls *ls, char *name)
352 352
353 error = -ENOMEM; 353 error = -ENOMEM;
354 len = strlen(name) + strlen(name_prefix) + 2; 354 len = strlen(name) + strlen(name_prefix) + 2;
355 ls->ls_device.name = kzalloc(len, GFP_KERNEL); 355 ls->ls_device.name = kzalloc(len, GFP_NOFS);
356 if (!ls->ls_device.name) 356 if (!ls->ls_device.name)
357 goto fail; 357 goto fail;
358 358
@@ -520,7 +520,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
520#endif 520#endif
521 return -EINVAL; 521 return -EINVAL;
522 522
523 kbuf = kzalloc(count + 1, GFP_KERNEL); 523 kbuf = kzalloc(count + 1, GFP_NOFS);
524 if (!kbuf) 524 if (!kbuf)
525 return -ENOMEM; 525 return -ENOMEM;
526 526
@@ -546,7 +546,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
546 546
547 /* add 1 after namelen so that the name string is terminated */ 547 /* add 1 after namelen so that the name string is terminated */
548 kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1, 548 kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
549 GFP_KERNEL); 549 GFP_NOFS);
550 if (!kbuf) { 550 if (!kbuf) {
551 kfree(k32buf); 551 kfree(k32buf);
552 return -ENOMEM; 552 return -ENOMEM;
@@ -648,7 +648,7 @@ static int device_open(struct inode *inode, struct file *file)
648 if (!ls) 648 if (!ls)
649 return -ENOENT; 649 return -ENOENT;
650 650
651 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); 651 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_NOFS);
652 if (!proc) { 652 if (!proc) {
653 dlm_put_lockspace(ls); 653 dlm_put_lockspace(ls);
654 return -ENOMEM; 654 return -ENOMEM;
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75b..8f006a0d607 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
62 struct inode *lower_inode = 62 struct inode *lower_inode =
63 ecryptfs_inode_to_lower(dentry->d_inode); 63 ecryptfs_inode_to_lower(dentry->d_inode);
64 64
65 fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); 65 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
66 } 66 }
67out: 67out:
68 return rc; 68 return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0d..7f854503293 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 626 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 627 if (rc)
628 goto out_lock; 628 goto out_lock;
629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); 629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
630 if (new_dir != old_dir) 630 if (new_dir != old_dir)
631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); 631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
632out_lock: 632out_lock:
633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
634 dput(lower_new_dentry->d_parent); 634 dput(lower_new_dentry->d_parent);
@@ -715,31 +715,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 715 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 716 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 717 if (!buf) {
718 rc = -ENOMEM; 718 buf = ERR_PTR(-ENOMEM);
719 goto out; 719 goto out;
720 } 720 }
721 old_fs = get_fs(); 721 old_fs = get_fs();
722 set_fs(get_ds()); 722 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 724 set_fs(old_fs);
725 if (rc < 0) 725 if (rc < 0) {
726 goto out_free; 726 kfree(buf);
727 else 727 buf = ERR_PTR(rc);
728 } else
728 buf[rc] = '\0'; 729 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 730out:
735 return ERR_PTR(rc); 731 nd_set_link(nd, buf);
732 return NULL;
736} 733}
737 734
738static void 735static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 736ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 737{
741 /* Free the char* */ 738 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 739 if (!IS_ERR(buf)) {
740 /* Free the char* */
741 kfree(buf);
742 }
743} 743}
744 744
745/** 745/**
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
967 rc = notify_change(lower_dentry, ia); 967 rc = notify_change(lower_dentry, ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 968 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 969out:
970 fsstack_copy_attr_all(inode, lower_inode, NULL); 970 fsstack_copy_attr_all(inode, lower_inode);
971 return rc; 971 return rc;
972} 972}
973 973
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c6ac85d6c70..567bc4b9f70 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,7 +35,6 @@
35#include <linux/key.h> 35#include <linux/key.h>
36#include <linux/parser.h> 36#include <linux/parser.h>
37#include <linux/fs_stack.h> 37#include <linux/fs_stack.h>
38#include <linux/ima.h>
39#include "ecryptfs_kernel.h" 38#include "ecryptfs_kernel.h"
40 39
41/** 40/**
@@ -119,7 +118,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
119 const struct cred *cred = current_cred(); 118 const struct cred *cred = current_cred();
120 struct ecryptfs_inode_info *inode_info = 119 struct ecryptfs_inode_info *inode_info =
121 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 120 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
122 int opened_lower_file = 0;
123 int rc = 0; 121 int rc = 0;
124 122
125 mutex_lock(&inode_info->lower_file_mutex); 123 mutex_lock(&inode_info->lower_file_mutex);
@@ -136,12 +134,9 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
136 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
137 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 135 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
138 inode_info->lower_file = NULL; 136 inode_info->lower_file = NULL;
139 } else 137 }
140 opened_lower_file = 1;
141 } 138 }
142 mutex_unlock(&inode_info->lower_file_mutex); 139 mutex_unlock(&inode_info->lower_file_mutex);
143 if (opened_lower_file)
144 ima_counts_get(inode_info->lower_file);
145 return rc; 140 return rc;
146} 141}
147 142
@@ -194,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
194 init_special_inode(inode, lower_inode->i_mode, 189 init_special_inode(inode, lower_inode->i_mode,
195 lower_inode->i_rdev); 190 lower_inode->i_rdev);
196 dentry->d_op = &ecryptfs_dops; 191 dentry->d_op = &ecryptfs_dops;
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 192 fsstack_copy_attr_all(inode, lower_inode);
198 /* This size will be overwritten for real files w/ headers and 193 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 194 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 195 fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8b47e4200e6..d26402ff06e 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -339,7 +339,7 @@ struct file *eventfd_file_create(unsigned int count, int flags)
339 ctx->flags = flags; 339 ctx->flags = flags;
340 340
341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, 341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
342 flags & EFD_SHARED_FCNTL_FLAGS); 342 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
343 if (IS_ERR(file)) 343 if (IS_ERR(file))
344 eventfd_free_ctx(ctx); 344 eventfd_free_ctx(ctx);
345 345
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 085c5c06342..bd056a5b4ef 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -251,10 +251,10 @@ ctl_table epoll_table[] = {
251 .data = &max_user_watches, 251 .data = &max_user_watches,
252 .maxlen = sizeof(int), 252 .maxlen = sizeof(int),
253 .mode = 0644, 253 .mode = 0644,
254 .proc_handler = &proc_dointvec_minmax, 254 .proc_handler = proc_dointvec_minmax,
255 .extra1 = &zero, 255 .extra1 = &zero,
256 }, 256 },
257 { .ctl_name = 0 } 257 { }
258}; 258};
259#endif /* CONFIG_SYSCTL */ 259#endif /* CONFIG_SYSCTL */
260 260
@@ -1206,7 +1206,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1206 * a file structure and a free file descriptor. 1206 * a file structure and a free file descriptor.
1207 */ 1207 */
1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1209 flags & O_CLOEXEC); 1209 O_RDWR | (flags & O_CLOEXEC));
1210 if (error < 0) 1210 if (error < 0)
1211 ep_free(ep); 1211 ep_free(ep);
1212 1212
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd4a33..632b02e34ec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -46,7 +46,6 @@
46#include <linux/proc_fs.h> 46#include <linux/proc_fs.h>
47#include <linux/mount.h> 47#include <linux/mount.h>
48#include <linux/security.h> 48#include <linux/security.h>
49#include <linux/ima.h>
50#include <linux/syscalls.h> 49#include <linux/syscalls.h>
51#include <linux/tsacct_kern.h> 50#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 51#include <linux/cn_proc.h>
@@ -827,7 +826,9 @@ static int de_thread(struct task_struct *tsk)
827 attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); 826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
828 transfer_pid(leader, tsk, PIDTYPE_PGID); 827 transfer_pid(leader, tsk, PIDTYPE_PGID);
829 transfer_pid(leader, tsk, PIDTYPE_SID); 828 transfer_pid(leader, tsk, PIDTYPE_SID);
829
830 list_replace_rcu(&leader->tasks, &tsk->tasks); 830 list_replace_rcu(&leader->tasks, &tsk->tasks);
831 list_replace_init(&leader->sibling, &tsk->sibling);
831 832
832 tsk->group_leader = tsk; 833 tsk->group_leader = tsk;
833 leader->group_leader = tsk; 834 leader->group_leader = tsk;
@@ -924,6 +925,15 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
924void set_task_comm(struct task_struct *tsk, char *buf) 925void set_task_comm(struct task_struct *tsk, char *buf)
925{ 926{
926 task_lock(tsk); 927 task_lock(tsk);
928
929 /*
930 * Threads may access current->comm without holding
931 * the task lock, so write the string carefully.
932 * Readers without a lock may see incomplete new
933 * names but are safe from non-terminating string reads.
934 */
935 memset(tsk->comm, 0, TASK_COMM_LEN);
936 wmb();
927 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 937 strlcpy(tsk->comm, buf, sizeof(tsk->comm));
928 task_unlock(tsk); 938 task_unlock(tsk);
929 perf_event_comm(tsk); 939 perf_event_comm(tsk);
@@ -1209,9 +1219,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1209 retval = security_bprm_check(bprm); 1219 retval = security_bprm_check(bprm);
1210 if (retval) 1220 if (retval)
1211 return retval; 1221 return retval;
1212 retval = ima_bprm_check(bprm);
1213 if (retval)
1214 return retval;
1215 1222
1216 /* kernel module loader fixup */ 1223 /* kernel module loader fixup */
1217 /* so we don't try to load run modprobe in kernel space. */ 1224 /* so we don't try to load run modprobe in kernel space. */
@@ -1756,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1756 struct mm_struct *mm = current->mm; 1763 struct mm_struct *mm = current->mm;
1757 struct linux_binfmt * binfmt; 1764 struct linux_binfmt * binfmt;
1758 struct inode * inode; 1765 struct inode * inode;
1759 struct file * file;
1760 const struct cred *old_cred; 1766 const struct cred *old_cred;
1761 struct cred *cred; 1767 struct cred *cred;
1762 int retval = 0; 1768 int retval = 0;
1763 int flag = 0; 1769 int flag = 0;
1764 int ispipe = 0; 1770 int ispipe = 0;
1765 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1766 char **helper_argv = NULL; 1771 char **helper_argv = NULL;
1767 int helper_argc = 0; 1772 int helper_argc = 0;
1768 int dump_count = 0; 1773 int dump_count = 0;
1769 static atomic_t core_dump_count = ATOMIC_INIT(0); 1774 static atomic_t core_dump_count = ATOMIC_INIT(0);
1775 struct coredump_params cprm = {
1776 .signr = signr,
1777 .regs = regs,
1778 .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
1779 };
1770 1780
1771 audit_core_dumps(signr); 1781 audit_core_dumps(signr);
1772 1782
@@ -1822,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1822 ispipe = format_corename(corename, signr); 1832 ispipe = format_corename(corename, signr);
1823 unlock_kernel(); 1833 unlock_kernel();
1824 1834
1825 if ((!ispipe) && (core_limit < binfmt->min_coredump)) 1835 if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
1826 goto fail_unlock; 1836 goto fail_unlock;
1827 1837
1828 if (ispipe) { 1838 if (ispipe) {
1829 if (core_limit == 0) { 1839 if (cprm.limit == 0) {
1830 /* 1840 /*
1831 * Normally core limits are irrelevant to pipes, since 1841 * Normally core limits are irrelevant to pipes, since
1832 * we're not writing to the file system, but we use 1842 * we're not writing to the file system, but we use
1833 * core_limit of 0 here as a speacial value. Any 1843 * cprm.limit of 0 here as a speacial value. Any
1834 * non-zero limit gets set to RLIM_INFINITY below, but 1844 * non-zero limit gets set to RLIM_INFINITY below, but
1835 * a limit of 0 skips the dump. This is a consistent 1845 * a limit of 0 skips the dump. This is a consistent
1836 * way to catch recursive crashes. We can still crash 1846 * way to catch recursive crashes. We can still crash
@@ -1863,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1863 goto fail_dropcount; 1873 goto fail_dropcount;
1864 } 1874 }
1865 1875
1866 core_limit = RLIM_INFINITY; 1876 cprm.limit = RLIM_INFINITY;
1867 1877
1868 /* SIGPIPE can happen, but it's just never processed */ 1878 /* SIGPIPE can happen, but it's just never processed */
1869 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, 1879 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
1870 &file)) { 1880 &cprm.file)) {
1871 printk(KERN_INFO "Core dump to %s pipe failed\n", 1881 printk(KERN_INFO "Core dump to %s pipe failed\n",
1872 corename); 1882 corename);
1873 goto fail_dropcount; 1883 goto fail_dropcount;
1874 } 1884 }
1875 } else 1885 } else
1876 file = filp_open(corename, 1886 cprm.file = filp_open(corename,
1877 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1887 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1878 0600); 1888 0600);
1879 if (IS_ERR(file)) 1889 if (IS_ERR(cprm.file))
1880 goto fail_dropcount; 1890 goto fail_dropcount;
1881 inode = file->f_path.dentry->d_inode; 1891 inode = cprm.file->f_path.dentry->d_inode;
1882 if (inode->i_nlink > 1) 1892 if (inode->i_nlink > 1)
1883 goto close_fail; /* multiple links - don't dump */ 1893 goto close_fail; /* multiple links - don't dump */
1884 if (!ispipe && d_unhashed(file->f_path.dentry)) 1894 if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
1885 goto close_fail; 1895 goto close_fail;
1886 1896
1887 /* AK: actually i see no reason to not allow this for named pipes etc., 1897 /* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1894,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1894 */ 1904 */
1895 if (inode->i_uid != current_fsuid()) 1905 if (inode->i_uid != current_fsuid())
1896 goto close_fail; 1906 goto close_fail;
1897 if (!file->f_op) 1907 if (!cprm.file->f_op)
1898 goto close_fail; 1908 goto close_fail;
1899 if (!file->f_op->write) 1909 if (!cprm.file->f_op->write)
1900 goto close_fail; 1910 goto close_fail;
1901 if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) 1911 if (!ispipe &&
1912 do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
1902 goto close_fail; 1913 goto close_fail;
1903 1914
1904 retval = binfmt->core_dump(signr, regs, file, core_limit); 1915 retval = binfmt->core_dump(&cprm);
1905 1916
1906 if (retval) 1917 if (retval)
1907 current->signal->group_exit_code |= 0x80; 1918 current->signal->group_exit_code |= 0x80;
1908close_fail: 1919close_fail:
1909 if (ispipe && core_pipe_limit) 1920 if (ispipe && core_pipe_limit)
1910 wait_for_dump_helpers(file); 1921 wait_for_dump_helpers(cprm.file);
1911 filp_close(file, NULL); 1922 filp_close(cprm.file, NULL);
1912fail_dropcount: 1923fail_dropcount:
1913 if (dump_count) 1924 if (dump_count)
1914 atomic_dec(&core_dump_count); 1925 atomic_dec(&core_dump_count);
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index cc2d22db119..2d0f757fda3 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,5 @@
12# Kbuild - Gets included from the Kernels Makefile and build system 12# Kbuild - Gets included from the Kernels Makefile and build system
13# 13#
14 14
15exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o 15exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
16obj-$(CONFIG_EXOFS_FS) += exofs.o 16obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index c6718e4817f..b1b178e6171 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -49,6 +49,7 @@
49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ 49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */
50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ 50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */
51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ 51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */
52#define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */
52#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
53 54
54/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
@@ -78,17 +79,67 @@ enum {
78#define EXOFS_SUPER_MAGIC 0x5DF5 79#define EXOFS_SUPER_MAGIC 0x5DF5
79 80
80/* 81/*
81 * The file system control block - stored in an object's data (mainly, the one 82 * The file system control block - stored in object EXOFS_SUPER_ID's data.
82 * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored 83 * This is where the in-memory superblock is stored on disk.
83 * on disk. Right now it just has a magic value, which is basically a sanity
84 * check on our ability to communicate with the object store.
85 */ 84 */
85enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
86struct exofs_fscb { 86struct exofs_fscb {
87 __le64 s_nextid; /* Highest object ID used */ 87 __le64 s_nextid; /* Highest object ID used */
88 __le32 s_numfiles; /* Number of files on fs */ 88 __le64 s_numfiles; /* Number of files on fs */
89 __le32 s_version; /* == EXOFS_FSCB_VER */
89 __le16 s_magic; /* Magic signature */ 90 __le16 s_magic; /* Magic signature */
90 __le16 s_newfs; /* Non-zero if this is a new fs */ 91 __le16 s_newfs; /* Non-zero if this is a new fs */
91}; 92
93 /* From here on it's a static part, only written by mkexofs */
94 __le64 s_dev_table_oid; /* Resurved, not used */
95 __le64 s_dev_table_count; /* == 0 means no dev_table */
96} __packed;
97
98/*
99 * Describes the raid used in the FS. It is part of the device table.
100 * This here is taken from the pNFS-objects definition. In exofs we
101 * use one raid policy through-out the filesystem. (NOTE: the funny
102 * alignment at begining. We take care of it at exofs_device_table.
103 */
104struct exofs_dt_data_map {
105 __le32 cb_num_comps;
106 __le64 cb_stripe_unit;
107 __le32 cb_group_width;
108 __le32 cb_group_depth;
109 __le32 cb_mirror_cnt;
110 __le32 cb_raid_algorithm;
111} __packed;
112
113/*
114 * This is an osd device information descriptor. It is a single entry in
115 * the exofs device table. It describes an osd target lun which
116 * contains data belonging to this FS. (Same partition_id on all devices)
117 */
118struct exofs_dt_device_info {
119 __le32 systemid_len;
120 u8 systemid[OSD_SYSTEMID_LEN];
121 __le64 long_name_offset; /* If !0 then offset-in-file */
122 __le32 osdname_len; /* */
123 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */
124} __packed;
125
126/*
127 * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data.
128 * It contains the raid used for this multy-device FS and an array of
129 * participating devices.
130 */
131struct exofs_device_table {
132 __le32 dt_version; /* == EXOFS_DT_VER */
133 struct exofs_dt_data_map dt_data_map; /* Raid policy to use */
134
135 /* Resurved space For future use. Total includeing this:
136 * (8 * sizeof(le64))
137 */
138 __le64 __Resurved[4];
139
140 __le64 dt_num_devices; /* Array size */
141 struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */
142} __packed;
92 143
93/**************************************************************************** 144/****************************************************************************
94 * inode-related things 145 * inode-related things
@@ -155,22 +206,4 @@ enum {
155 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 206 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
156 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 207 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
157 208
158/*************************
159 * function declarations *
160 *************************/
161/* osd.c */
162void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
163 const struct osd_obj_id *obj);
164
165int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
166static inline int exofs_check_ok(struct osd_request *or)
167{
168 return exofs_check_ok_resid(or, NULL, NULL);
169}
170int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
171int exofs_async_op(struct osd_request *or,
172 osd_req_done_fn *async_done, void *caller_context, u8 *cred);
173
174int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
175
176#endif /*ifndef __EXOFS_COM_H__*/ 209#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 5ec72e020b2..c35fd462398 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -30,13 +30,17 @@
30 * along with exofs; if not, write to the Free Software 30 * along with exofs; if not, write to the Free Software
31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 */ 32 */
33#ifndef __EXOFS_H__
34#define __EXOFS_H__
33 35
34#include <linux/fs.h> 36#include <linux/fs.h>
35#include <linux/time.h> 37#include <linux/time.h>
36#include "common.h" 38#include "common.h"
37 39
38#ifndef __EXOFS_H__ 40/* FIXME: Remove once pnfs hits mainline
39#define __EXOFS_H__ 41 * #include <linux/exportfs/pnfs_osd_xdr.h>
42 */
43#include "pnfs.h"
40 44
41#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 45#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
42 46
@@ -55,7 +59,7 @@
55 * our extension to the in-memory superblock 59 * our extension to the in-memory superblock
56 */ 60 */
57struct exofs_sb_info { 61struct exofs_sb_info {
58 struct osd_dev *s_dev; /* returned by get_osd_dev */ 62 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
59 osd_id s_pid; /* partition ID of file system*/ 63 osd_id s_pid; /* partition ID of file system*/
60 int s_timeout; /* timeout for OSD operations */ 64 int s_timeout; /* timeout for OSD operations */
61 uint64_t s_nextid; /* highest object ID used */ 65 uint64_t s_nextid; /* highest object ID used */
@@ -63,7 +67,11 @@ struct exofs_sb_info {
63 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 67 spinlock_t s_next_gen_lock; /* spinlock for gen # update */
64 u32 s_next_generation; /* next gen # to use */ 68 u32 s_next_generation; /* next gen # to use */
65 atomic_t s_curr_pending; /* number of pending commands */ 69 atomic_t s_curr_pending; /* number of pending commands */
66 uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ 70 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
71
72 struct pnfs_osd_data_map data_map; /* Default raid to use */
73 unsigned s_numdevs; /* Num of devices in array */
74 struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */
67}; 75};
68 76
69/* 77/*
@@ -79,6 +87,50 @@ struct exofs_i_info {
79 struct inode vfs_inode; /* normal in-memory inode */ 87 struct inode vfs_inode; /* normal in-memory inode */
80}; 88};
81 89
90static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
91{
92 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
93}
94
95struct exofs_io_state;
96typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
97
98struct exofs_io_state {
99 struct kref kref;
100
101 void *private;
102 exofs_io_done_fn done;
103
104 struct exofs_sb_info *sbi;
105 struct osd_obj_id obj;
106 u8 *cred;
107
108 /* Global read/write IO*/
109 loff_t offset;
110 unsigned long length;
111 void *kern_buff;
112 struct bio *bio;
113
114 /* Attributes */
115 unsigned in_attr_len;
116 struct osd_attr *in_attr;
117 unsigned out_attr_len;
118 struct osd_attr *out_attr;
119
120 /* Variable array of size numdevs */
121 unsigned numdevs;
122 struct exofs_per_dev_state {
123 struct osd_request *or;
124 struct bio *bio;
125 } per_dev[];
126};
127
128static inline unsigned exofs_io_state_size(unsigned numdevs)
129{
130 return sizeof(struct exofs_io_state) +
131 sizeof(struct exofs_per_dev_state) * numdevs;
132}
133
82/* 134/*
83 * our inode flags 135 * our inode flags
84 */ 136 */
@@ -130,6 +182,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
130/************************* 182/*************************
131 * function declarations * 183 * function declarations *
132 *************************/ 184 *************************/
185
186/* ios.c */
187void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
188 const struct osd_obj_id *obj);
189int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
190 u64 offset, void *p, unsigned length);
191
192int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
193void exofs_put_io_state(struct exofs_io_state *ios);
194
195int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
196
197int exofs_sbi_create(struct exofs_io_state *ios);
198int exofs_sbi_remove(struct exofs_io_state *ios);
199int exofs_sbi_write(struct exofs_io_state *ios);
200int exofs_sbi_read(struct exofs_io_state *ios);
201
202int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
203
204int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
205static inline int exofs_oi_write(struct exofs_i_info *oi,
206 struct exofs_io_state *ios)
207{
208 ios->obj.id = exofs_oi_objno(oi);
209 ios->cred = oi->i_cred;
210 return exofs_sbi_write(ios);
211}
212
213static inline int exofs_oi_read(struct exofs_i_info *oi,
214 struct exofs_io_state *ios)
215{
216 ios->obj.id = exofs_oi_objno(oi);
217 ios->cred = oi->i_cred;
218 return exofs_sbi_read(ios);
219}
220
133/* inode.c */ 221/* inode.c */
134void exofs_truncate(struct inode *inode); 222void exofs_truncate(struct inode *inode);
135int exofs_setattr(struct dentry *, struct iattr *); 223int exofs_setattr(struct dentry *, struct iattr *);
@@ -169,6 +257,7 @@ extern const struct file_operations exofs_file_operations;
169 257
170/* inode.c */ 258/* inode.c */
171extern const struct address_space_operations exofs_aops; 259extern const struct address_space_operations exofs_aops;
260extern const struct osd_attr g_attr_logical_length;
172 261
173/* namei.c */ 262/* namei.c */
174extern const struct inode_operations exofs_dir_inode_operations; 263extern const struct inode_operations exofs_dir_inode_operations;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6c10f747669..2afbcebeda7 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,15 +37,18 @@
37 37
38#include "exofs.h" 38#include "exofs.h"
39 39
40#ifdef CONFIG_EXOFS_DEBUG 40#define EXOFS_DBGMSG2(M...) do {} while (0)
41# define EXOFS_DEBUG_OBJ_ISIZE 1 41
42#endif 42enum { BIO_MAX_PAGES_KMALLOC =
43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
44};
43 45
44struct page_collect { 46struct page_collect {
45 struct exofs_sb_info *sbi; 47 struct exofs_sb_info *sbi;
46 struct request_queue *req_q; 48 struct request_queue *req_q;
47 struct inode *inode; 49 struct inode *inode;
48 unsigned expected_pages; 50 unsigned expected_pages;
51 struct exofs_io_state *ios;
49 52
50 struct bio *bio; 53 struct bio *bio;
51 unsigned nr_pages; 54 unsigned nr_pages;
@@ -54,22 +57,23 @@ struct page_collect {
54}; 57};
55 58
56static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
57 struct inode *inode) 60 struct inode *inode)
58{ 61{
59 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
60 63
61 pcol->sbi = sbi; 64 pcol->sbi = sbi;
62 pcol->req_q = osd_request_queue(sbi->s_dev); 65 /* Create master bios on first Q, later on cloning, each clone will be
66 * allocated on it's destination Q
67 */
68 pcol->req_q = osd_request_queue(sbi->s_ods[0]);
63 pcol->inode = inode; 69 pcol->inode = inode;
64 pcol->expected_pages = expected_pages; 70 pcol->expected_pages = expected_pages;
65 71
72 pcol->ios = NULL;
66 pcol->bio = NULL; 73 pcol->bio = NULL;
67 pcol->nr_pages = 0; 74 pcol->nr_pages = 0;
68 pcol->length = 0; 75 pcol->length = 0;
69 pcol->pg_first = -1; 76 pcol->pg_first = -1;
70
71 EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
72 expected_pages);
73} 77}
74 78
75static void _pcol_reset(struct page_collect *pcol) 79static void _pcol_reset(struct page_collect *pcol)
@@ -80,35 +84,49 @@ static void _pcol_reset(struct page_collect *pcol)
80 pcol->nr_pages = 0; 84 pcol->nr_pages = 0;
81 pcol->length = 0; 85 pcol->length = 0;
82 pcol->pg_first = -1; 86 pcol->pg_first = -1;
83 EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", 87 pcol->ios = NULL;
84 pcol->inode->i_ino, pcol->expected_pages);
85 88
86 /* this is probably the end of the loop but in writes 89 /* this is probably the end of the loop but in writes
87 * it might not end here. don't be left with nothing 90 * it might not end here. don't be left with nothing
88 */ 91 */
89 if (!pcol->expected_pages) 92 if (!pcol->expected_pages)
90 pcol->expected_pages = 128; 93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
91} 94}
92 95
93static int pcol_try_alloc(struct page_collect *pcol) 96static int pcol_try_alloc(struct page_collect *pcol)
94{ 97{
95 int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); 98 int pages = min_t(unsigned, pcol->expected_pages,
99 BIO_MAX_PAGES_KMALLOC);
100
101 if (!pcol->ios) { /* First time allocate io_state */
102 int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
103
104 if (ret)
105 return ret;
106 }
96 107
97 for (; pages; pages >>= 1) { 108 for (; pages; pages >>= 1) {
98 pcol->bio = bio_alloc(GFP_KERNEL, pages); 109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
99 if (likely(pcol->bio)) 110 if (likely(pcol->bio))
100 return 0; 111 return 0;
101 } 112 }
102 113
103 EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", 114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
104 pcol->expected_pages); 115 pcol->expected_pages);
105 return -ENOMEM; 116 return -ENOMEM;
106} 117}
107 118
108static void pcol_free(struct page_collect *pcol) 119static void pcol_free(struct page_collect *pcol)
109{ 120{
110 bio_put(pcol->bio); 121 if (pcol->bio) {
111 pcol->bio = NULL; 122 bio_put(pcol->bio);
123 pcol->bio = NULL;
124 }
125
126 if (pcol->ios) {
127 exofs_put_io_state(pcol->ios);
128 pcol->ios = NULL;
129 }
112} 130}
113 131
114static int pcol_add_page(struct page_collect *pcol, struct page *page, 132static int pcol_add_page(struct page_collect *pcol, struct page *page,
@@ -161,22 +179,17 @@ static void update_write_page(struct page *page, int ret)
161/* Called at the end of reads, to optionally unlock pages and update their 179/* Called at the end of reads, to optionally unlock pages and update their
162 * status. 180 * status.
163 */ 181 */
164static int __readpages_done(struct osd_request *or, struct page_collect *pcol, 182static int __readpages_done(struct page_collect *pcol, bool do_unlock)
165 bool do_unlock)
166{ 183{
167 struct bio_vec *bvec; 184 struct bio_vec *bvec;
168 int i; 185 int i;
169 u64 resid; 186 u64 resid;
170 u64 good_bytes; 187 u64 good_bytes;
171 u64 length = 0; 188 u64 length = 0;
172 int ret = exofs_check_ok_resid(or, &resid, NULL); 189 int ret = exofs_check_io(pcol->ios, &resid);
173
174 osd_end_request(or);
175 190
176 if (likely(!ret)) 191 if (likely(!ret))
177 good_bytes = pcol->length; 192 good_bytes = pcol->length;
178 else if (!resid)
179 good_bytes = 0;
180 else 193 else
181 good_bytes = pcol->length - resid; 194 good_bytes = pcol->length - resid;
182 195
@@ -198,7 +211,7 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
198 else 211 else
199 page_stat = ret; 212 page_stat = ret;
200 213
201 EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", 214 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n",
202 inode->i_ino, page->index, 215 inode->i_ino, page->index,
203 page_stat ? "bad_bytes" : "good_bytes"); 216 page_stat ? "bad_bytes" : "good_bytes");
204 217
@@ -214,13 +227,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
214} 227}
215 228
216/* callback of async reads */ 229/* callback of async reads */
217static void readpages_done(struct osd_request *or, void *p) 230static void readpages_done(struct exofs_io_state *ios, void *p)
218{ 231{
219 struct page_collect *pcol = p; 232 struct page_collect *pcol = p;
220 233
221 __readpages_done(or, pcol, true); 234 __readpages_done(pcol, true);
222 atomic_dec(&pcol->sbi->s_curr_pending); 235 atomic_dec(&pcol->sbi->s_curr_pending);
223 kfree(p); 236 kfree(pcol);
224} 237}
225 238
226static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
@@ -238,17 +251,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
238 251
239 unlock_page(page); 252 unlock_page(page);
240 } 253 }
241 pcol_free(pcol);
242} 254}
243 255
244static int read_exec(struct page_collect *pcol, bool is_sync) 256static int read_exec(struct page_collect *pcol, bool is_sync)
245{ 257{
246 struct exofs_i_info *oi = exofs_i(pcol->inode); 258 struct exofs_i_info *oi = exofs_i(pcol->inode);
247 struct osd_obj_id obj = {pcol->sbi->s_pid, 259 struct exofs_io_state *ios = pcol->ios;
248 pcol->inode->i_ino + EXOFS_OBJ_OFF};
249 struct osd_request *or = NULL;
250 struct page_collect *pcol_copy = NULL; 260 struct page_collect *pcol_copy = NULL;
251 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
252 int ret; 261 int ret;
253 262
254 if (!pcol->bio) 263 if (!pcol->bio)
@@ -257,17 +266,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
257 /* see comment in _readpage() about sync reads */ 266 /* see comment in _readpage() about sync reads */
258 WARN_ON(is_sync && (pcol->nr_pages != 1)); 267 WARN_ON(is_sync && (pcol->nr_pages != 1));
259 268
260 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); 269 ios->bio = pcol->bio;
261 if (unlikely(!or)) { 270 ios->length = pcol->length;
262 ret = -ENOMEM; 271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
263 goto err;
264 }
265
266 osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
267 272
268 if (is_sync) { 273 if (is_sync) {
269 exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); 274 exofs_oi_read(oi, pcol->ios);
270 return __readpages_done(or, pcol, false); 275 return __readpages_done(pcol, false);
271 } 276 }
272 277
273 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 278 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -277,14 +282,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
277 } 282 }
278 283
279 *pcol_copy = *pcol; 284 *pcol_copy = *pcol;
280 ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); 285 ios->done = readpages_done;
286 ios->private = pcol_copy;
287 ret = exofs_oi_read(oi, ios);
281 if (unlikely(ret)) 288 if (unlikely(ret))
282 goto err; 289 goto err;
283 290
284 atomic_inc(&pcol->sbi->s_curr_pending); 291 atomic_inc(&pcol->sbi->s_curr_pending);
285 292
286 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 293 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
287 obj.id, _LLU(i_start), pcol->length); 294 ios->obj.id, _LLU(ios->offset), pcol->length);
288 295
289 /* pages ownership was passed to pcol_copy */ 296 /* pages ownership was passed to pcol_copy */
290 _pcol_reset(pcol); 297 _pcol_reset(pcol);
@@ -293,12 +300,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
293err: 300err:
294 if (!is_sync) 301 if (!is_sync)
295 _unlock_pcol_pages(pcol, ret, READ); 302 _unlock_pcol_pages(pcol, ret, READ);
296 else /* Pages unlocked by caller in sync mode only free bio */ 303
297 pcol_free(pcol); 304 pcol_free(pcol);
298 305
299 kfree(pcol_copy); 306 kfree(pcol_copy);
300 if (or)
301 osd_end_request(or);
302 return ret; 307 return ret;
303} 308}
304 309
@@ -370,12 +375,12 @@ try_again:
370 if (len != PAGE_CACHE_SIZE) 375 if (len != PAGE_CACHE_SIZE)
371 zero_user(page, len, PAGE_CACHE_SIZE - len); 376 zero_user(page, len, PAGE_CACHE_SIZE - len);
372 377
373 EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 378 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
374 inode->i_ino, page->index, len); 379 inode->i_ino, page->index, len);
375 380
376 ret = pcol_add_page(pcol, page, len); 381 ret = pcol_add_page(pcol, page, len);
377 if (ret) { 382 if (ret) {
378 EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " 383 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
379 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 384 "this_len=0x%zx nr_pages=%u length=0x%lx\n",
380 page, len, pcol->nr_pages, pcol->length); 385 page, len, pcol->nr_pages, pcol->length);
381 386
@@ -419,9 +424,8 @@ static int _readpage(struct page *page, bool is_sync)
419 424
420 _pcol_init(&pcol, 1, page->mapping->host); 425 _pcol_init(&pcol, 1, page->mapping->host);
421 426
422 /* readpage_strip might call read_exec(,async) inside at several places 427 /* readpage_strip might call read_exec(,is_sync==false) at several
423 * but this is safe for is_async=0 since read_exec will not do anything 428 * places but not if we have a single page.
424 * when we have a single page.
425 */ 429 */
426 ret = readpage_strip(&pcol, page); 430 ret = readpage_strip(&pcol, page);
427 if (ret) { 431 if (ret) {
@@ -440,8 +444,8 @@ static int exofs_readpage(struct file *file, struct page *page)
440 return _readpage(page, false); 444 return _readpage(page, false);
441} 445}
442 446
443/* Callback for osd_write. All writes are asynchronouse */ 447/* Callback for osd_write. All writes are asynchronous */
444static void writepages_done(struct osd_request *or, void *p) 448static void writepages_done(struct exofs_io_state *ios, void *p)
445{ 449{
446 struct page_collect *pcol = p; 450 struct page_collect *pcol = p;
447 struct bio_vec *bvec; 451 struct bio_vec *bvec;
@@ -449,16 +453,12 @@ static void writepages_done(struct osd_request *or, void *p)
449 u64 resid; 453 u64 resid;
450 u64 good_bytes; 454 u64 good_bytes;
451 u64 length = 0; 455 u64 length = 0;
456 int ret = exofs_check_io(ios, &resid);
452 457
453 int ret = exofs_check_ok_resid(or, NULL, &resid);
454
455 osd_end_request(or);
456 atomic_dec(&pcol->sbi->s_curr_pending); 458 atomic_dec(&pcol->sbi->s_curr_pending);
457 459
458 if (likely(!ret)) 460 if (likely(!ret))
459 good_bytes = pcol->length; 461 good_bytes = pcol->length;
460 else if (!resid)
461 good_bytes = 0;
462 else 462 else
463 good_bytes = pcol->length - resid; 463 good_bytes = pcol->length - resid;
464 464
@@ -482,7 +482,7 @@ static void writepages_done(struct osd_request *or, void *p)
482 482
483 update_write_page(page, page_stat); 483 update_write_page(page, page_stat);
484 unlock_page(page); 484 unlock_page(page);
485 EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", 485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat); 486 inode->i_ino, page->index, page_stat);
487 487
488 length += bvec->bv_len; 488 length += bvec->bv_len;
@@ -496,23 +496,13 @@ static void writepages_done(struct osd_request *or, void *p)
496static int write_exec(struct page_collect *pcol) 496static int write_exec(struct page_collect *pcol)
497{ 497{
498 struct exofs_i_info *oi = exofs_i(pcol->inode); 498 struct exofs_i_info *oi = exofs_i(pcol->inode);
499 struct osd_obj_id obj = {pcol->sbi->s_pid, 499 struct exofs_io_state *ios = pcol->ios;
500 pcol->inode->i_ino + EXOFS_OBJ_OFF};
501 struct osd_request *or = NULL;
502 struct page_collect *pcol_copy = NULL; 500 struct page_collect *pcol_copy = NULL;
503 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
504 int ret; 501 int ret;
505 502
506 if (!pcol->bio) 503 if (!pcol->bio)
507 return 0; 504 return 0;
508 505
509 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
510 if (unlikely(!or)) {
511 EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
512 ret = -ENOMEM;
513 goto err;
514 }
515
516 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
517 if (!pcol_copy) { 507 if (!pcol_copy) {
518 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 508 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
@@ -523,16 +513,22 @@ static int write_exec(struct page_collect *pcol)
523 *pcol_copy = *pcol; 513 *pcol_copy = *pcol;
524 514
525 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
526 osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); 516
527 ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); 517 ios->bio = pcol_copy->bio;
518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
519 ios->length = pcol_copy->length;
520 ios->done = writepages_done;
521 ios->private = pcol_copy;
522
523 ret = exofs_oi_write(oi, ios);
528 if (unlikely(ret)) { 524 if (unlikely(ret)) {
529 EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); 525 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
530 goto err; 526 goto err;
531 } 527 }
532 528
533 atomic_inc(&pcol->sbi->s_curr_pending); 529 atomic_inc(&pcol->sbi->s_curr_pending);
534 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 530 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
535 pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), 531 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
536 pcol->length); 532 pcol->length);
537 /* pages ownership was passed to pcol_copy */ 533 /* pages ownership was passed to pcol_copy */
538 _pcol_reset(pcol); 534 _pcol_reset(pcol);
@@ -540,9 +536,9 @@ static int write_exec(struct page_collect *pcol)
540 536
541err: 537err:
542 _unlock_pcol_pages(pcol, ret, WRITE); 538 _unlock_pcol_pages(pcol, ret, WRITE);
539 pcol_free(pcol);
543 kfree(pcol_copy); 540 kfree(pcol_copy);
544 if (or) 541
545 osd_end_request(or);
546 return ret; 542 return ret;
547} 543}
548 544
@@ -586,6 +582,9 @@ static int writepage_strip(struct page *page,
586 if (PageError(page)) 582 if (PageError(page))
587 ClearPageError(page); 583 ClearPageError(page);
588 unlock_page(page); 584 unlock_page(page);
585 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
586 "outside the limits\n",
587 inode->i_ino, page->index);
589 return 0; 588 return 0;
590 } 589 }
591 } 590 }
@@ -600,6 +599,9 @@ try_again:
600 ret = write_exec(pcol); 599 ret = write_exec(pcol);
601 if (unlikely(ret)) 600 if (unlikely(ret))
602 goto fail; 601 goto fail;
602
603 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
604 inode->i_ino, page->index);
603 goto try_again; 605 goto try_again;
604 } 606 }
605 607
@@ -609,7 +611,7 @@ try_again:
609 goto fail; 611 goto fail;
610 } 612 }
611 613
612 EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 614 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
613 inode->i_ino, page->index, len); 615 inode->i_ino, page->index, len);
614 616
615 ret = pcol_add_page(pcol, page, len); 617 ret = pcol_add_page(pcol, page, len);
@@ -634,6 +636,8 @@ try_again:
634 return 0; 636 return 0;
635 637
636fail: 638fail:
639 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
640 inode->i_ino, page->index, ret);
637 set_bit(AS_EIO, &page->mapping->flags); 641 set_bit(AS_EIO, &page->mapping->flags);
638 unlock_page(page); 642 unlock_page(page);
639 return ret; 643 return ret;
@@ -652,14 +656,17 @@ static int exofs_writepages(struct address_space *mapping,
652 wbc->range_end >> PAGE_CACHE_SHIFT; 656 wbc->range_end >> PAGE_CACHE_SHIFT;
653 657
654 if (start || end) 658 if (start || end)
655 expected_pages = min(end - start + 1, 32L); 659 expected_pages = end - start + 1;
656 else 660 else
657 expected_pages = mapping->nrpages; 661 expected_pages = mapping->nrpages;
658 662
659 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" 663 if (expected_pages < 32L)
660 " m->nrpages=%lu start=0x%lx end=0x%lx\n", 664 expected_pages = 32L;
665
666 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
667 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
661 mapping->host->i_ino, wbc->range_start, wbc->range_end, 668 mapping->host->i_ino, wbc->range_start, wbc->range_end,
662 mapping->nrpages, start, end); 669 mapping->nrpages, start, end, expected_pages);
663 670
664 _pcol_init(&pcol, expected_pages, mapping->host); 671 _pcol_init(&pcol, expected_pages, mapping->host);
665 672
@@ -731,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
731 fsdata); 738 fsdata);
732} 739}
733 740
741static int exofs_write_end(struct file *file, struct address_space *mapping,
742 loff_t pos, unsigned len, unsigned copied,
743 struct page *page, void *fsdata)
744{
745 struct inode *inode = mapping->host;
746 /* According to comment in simple_write_end i_mutex is held */
747 loff_t i_size = inode->i_size;
748 int ret;
749
750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
751 if (i_size != inode->i_size)
752 mark_inode_dirty(inode);
753 return ret;
754}
755
734const struct address_space_operations exofs_aops = { 756const struct address_space_operations exofs_aops = {
735 .readpage = exofs_readpage, 757 .readpage = exofs_readpage,
736 .readpages = exofs_readpages, 758 .readpages = exofs_readpages,
737 .writepage = exofs_writepage, 759 .writepage = exofs_writepage,
738 .writepages = exofs_writepages, 760 .writepages = exofs_writepages,
739 .write_begin = exofs_write_begin_export, 761 .write_begin = exofs_write_begin_export,
740 .write_end = simple_write_end, 762 .write_end = exofs_write_end,
741}; 763};
742 764
743/****************************************************************************** 765/******************************************************************************
@@ -771,19 +793,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock,
771const struct osd_attr g_attr_logical_length = ATTR_DEF( 793const struct osd_attr g_attr_logical_length = ATTR_DEF(
772 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 794 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
773 795
796static int _do_truncate(struct inode *inode)
797{
798 struct exofs_i_info *oi = exofs_i(inode);
799 loff_t isize = i_size_read(inode);
800 int ret;
801
802 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
803
804 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
805
806 ret = exofs_oi_truncate(oi, (u64)isize);
807 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
808 return ret;
809}
810
774/* 811/*
775 * Truncate a file to the specified size - all we have to do is set the size 812 * Truncate a file to the specified size - all we have to do is set the size
776 * attribute. We make sure the object exists first. 813 * attribute. We make sure the object exists first.
777 */ 814 */
778void exofs_truncate(struct inode *inode) 815void exofs_truncate(struct inode *inode)
779{ 816{
780 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
781 struct exofs_i_info *oi = exofs_i(inode); 817 struct exofs_i_info *oi = exofs_i(inode);
782 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
783 struct osd_request *or;
784 struct osd_attr attr;
785 loff_t isize = i_size_read(inode);
786 __be64 newsize;
787 int ret; 818 int ret;
788 819
789 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 820 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -793,22 +824,6 @@ void exofs_truncate(struct inode *inode)
793 return; 824 return;
794 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 825 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
795 return; 826 return;
796 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
797
798 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
799
800 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
801 if (unlikely(!or)) {
802 EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
803 goto fail;
804 }
805
806 osd_req_set_attributes(or, &obj);
807
808 newsize = cpu_to_be64((u64)isize);
809 attr = g_attr_logical_length;
810 attr.val_ptr = &newsize;
811 osd_req_add_set_attr_list(or, &attr, 1);
812 827
813 /* if we are about to truncate an object, and it hasn't been 828 /* if we are about to truncate an object, and it hasn't been
814 * created yet, wait 829 * created yet, wait
@@ -816,8 +831,7 @@ void exofs_truncate(struct inode *inode)
816 if (unlikely(wait_obj_created(oi))) 831 if (unlikely(wait_obj_created(oi)))
817 goto fail; 832 goto fail;
818 833
819 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 834 ret = _do_truncate(inode);
820 osd_end_request(or);
821 if (ret) 835 if (ret)
822 goto fail; 836 goto fail;
823 837
@@ -847,65 +861,62 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
847 861
848/* 862/*
849 * Read an inode from the OSD, and return it as is. We also return the size 863 * Read an inode from the OSD, and return it as is. We also return the size
850 * attribute in the 'sanity' argument if we got compiled with debugging turned 864 * attribute in the 'obj_size' argument.
851 * on.
852 */ 865 */
853static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 866static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
854 struct exofs_fcb *inode, uint64_t *sanity) 867 struct exofs_fcb *inode, uint64_t *obj_size)
855{ 868{
856 struct exofs_sb_info *sbi = sb->s_fs_info; 869 struct exofs_sb_info *sbi = sb->s_fs_info;
857 struct osd_request *or; 870 struct osd_attr attrs[2];
858 struct osd_attr attr; 871 struct exofs_io_state *ios;
859 struct osd_obj_id obj = {sbi->s_pid,
860 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
861 int ret; 872 int ret;
862 873
863 exofs_make_credential(oi->i_cred, &obj); 874 *obj_size = ~0;
864 875 ret = exofs_get_io_state(sbi, &ios);
865 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 876 if (unlikely(ret)) {
866 if (unlikely(!or)) { 877 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
867 EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); 878 return ret;
868 return -ENOMEM;
869 } 879 }
870 osd_req_get_attributes(or, &obj);
871 880
872 /* we need the inode attribute */ 881 ios->obj.id = exofs_oi_objno(oi);
873 osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); 882 exofs_make_credential(oi->i_cred, &ios->obj);
883 ios->cred = oi->i_cred;
874 884
875#ifdef EXOFS_DEBUG_OBJ_ISIZE 885 attrs[0] = g_attr_inode_data;
876 /* we get the size attributes to do a sanity check */ 886 attrs[1] = g_attr_logical_length;
877 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); 887 ios->in_attr = attrs;
878#endif 888 ios->in_attr_len = ARRAY_SIZE(attrs);
879 889
880 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 890 ret = exofs_sbi_read(ios);
881 if (ret) 891 if (ret)
882 goto out; 892 goto out;
883 893
884 attr = g_attr_inode_data; 894 ret = extract_attr_from_ios(ios, &attrs[0]);
885 ret = extract_attr_from_req(or, &attr);
886 if (ret) { 895 if (ret) {
887 EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); 896 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
888 goto out; 897 goto out;
889 } 898 }
899 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
900 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
890 901
891 WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); 902 ret = extract_attr_from_ios(ios, &attrs[1]);
892 memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
893
894#ifdef EXOFS_DEBUG_OBJ_ISIZE
895 attr = g_attr_logical_length;
896 ret = extract_attr_from_req(or, &attr);
897 if (ret) { 903 if (ret) {
898 EXOFS_ERR("ERROR: extract attr from or failed\n"); 904 EXOFS_ERR("%s: extract_attr of logical_length failed\n",
905 __func__);
899 goto out; 906 goto out;
900 } 907 }
901 *sanity = get_unaligned_be64(attr.val_ptr); 908 *obj_size = get_unaligned_be64(attrs[1].val_ptr);
902#endif
903 909
904out: 910out:
905 osd_end_request(or); 911 exofs_put_io_state(ios);
906 return ret; 912 return ret;
907} 913}
908 914
915static void __oi_init(struct exofs_i_info *oi)
916{
917 init_waitqueue_head(&oi->i_wq);
918 oi->i_flags = 0;
919}
909/* 920/*
910 * Fill in an inode read from the OSD and set it up for use 921 * Fill in an inode read from the OSD and set it up for use
911 */ 922 */
@@ -914,7 +925,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
914 struct exofs_i_info *oi; 925 struct exofs_i_info *oi;
915 struct exofs_fcb fcb; 926 struct exofs_fcb fcb;
916 struct inode *inode; 927 struct inode *inode;
917 uint64_t uninitialized_var(sanity); 928 uint64_t obj_size;
918 int ret; 929 int ret;
919 930
920 inode = iget_locked(sb, ino); 931 inode = iget_locked(sb, ino);
@@ -923,13 +934,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
923 if (!(inode->i_state & I_NEW)) 934 if (!(inode->i_state & I_NEW))
924 return inode; 935 return inode;
925 oi = exofs_i(inode); 936 oi = exofs_i(inode);
937 __oi_init(oi);
926 938
927 /* read the inode from the osd */ 939 /* read the inode from the osd */
928 ret = exofs_get_inode(sb, oi, &fcb, &sanity); 940 ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
929 if (ret) 941 if (ret)
930 goto bad_inode; 942 goto bad_inode;
931 943
932 init_waitqueue_head(&oi->i_wq);
933 set_obj_created(oi); 944 set_obj_created(oi);
934 945
935 /* copy stuff from on-disk struct to in-memory struct */ 946 /* copy stuff from on-disk struct to in-memory struct */
@@ -947,14 +958,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
947 inode->i_blkbits = EXOFS_BLKSHIFT; 958 inode->i_blkbits = EXOFS_BLKSHIFT;
948 inode->i_generation = le32_to_cpu(fcb.i_generation); 959 inode->i_generation = le32_to_cpu(fcb.i_generation);
949 960
950#ifdef EXOFS_DEBUG_OBJ_ISIZE 961 if ((inode->i_size != obj_size) &&
951 if ((inode->i_size != sanity) &&
952 (!exofs_inode_is_fast_symlink(inode))) { 962 (!exofs_inode_is_fast_symlink(inode))) {
953 EXOFS_ERR("WARNING: Size of object from inode and " 963 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
954 "attributes differ (%lld != %llu)\n", 964 inode->i_size, _LLU(obj_size));
955 inode->i_size, _LLU(sanity)); 965 /* FIXME: call exofs_inode_recovery() */
956 } 966 }
957#endif
958 967
959 oi->i_dir_start_lookup = 0; 968 oi->i_dir_start_lookup = 0;
960 969
@@ -1020,23 +1029,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1020 * set the obj_created flag so that other methods know that the object exists on 1029 * set the obj_created flag so that other methods know that the object exists on
1021 * the OSD. 1030 * the OSD.
1022 */ 1031 */
1023static void create_done(struct osd_request *or, void *p) 1032static void create_done(struct exofs_io_state *ios, void *p)
1024{ 1033{
1025 struct inode *inode = p; 1034 struct inode *inode = p;
1026 struct exofs_i_info *oi = exofs_i(inode); 1035 struct exofs_i_info *oi = exofs_i(inode);
1027 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1036 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1028 int ret; 1037 int ret;
1029 1038
1030 ret = exofs_check_ok(or); 1039 ret = exofs_check_io(ios, NULL);
1031 osd_end_request(or); 1040 exofs_put_io_state(ios);
1041
1032 atomic_dec(&sbi->s_curr_pending); 1042 atomic_dec(&sbi->s_curr_pending);
1033 1043
1034 if (unlikely(ret)) { 1044 if (unlikely(ret)) {
1035 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1045 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1036 _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); 1046 _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
1037 make_bad_inode(inode); 1047 /*TODO: When FS is corrupted creation can fail, object already
1038 } else 1048 * exist. Get rid of this asynchronous creation, if exist
1039 set_obj_created(oi); 1049 * increment the obj counter and try the next object. Until we
1050 * succeed. All these dangling objects will be made into lost
1051 * files by chkfs.exofs
1052 */
1053 }
1054
1055 set_obj_created(oi);
1040 1056
1041 atomic_dec(&inode->i_count); 1057 atomic_dec(&inode->i_count);
1042 wake_up(&oi->i_wq); 1058 wake_up(&oi->i_wq);
@@ -1051,8 +1067,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1051 struct inode *inode; 1067 struct inode *inode;
1052 struct exofs_i_info *oi; 1068 struct exofs_i_info *oi;
1053 struct exofs_sb_info *sbi; 1069 struct exofs_sb_info *sbi;
1054 struct osd_request *or; 1070 struct exofs_io_state *ios;
1055 struct osd_obj_id obj;
1056 int ret; 1071 int ret;
1057 1072
1058 sb = dir->i_sb; 1073 sb = dir->i_sb;
@@ -1061,8 +1076,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1061 return ERR_PTR(-ENOMEM); 1076 return ERR_PTR(-ENOMEM);
1062 1077
1063 oi = exofs_i(inode); 1078 oi = exofs_i(inode);
1079 __oi_init(oi);
1064 1080
1065 init_waitqueue_head(&oi->i_wq);
1066 set_obj_2bcreated(oi); 1081 set_obj_2bcreated(oi);
1067 1082
1068 sbi = sb->s_fs_info; 1083 sbi = sb->s_fs_info;
@@ -1089,28 +1104,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1089 1104
1090 mark_inode_dirty(inode); 1105 mark_inode_dirty(inode);
1091 1106
1092 obj.partition = sbi->s_pid; 1107 ret = exofs_get_io_state(sbi, &ios);
1093 obj.id = inode->i_ino + EXOFS_OBJ_OFF; 1108 if (unlikely(ret)) {
1094 exofs_make_credential(oi->i_cred, &obj); 1109 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
1095 1110 return ERR_PTR(ret);
1096 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1097 if (unlikely(!or)) {
1098 EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
1099 return ERR_PTR(-ENOMEM);
1100 } 1111 }
1101 1112
1102 osd_req_create_object(or, &obj); 1113 ios->obj.id = exofs_oi_objno(oi);
1114 exofs_make_credential(oi->i_cred, &ios->obj);
1103 1115
1104 /* increment the refcount so that the inode will still be around when we 1116 /* increment the refcount so that the inode will still be around when we
1105 * reach the callback 1117 * reach the callback
1106 */ 1118 */
1107 atomic_inc(&inode->i_count); 1119 atomic_inc(&inode->i_count);
1108 1120
1109 ret = exofs_async_op(or, create_done, inode, oi->i_cred); 1121 ios->done = create_done;
1122 ios->private = inode;
1123 ios->cred = oi->i_cred;
1124 ret = exofs_sbi_create(ios);
1110 if (ret) { 1125 if (ret) {
1111 atomic_dec(&inode->i_count); 1126 atomic_dec(&inode->i_count);
1112 osd_end_request(or); 1127 exofs_put_io_state(ios);
1113 return ERR_PTR(-EIO); 1128 return ERR_PTR(ret);
1114 } 1129 }
1115 atomic_inc(&sbi->s_curr_pending); 1130 atomic_inc(&sbi->s_curr_pending);
1116 1131
@@ -1128,11 +1143,11 @@ struct updatei_args {
1128/* 1143/*
1129 * Callback function from exofs_update_inode(). 1144 * Callback function from exofs_update_inode().
1130 */ 1145 */
1131static void updatei_done(struct osd_request *or, void *p) 1146static void updatei_done(struct exofs_io_state *ios, void *p)
1132{ 1147{
1133 struct updatei_args *args = p; 1148 struct updatei_args *args = p;
1134 1149
1135 osd_end_request(or); 1150 exofs_put_io_state(ios);
1136 1151
1137 atomic_dec(&args->sbi->s_curr_pending); 1152 atomic_dec(&args->sbi->s_curr_pending);
1138 1153
@@ -1148,8 +1163,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1148 struct exofs_i_info *oi = exofs_i(inode); 1163 struct exofs_i_info *oi = exofs_i(inode);
1149 struct super_block *sb = inode->i_sb; 1164 struct super_block *sb = inode->i_sb;
1150 struct exofs_sb_info *sbi = sb->s_fs_info; 1165 struct exofs_sb_info *sbi = sb->s_fs_info;
1151 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1166 struct exofs_io_state *ios;
1152 struct osd_request *or;
1153 struct osd_attr attr; 1167 struct osd_attr attr;
1154 struct exofs_fcb *fcb; 1168 struct exofs_fcb *fcb;
1155 struct updatei_args *args; 1169 struct updatei_args *args;
@@ -1186,18 +1200,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1186 } else 1200 } else
1187 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1201 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1188 1202
1189 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1203 ret = exofs_get_io_state(sbi, &ios);
1190 if (unlikely(!or)) { 1204 if (unlikely(ret)) {
1191 EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); 1205 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1192 ret = -ENOMEM;
1193 goto free_args; 1206 goto free_args;
1194 } 1207 }
1195 1208
1196 osd_req_set_attributes(or, &obj);
1197
1198 attr = g_attr_inode_data; 1209 attr = g_attr_inode_data;
1199 attr.val_ptr = fcb; 1210 attr.val_ptr = fcb;
1200 osd_req_add_set_attr_list(or, &attr, 1); 1211 ios->out_attr_len = 1;
1212 ios->out_attr = &attr;
1201 1213
1202 if (!obj_created(oi)) { 1214 if (!obj_created(oi)) {
1203 EXOFS_DBGMSG("!obj_created\n"); 1215 EXOFS_DBGMSG("!obj_created\n");
@@ -1206,22 +1218,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1206 EXOFS_DBGMSG("wait_event done\n"); 1218 EXOFS_DBGMSG("wait_event done\n");
1207 } 1219 }
1208 1220
1209 if (do_sync) { 1221 if (!do_sync) {
1210 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
1211 osd_end_request(or);
1212 goto free_args;
1213 } else {
1214 args->sbi = sbi; 1222 args->sbi = sbi;
1223 ios->done = updatei_done;
1224 ios->private = args;
1225 }
1215 1226
1216 ret = exofs_async_op(or, updatei_done, args, oi->i_cred); 1227 ret = exofs_oi_write(oi, ios);
1217 if (ret) { 1228 if (!do_sync && !ret) {
1218 osd_end_request(or);
1219 goto free_args;
1220 }
1221 atomic_inc(&sbi->s_curr_pending); 1229 atomic_inc(&sbi->s_curr_pending);
1222 goto out; /* deallocation in updatei_done */ 1230 goto out; /* deallocation in updatei_done */
1223 } 1231 }
1224 1232
1233 exofs_put_io_state(ios);
1225free_args: 1234free_args:
1226 kfree(args); 1235 kfree(args);
1227out: 1236out:
@@ -1238,11 +1247,12 @@ int exofs_write_inode(struct inode *inode, int wait)
1238 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1247 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1239 * do. 1248 * do.
1240 */ 1249 */
1241static void delete_done(struct osd_request *or, void *p) 1250static void delete_done(struct exofs_io_state *ios, void *p)
1242{ 1251{
1243 struct exofs_sb_info *sbi; 1252 struct exofs_sb_info *sbi = p;
1244 osd_end_request(or); 1253
1245 sbi = p; 1254 exofs_put_io_state(ios);
1255
1246 atomic_dec(&sbi->s_curr_pending); 1256 atomic_dec(&sbi->s_curr_pending);
1247} 1257}
1248 1258
@@ -1256,8 +1266,7 @@ void exofs_delete_inode(struct inode *inode)
1256 struct exofs_i_info *oi = exofs_i(inode); 1266 struct exofs_i_info *oi = exofs_i(inode);
1257 struct super_block *sb = inode->i_sb; 1267 struct super_block *sb = inode->i_sb;
1258 struct exofs_sb_info *sbi = sb->s_fs_info; 1268 struct exofs_sb_info *sbi = sb->s_fs_info;
1259 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1269 struct exofs_io_state *ios;
1260 struct osd_request *or;
1261 int ret; 1270 int ret;
1262 1271
1263 truncate_inode_pages(&inode->i_data, 0); 1272 truncate_inode_pages(&inode->i_data, 0);
@@ -1274,25 +1283,26 @@ void exofs_delete_inode(struct inode *inode)
1274 1283
1275 clear_inode(inode); 1284 clear_inode(inode);
1276 1285
1277 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1286 ret = exofs_get_io_state(sbi, &ios);
1278 if (unlikely(!or)) { 1287 if (unlikely(ret)) {
1279 EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); 1288 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1280 return; 1289 return;
1281 } 1290 }
1282 1291
1283 osd_req_remove_object(or, &obj);
1284
1285 /* if we are deleting an obj that hasn't been created yet, wait */ 1292 /* if we are deleting an obj that hasn't been created yet, wait */
1286 if (!obj_created(oi)) { 1293 if (!obj_created(oi)) {
1287 BUG_ON(!obj_2bcreated(oi)); 1294 BUG_ON(!obj_2bcreated(oi));
1288 wait_event(oi->i_wq, obj_created(oi)); 1295 wait_event(oi->i_wq, obj_created(oi));
1289 } 1296 }
1290 1297
1291 ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); 1298 ios->obj.id = exofs_oi_objno(oi);
1299 ios->done = delete_done;
1300 ios->private = sbi;
1301 ios->cred = oi->i_cred;
1302 ret = exofs_sbi_remove(ios);
1292 if (ret) { 1303 if (ret) {
1293 EXOFS_ERR( 1304 EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
1294 "ERROR: @exofs_delete_inode exofs_async_op failed\n"); 1305 exofs_put_io_state(ios);
1295 osd_end_request(or);
1296 return; 1306 return;
1297 } 1307 }
1298 atomic_inc(&sbi->s_curr_pending); 1308 atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
new file mode 100644
index 00000000000..5bad01fa1f9
--- /dev/null
+++ b/fs/exofs/ios.c
@@ -0,0 +1,421 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26
27#include "exofs.h"
28
29void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
30{
31 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
32}
33
34int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
35 u64 offset, void *p, unsigned length)
36{
37 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
38/* struct osd_sense_info osi = {.key = 0};*/
39 int ret;
40
41 if (unlikely(!or)) {
42 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
43 return -ENOMEM;
44 }
45 ret = osd_req_read_kern(or, obj, offset, p, length);
46 if (unlikely(ret)) {
47 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
48 goto out;
49 }
50
51 ret = osd_finalize_request(or, 0, cred, NULL);
52 if (unlikely(ret)) {
53 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
54 goto out;
55 }
56
57 ret = osd_execute_request(or);
58 if (unlikely(ret))
59 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
60 /* osd_req_decode_sense(or, ret); */
61
62out:
63 osd_end_request(or);
64 return ret;
65}
66
67int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
68{
69 struct exofs_io_state *ios;
70
71 /*TODO: Maybe use kmem_cach per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs)
73 */
74 ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL);
75 if (unlikely(!ios)) {
76 *pios = NULL;
77 return -ENOMEM;
78 }
79
80 ios->sbi = sbi;
81 ios->obj.partition = sbi->s_pid;
82 *pios = ios;
83 return 0;
84}
85
86void exofs_put_io_state(struct exofs_io_state *ios)
87{
88 if (ios) {
89 unsigned i;
90
91 for (i = 0; i < ios->numdevs; i++) {
92 struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
93
94 if (per_dev->or)
95 osd_end_request(per_dev->or);
96 if (per_dev->bio)
97 bio_put(per_dev->bio);
98 }
99
100 kfree(ios);
101 }
102}
103
104static void _sync_done(struct exofs_io_state *ios, void *p)
105{
106 struct completion *waiting = p;
107
108 complete(waiting);
109}
110
111static void _last_io(struct kref *kref)
112{
113 struct exofs_io_state *ios = container_of(
114 kref, struct exofs_io_state, kref);
115
116 ios->done(ios, ios->private);
117}
118
119static void _done_io(struct osd_request *or, void *p)
120{
121 struct exofs_io_state *ios = p;
122
123 kref_put(&ios->kref, _last_io);
124}
125
126static int exofs_io_execute(struct exofs_io_state *ios)
127{
128 DECLARE_COMPLETION_ONSTACK(wait);
129 bool sync = (ios->done == NULL);
130 int i, ret;
131
132 if (sync) {
133 ios->done = _sync_done;
134 ios->private = &wait;
135 }
136
137 for (i = 0; i < ios->numdevs; i++) {
138 struct osd_request *or = ios->per_dev[i].or;
139 if (unlikely(!or))
140 continue;
141
142 ret = osd_finalize_request(or, 0, ios->cred, NULL);
143 if (unlikely(ret)) {
144 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
145 ret);
146 return ret;
147 }
148 }
149
150 kref_init(&ios->kref);
151
152 for (i = 0; i < ios->numdevs; i++) {
153 struct osd_request *or = ios->per_dev[i].or;
154 if (unlikely(!or))
155 continue;
156
157 kref_get(&ios->kref);
158 osd_execute_request_async(or, _done_io, ios);
159 }
160
161 kref_put(&ios->kref, _last_io);
162 ret = 0;
163
164 if (sync) {
165 wait_for_completion(&wait);
166 ret = exofs_check_io(ios, NULL);
167 }
168 return ret;
169}
170
171int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
172{
173 enum osd_err_priority acumulated_osd_err = 0;
174 int acumulated_lin_err = 0;
175 int i;
176
177 for (i = 0; i < ios->numdevs; i++) {
178 struct osd_sense_info osi;
179 int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
180
181 if (likely(!ret))
182 continue;
183
184 if (unlikely(ret == -EFAULT)) {
185 EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
186 /*FIXME: All the pages in this device range should:
187 * clear_highpage(page);
188 */
189 }
190
191 if (osi.osd_err_pri >= acumulated_osd_err) {
192 acumulated_osd_err = osi.osd_err_pri;
193 acumulated_lin_err = ret;
194 }
195 }
196
197 /* TODO: raid specific residual calculations */
198 if (resid) {
199 if (likely(!acumulated_lin_err))
200 *resid = 0;
201 else
202 *resid = ios->length;
203 }
204
205 return acumulated_lin_err;
206}
207
208int exofs_sbi_create(struct exofs_io_state *ios)
209{
210 int i, ret;
211
212 for (i = 0; i < ios->sbi->s_numdevs; i++) {
213 struct osd_request *or;
214
215 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
216 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM;
219 goto out;
220 }
221 ios->per_dev[i].or = or;
222 ios->numdevs++;
223
224 osd_req_create_object(or, &ios->obj);
225 }
226 ret = exofs_io_execute(ios);
227
228out:
229 return ret;
230}
231
232int exofs_sbi_remove(struct exofs_io_state *ios)
233{
234 int i, ret;
235
236 for (i = 0; i < ios->sbi->s_numdevs; i++) {
237 struct osd_request *or;
238
239 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
240 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM;
243 goto out;
244 }
245 ios->per_dev[i].or = or;
246 ios->numdevs++;
247
248 osd_req_remove_object(or, &ios->obj);
249 }
250 ret = exofs_io_execute(ios);
251
252out:
253 return ret;
254}
255
256int exofs_sbi_write(struct exofs_io_state *ios)
257{
258 int i, ret;
259
260 for (i = 0; i < ios->sbi->s_numdevs; i++) {
261 struct osd_request *or;
262
263 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
264 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM;
267 goto out;
268 }
269 ios->per_dev[i].or = or;
270 ios->numdevs++;
271
272 if (ios->bio) {
273 struct bio *bio;
274
275 if (i != 0) {
276 bio = bio_kmalloc(GFP_KERNEL,
277 ios->bio->bi_max_vecs);
278 if (unlikely(!bio)) {
279 ret = -ENOMEM;
280 goto out;
281 }
282
283 __bio_clone(bio, ios->bio);
284 bio->bi_bdev = NULL;
285 bio->bi_next = NULL;
286 ios->per_dev[i].bio = bio;
287 } else {
288 bio = ios->bio;
289 }
290
291 osd_req_write(or, &ios->obj, ios->offset, bio,
292 ios->length);
293/* EXOFS_DBGMSG("write sync=%d\n", sync);*/
294 } else if (ios->kern_buff) {
295 osd_req_write_kern(or, &ios->obj, ios->offset,
296 ios->kern_buff, ios->length);
297/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
298 } else {
299 osd_req_set_attributes(or, &ios->obj);
300/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
301 }
302
303 if (ios->out_attr)
304 osd_req_add_set_attr_list(or, ios->out_attr,
305 ios->out_attr_len);
306
307 if (ios->in_attr)
308 osd_req_add_get_attr_list(or, ios->in_attr,
309 ios->in_attr_len);
310 }
311 ret = exofs_io_execute(ios);
312
313out:
314 return ret;
315}
316
317int exofs_sbi_read(struct exofs_io_state *ios)
318{
319 int i, ret;
320
321 for (i = 0; i < 1; i++) {
322 struct osd_request *or;
323 unsigned first_dev = (unsigned)ios->obj.id;
324
325 first_dev %= ios->sbi->s_numdevs;
326 or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL);
327 if (unlikely(!or)) {
328 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
329 ret = -ENOMEM;
330 goto out;
331 }
332 ios->per_dev[i].or = or;
333 ios->numdevs++;
334
335 if (ios->bio) {
336 osd_req_read(or, &ios->obj, ios->offset, ios->bio,
337 ios->length);
338/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
339 } else if (ios->kern_buff) {
340 osd_req_read_kern(or, &ios->obj, ios->offset,
341 ios->kern_buff, ios->length);
342/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
343 } else {
344 osd_req_get_attributes(or, &ios->obj);
345/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
346 }
347
348 if (ios->out_attr)
349 osd_req_add_set_attr_list(or, ios->out_attr,
350 ios->out_attr_len);
351
352 if (ios->in_attr)
353 osd_req_add_get_attr_list(or, ios->in_attr,
354 ios->in_attr_len);
355 }
356 ret = exofs_io_execute(ios);
357
358out:
359 return ret;
360}
361
362int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
363{
364 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
365 void *iter = NULL;
366 int nelem;
367
368 do {
369 nelem = 1;
370 osd_req_decode_get_attr_list(ios->per_dev[0].or,
371 &cur_attr, &nelem, &iter);
372 if ((cur_attr.attr_page == attr->attr_page) &&
373 (cur_attr.attr_id == attr->attr_id)) {
374 attr->len = cur_attr.len;
375 attr->val_ptr = cur_attr.val_ptr;
376 return 0;
377 }
378 } while (iter);
379
380 return -EIO;
381}
382
383int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
384{
385 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
386 struct exofs_io_state *ios;
387 struct osd_attr attr;
388 __be64 newsize;
389 int i, ret;
390
391 if (exofs_get_io_state(sbi, &ios))
392 return -ENOMEM;
393
394 ios->obj.id = exofs_oi_objno(oi);
395 ios->cred = oi->i_cred;
396
397 newsize = cpu_to_be64(size);
398 attr = g_attr_logical_length;
399 attr.val_ptr = &newsize;
400
401 for (i = 0; i < sbi->s_numdevs; i++) {
402 struct osd_request *or;
403
404 or = osd_start_request(sbi->s_ods[i], GFP_KERNEL);
405 if (unlikely(!or)) {
406 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
407 ret = -ENOMEM;
408 goto out;
409 }
410 ios->per_dev[i].or = or;
411 ios->numdevs++;
412
413 osd_req_set_attributes(or, &ios->obj);
414 osd_req_add_set_attr_list(or, &attr, 1);
415 }
416 ret = exofs_io_execute(ios);
417
418out:
419 exofs_put_io_state(ios);
420 return ret;
421}
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
deleted file mode 100644
index 4372542df28..00000000000
--- a/fs/exofs/osd.c
+++ /dev/null
@@ -1,125 +0,0 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26#include <scsi/osd_sense.h>
27
28#include "exofs.h"
29
30int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
31{
32 struct osd_sense_info osi;
33 int ret = osd_req_decode_sense(or, &osi);
34
35 if (ret) { /* translate to Linux codes */
36 if (osi.additional_code == scsi_invalid_field_in_cdb) {
37 if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
38 ret = -EFAULT;
39 if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
40 ret = -ENOENT;
41 else
42 ret = -EINVAL;
43 } else if (osi.additional_code == osd_quota_error)
44 ret = -ENOSPC;
45 else
46 ret = -EIO;
47 }
48
49 /* FIXME: should be include in osd_sense_info */
50 if (in_resid)
51 *in_resid = or->in.req ? or->in.req->resid_len : 0;
52
53 if (out_resid)
54 *out_resid = or->out.req ? or->out.req->resid_len : 0;
55
56 return ret;
57}
58
59void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
60{
61 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
62}
63
64/*
65 * Perform a synchronous OSD operation.
66 */
67int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
68{
69 int ret;
70
71 or->timeout = timeout;
72 ret = osd_finalize_request(or, 0, credential, NULL);
73 if (ret) {
74 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
75 return ret;
76 }
77
78 ret = osd_execute_request(or);
79
80 if (ret)
81 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
82 /* osd_req_decode_sense(or, ret); */
83 return ret;
84}
85
86/*
87 * Perform an asynchronous OSD operation.
88 */
89int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
90 void *caller_context, u8 *cred)
91{
92 int ret;
93
94 ret = osd_finalize_request(or, 0, cred, NULL);
95 if (ret) {
96 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
97 return ret;
98 }
99
100 ret = osd_execute_request_async(or, async_done, caller_context);
101
102 if (ret)
103 EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
104 return ret;
105}
106
107int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
108{
109 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
110 void *iter = NULL;
111 int nelem;
112
113 do {
114 nelem = 1;
115 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
116 if ((cur_attr.attr_page == attr->attr_page) &&
117 (cur_attr.attr_id == attr->attr_id)) {
118 attr->len = cur_attr.len;
119 attr->val_ptr = cur_attr.val_ptr;
120 return 0;
121 }
122 } while (iter);
123
124 return -EIO;
125}
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
new file mode 100644
index 00000000000..c52e9888b8a
--- /dev/null
+++ b/fs/exofs/pnfs.h
@@ -0,0 +1,45 @@
1/*
2 * Copyright (C) 2008, 2009
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * This file is part of exofs.
6 *
7 * exofs is free software; you can redistribute it and/or modify it under the
8 * terms of the GNU General Public License version 2 as published by the Free
9 * Software Foundation.
10 *
11 */
12
13/* FIXME: Remove this file once pnfs hits mainline */
14
15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__
17
18#if ! defined(__PNFS_OSD_XDR_H__)
19
20enum pnfs_iomode {
21 IOMODE_READ = 1,
22 IOMODE_RW = 2,
23 IOMODE_ANY = 3,
24};
25
26/* Layout Structure */
27enum pnfs_osd_raid_algorithm4 {
28 PNFS_OSD_RAID_0 = 1,
29 PNFS_OSD_RAID_4 = 2,
30 PNFS_OSD_RAID_5 = 3,
31 PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
32};
33
34struct pnfs_osd_data_map {
35 u32 odm_num_comps;
36 u64 odm_stripe_unit;
37 u32 odm_group_width;
38 u32 odm_group_depth;
39 u32 odm_mirror_cnt;
40 u32 odm_raid_algorithm;
41};
42
43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
44
45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f500dec3b5..a1d1e77b12e 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -203,49 +203,45 @@ int exofs_sync_fs(struct super_block *sb, int wait)
203{ 203{
204 struct exofs_sb_info *sbi; 204 struct exofs_sb_info *sbi;
205 struct exofs_fscb *fscb; 205 struct exofs_fscb *fscb;
206 struct osd_request *or; 206 struct exofs_io_state *ios;
207 struct osd_obj_id obj;
208 int ret = -ENOMEM; 207 int ret = -ENOMEM;
209 208
210 fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
211 if (!fscb) {
212 EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
213 return -ENOMEM;
214 }
215
216 lock_super(sb); 209 lock_super(sb);
217 sbi = sb->s_fs_info; 210 sbi = sb->s_fs_info;
211 fscb = &sbi->s_fscb;
212
213 ret = exofs_get_io_state(sbi, &ios);
214 if (ret)
215 goto out;
216
217 /* Note: We only write the changing part of the fscb. .i.e upto the
218 * the fscb->s_dev_table_oid member. There is no read-modify-write
219 * here.
220 */
221 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
222 memset(fscb, 0, ios->length);
218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 223 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 224 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
220 fscb->s_magic = cpu_to_le16(sb->s_magic); 225 fscb->s_magic = cpu_to_le16(sb->s_magic);
221 fscb->s_newfs = 0; 226 fscb->s_newfs = 0;
227 fscb->s_version = EXOFS_FSCB_VER;
222 228
223 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 229 ios->obj.id = EXOFS_SUPER_ID;
224 if (unlikely(!or)) { 230 ios->offset = 0;
225 EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); 231 ios->kern_buff = fscb;
226 goto out; 232 ios->cred = sbi->s_cred;
227 }
228 233
229 obj.partition = sbi->s_pid; 234 ret = exofs_sbi_write(ios);
230 obj.id = EXOFS_SUPER_ID;
231 ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
232 if (unlikely(ret)) { 235 if (unlikely(ret)) {
233 EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); 236 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
234 goto out;
235 }
236
237 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
238 if (unlikely(ret)) {
239 EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
240 goto out; 237 goto out;
241 } 238 }
242 sb->s_dirt = 0; 239 sb->s_dirt = 0;
243 240
244out: 241out:
245 if (or) 242 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
246 osd_end_request(or); 243 exofs_put_io_state(ios);
247 unlock_super(sb); 244 unlock_super(sb);
248 kfree(fscb);
249 return ret; 245 return ret;
250} 246}
251 247
@@ -257,6 +253,29 @@ static void exofs_write_super(struct super_block *sb)
257 sb->s_dirt = 0; 253 sb->s_dirt = 0;
258} 254}
259 255
256static void _exofs_print_device(const char *msg, const char *dev_path,
257 struct osd_dev *od, u64 pid)
258{
259 const struct osd_dev_info *odi = osduld_device_info(od);
260
261 printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n",
262 msg, dev_path ?: "", odi->osdname, _LLU(pid));
263}
264
265void exofs_free_sbi(struct exofs_sb_info *sbi)
266{
267 while (sbi->s_numdevs) {
268 int i = --sbi->s_numdevs;
269 struct osd_dev *od = sbi->s_ods[i];
270
271 if (od) {
272 sbi->s_ods[i] = NULL;
273 osduld_put_device(od);
274 }
275 }
276 kfree(sbi);
277}
278
260/* 279/*
261 * This function is called when the vfs is freeing the superblock. We just 280 * This function is called when the vfs is freeing the superblock. We just
262 * need to free our own part. 281 * need to free our own part.
@@ -279,11 +298,182 @@ static void exofs_put_super(struct super_block *sb)
279 msecs_to_jiffies(100)); 298 msecs_to_jiffies(100));
280 } 299 }
281 300
282 osduld_put_device(sbi->s_dev); 301 _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid);
283 kfree(sb->s_fs_info); 302
303 exofs_free_sbi(sbi);
284 sb->s_fs_info = NULL; 304 sb->s_fs_info = NULL;
285} 305}
286 306
307static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
308 struct exofs_device_table *dt)
309{
310 sbi->data_map.odm_num_comps =
311 le32_to_cpu(dt->dt_data_map.cb_num_comps);
312 sbi->data_map.odm_stripe_unit =
313 le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
314 sbi->data_map.odm_group_width =
315 le32_to_cpu(dt->dt_data_map.cb_group_width);
316 sbi->data_map.odm_group_depth =
317 le32_to_cpu(dt->dt_data_map.cb_group_depth);
318 sbi->data_map.odm_mirror_cnt =
319 le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
320 sbi->data_map.odm_raid_algorithm =
321 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
322
323/* FIXME: Hard coded mirror only for now. if not so do not mount */
324 if ((sbi->data_map.odm_num_comps != numdevs) ||
325 (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) ||
326 (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) ||
327 (sbi->data_map.odm_mirror_cnt != (numdevs - 1)))
328 return -EINVAL;
329 else
330 return 0;
331}
332
333/* @odi is valid only as long as @fscb_dev is valid */
334static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
335 struct osd_dev_info *odi)
336{
337 odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
338 memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
339
340 odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
341 odi->osdname = dt_dev->osdname;
342
343 /* FIXME support long names. Will need a _put function */
344 if (dt_dev->long_name_offset)
345 return -EINVAL;
346
347 /* Make sure osdname is printable!
348 * mkexofs should give us space for a null-terminator else the
349 * device-table is invalid.
350 */
351 if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname)))
352 odi->osdname_len = sizeof(dt_dev->osdname) - 1;
353 dt_dev->osdname[odi->osdname_len] = 0;
354
355 /* If it's all zeros something is bad we read past end-of-obj */
356 return !(odi->systemid_len || odi->osdname_len);
357}
358
359static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
360 unsigned table_count)
361{
362 struct exofs_sb_info *sbi = *psbi;
363 struct osd_dev *fscb_od;
364 struct osd_obj_id obj = {.partition = sbi->s_pid,
365 .id = EXOFS_DEVTABLE_ID};
366 struct exofs_device_table *dt;
367 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
368 sizeof(*dt);
369 unsigned numdevs, i;
370 int ret;
371
372 dt = kmalloc(table_bytes, GFP_KERNEL);
373 if (unlikely(!dt)) {
374 EXOFS_ERR("ERROR: allocating %x bytes for device table\n",
375 table_bytes);
376 return -ENOMEM;
377 }
378
379 fscb_od = sbi->s_ods[0];
380 sbi->s_ods[0] = NULL;
381 sbi->s_numdevs = 0;
382 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
383 if (unlikely(ret)) {
384 EXOFS_ERR("ERROR: reading device table\n");
385 goto out;
386 }
387
388 numdevs = le64_to_cpu(dt->dt_num_devices);
389 if (unlikely(!numdevs)) {
390 ret = -EINVAL;
391 goto out;
392 }
393 WARN_ON(table_count != numdevs);
394
395 ret = _read_and_match_data_map(sbi, numdevs, dt);
396 if (unlikely(ret))
397 goto out;
398
399 if (likely(numdevs > 1)) {
400 unsigned size = numdevs * sizeof(sbi->s_ods[0]);
401
402 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
403 if (unlikely(!sbi)) {
404 ret = -ENOMEM;
405 goto out;
406 }
407 memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0]));
408 *psbi = sbi;
409 }
410
411 for (i = 0; i < numdevs; i++) {
412 struct exofs_fscb fscb;
413 struct osd_dev_info odi;
414 struct osd_dev *od;
415
416 if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) {
417 EXOFS_ERR("ERROR: Read all-zeros device entry\n");
418 ret = -EINVAL;
419 goto out;
420 }
421
422 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
423 i, odi.osdname);
424
425 /* On all devices the device table is identical. The user can
426 * specify any one of the participating devices on the command
427 * line. We always keep them in device-table order.
428 */
429 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
430 sbi->s_ods[i] = fscb_od;
431 ++sbi->s_numdevs;
432 fscb_od = NULL;
433 continue;
434 }
435
436 od = osduld_info_lookup(&odi);
437 if (unlikely(IS_ERR(od))) {
438 ret = PTR_ERR(od);
439 EXOFS_ERR("ERROR: device requested is not found "
440 "osd_name-%s =>%d\n", odi.osdname, ret);
441 goto out;
442 }
443
444 sbi->s_ods[i] = od;
445 ++sbi->s_numdevs;
446
447 /* Read the fscb of the other devices to make sure the FS
448 * partition is there.
449 */
450 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
451 sizeof(fscb));
452 if (unlikely(ret)) {
453 EXOFS_ERR("ERROR: Malformed participating device "
454 "error reading fscb osd_name-%s\n",
455 odi.osdname);
456 goto out;
457 }
458
459 /* TODO: verify other information is correct and FS-uuid
460 * matches. Benny what did you say about device table
461 * generation and old devices?
462 */
463 }
464
465out:
466 kfree(dt);
467 if (unlikely(!ret && fscb_od)) {
468 EXOFS_ERR(
469 "ERROR: Bad device-table container device not present\n");
470 osduld_put_device(fscb_od);
471 ret = -EINVAL;
472 }
473
474 return ret;
475}
476
287/* 477/*
288 * Read the superblock from the OSD and fill in the fields 478 * Read the superblock from the OSD and fill in the fields
289 */ 479 */
@@ -292,24 +482,25 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
292 struct inode *root; 482 struct inode *root;
293 struct exofs_mountopt *opts = data; 483 struct exofs_mountopt *opts = data;
294 struct exofs_sb_info *sbi; /*extended info */ 484 struct exofs_sb_info *sbi; /*extended info */
485 struct osd_dev *od; /* Master device */
295 struct exofs_fscb fscb; /*on-disk superblock info */ 486 struct exofs_fscb fscb; /*on-disk superblock info */
296 struct osd_request *or = NULL;
297 struct osd_obj_id obj; 487 struct osd_obj_id obj;
488 unsigned table_count;
298 int ret; 489 int ret;
299 490
300 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 491 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
301 if (!sbi) 492 if (!sbi)
302 return -ENOMEM; 493 return -ENOMEM;
303 sb->s_fs_info = sbi;
304 494
305 /* use mount options to fill superblock */ 495 /* use mount options to fill superblock */
306 sbi->s_dev = osduld_path_lookup(opts->dev_name); 496 od = osduld_path_lookup(opts->dev_name);
307 if (IS_ERR(sbi->s_dev)) { 497 if (IS_ERR(od)) {
308 ret = PTR_ERR(sbi->s_dev); 498 ret = PTR_ERR(od);
309 sbi->s_dev = NULL;
310 goto free_sbi; 499 goto free_sbi;
311 } 500 }
312 501
502 sbi->s_ods[0] = od;
503 sbi->s_numdevs = 1;
313 sbi->s_pid = opts->pid; 504 sbi->s_pid = opts->pid;
314 sbi->s_timeout = opts->timeout; 505 sbi->s_timeout = opts->timeout;
315 506
@@ -323,35 +514,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
323 sb->s_bdev = NULL; 514 sb->s_bdev = NULL;
324 sb->s_dev = 0; 515 sb->s_dev = 0;
325 516
326 /* read data from on-disk superblock object */
327 obj.partition = sbi->s_pid; 517 obj.partition = sbi->s_pid;
328 obj.id = EXOFS_SUPER_ID; 518 obj.id = EXOFS_SUPER_ID;
329 exofs_make_credential(sbi->s_cred, &obj); 519 exofs_make_credential(sbi->s_cred, &obj);
330 520
331 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 521 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
332 if (unlikely(!or)) { 522 if (unlikely(ret))
333 if (!silent)
334 EXOFS_ERR(
335 "exofs_fill_super: osd_start_request failed.\n");
336 ret = -ENOMEM;
337 goto free_sbi;
338 }
339 ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
340 if (unlikely(ret)) {
341 if (!silent)
342 EXOFS_ERR(
343 "exofs_fill_super: osd_req_read_kern failed.\n");
344 ret = -ENOMEM;
345 goto free_sbi;
346 }
347
348 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
349 if (unlikely(ret)) {
350 if (!silent)
351 EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
352 ret = -EIO;
353 goto free_sbi; 523 goto free_sbi;
354 }
355 524
356 sb->s_magic = le16_to_cpu(fscb.s_magic); 525 sb->s_magic = le16_to_cpu(fscb.s_magic);
357 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 526 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
@@ -364,12 +533,26 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
364 ret = -EINVAL; 533 ret = -EINVAL;
365 goto free_sbi; 534 goto free_sbi;
366 } 535 }
536 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
537 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
538 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
539 ret = -EINVAL;
540 goto free_sbi;
541 }
367 542
368 /* start generation numbers from a random point */ 543 /* start generation numbers from a random point */
369 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 544 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
370 spin_lock_init(&sbi->s_next_gen_lock); 545 spin_lock_init(&sbi->s_next_gen_lock);
371 546
547 table_count = le64_to_cpu(fscb.s_dev_table_count);
548 if (table_count) {
549 ret = exofs_read_lookup_dev_table(&sbi, table_count);
550 if (unlikely(ret))
551 goto free_sbi;
552 }
553
372 /* set up operation vectors */ 554 /* set up operation vectors */
555 sb->s_fs_info = sbi;
373 sb->s_op = &exofs_sops; 556 sb->s_op = &exofs_sops;
374 sb->s_export_op = &exofs_export_ops; 557 sb->s_export_op = &exofs_export_ops;
375 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); 558 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
@@ -395,16 +578,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
395 goto free_sbi; 578 goto free_sbi;
396 } 579 }
397 580
398 ret = 0; 581 _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0],
399out: 582 sbi->s_pid);
400 if (or) 583 return 0;
401 osd_end_request(or);
402 return ret;
403 584
404free_sbi: 585free_sbi:
405 osduld_put_device(sbi->s_dev); /* NULL safe */ 586 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
406 kfree(sbi); 587 opts->dev_name, sbi->s_pid, ret);
407 goto out; 588 exofs_free_sbi(sbi);
589 return ret;
408} 590}
409 591
410/* 592/*
@@ -433,7 +615,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
433{ 615{
434 struct super_block *sb = dentry->d_sb; 616 struct super_block *sb = dentry->d_sb;
435 struct exofs_sb_info *sbi = sb->s_fs_info; 617 struct exofs_sb_info *sbi = sb->s_fs_info;
436 struct osd_obj_id obj = {sbi->s_pid, 0}; 618 struct exofs_io_state *ios;
437 struct osd_attr attrs[] = { 619 struct osd_attr attrs[] = {
438 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 620 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
439 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), 621 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -442,32 +624,33 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
442 }; 624 };
443 uint64_t capacity = ULLONG_MAX; 625 uint64_t capacity = ULLONG_MAX;
444 uint64_t used = ULLONG_MAX; 626 uint64_t used = ULLONG_MAX;
445 struct osd_request *or;
446 uint8_t cred_a[OSD_CAP_LEN]; 627 uint8_t cred_a[OSD_CAP_LEN];
447 int ret; 628 int ret;
448 629
449 /* get used/capacity attributes */ 630 ret = exofs_get_io_state(sbi, &ios);
450 exofs_make_credential(cred_a, &obj); 631 if (ret) {
451 632 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
452 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 633 return ret;
453 if (unlikely(!or)) {
454 EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
455 return -ENOMEM;
456 } 634 }
457 635
458 osd_req_get_attributes(or, &obj); 636 exofs_make_credential(cred_a, &ios->obj);
459 osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); 637 ios->cred = sbi->s_cred;
460 ret = exofs_sync_op(or, sbi->s_timeout, cred_a); 638 ios->in_attr = attrs;
639 ios->in_attr_len = ARRAY_SIZE(attrs);
640
641 ret = exofs_sbi_read(ios);
461 if (unlikely(ret)) 642 if (unlikely(ret))
462 goto out; 643 goto out;
463 644
464 ret = extract_attr_from_req(or, &attrs[0]); 645 ret = extract_attr_from_ios(ios, &attrs[0]);
465 if (likely(!ret)) 646 if (likely(!ret)) {
466 capacity = get_unaligned_be64(attrs[0].val_ptr); 647 capacity = get_unaligned_be64(attrs[0].val_ptr);
467 else 648 if (unlikely(!capacity))
649 capacity = ULLONG_MAX;
650 } else
468 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); 651 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
469 652
470 ret = extract_attr_from_req(or, &attrs[1]); 653 ret = extract_attr_from_ios(ios, &attrs[1]);
471 if (likely(!ret)) 654 if (likely(!ret))
472 used = get_unaligned_be64(attrs[1].val_ptr); 655 used = get_unaligned_be64(attrs[1].val_ptr);
473 else 656 else
@@ -476,15 +659,15 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
476 /* fill in the stats buffer */ 659 /* fill in the stats buffer */
477 buf->f_type = EXOFS_SUPER_MAGIC; 660 buf->f_type = EXOFS_SUPER_MAGIC;
478 buf->f_bsize = EXOFS_BLKSIZE; 661 buf->f_bsize = EXOFS_BLKSIZE;
479 buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); 662 buf->f_blocks = capacity >> 9;
480 buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); 663 buf->f_bfree = (capacity - used) >> 9;
481 buf->f_bavail = buf->f_bfree; 664 buf->f_bavail = buf->f_bfree;
482 buf->f_files = sbi->s_numfiles; 665 buf->f_files = sbi->s_numfiles;
483 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; 666 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
484 buf->f_namelen = EXOFS_NAME_LEN; 667 buf->f_namelen = EXOFS_NAME_LEN;
485 668
486out: 669out:
487 osd_end_request(or); 670 exofs_put_io_state(ios);
488 return ret; 671 return ret;
489} 672}
490 673
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c..e9e175949a6 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
6 * and for mapping back from file handles to dentries. 6 * and for mapping back from file handles to dentries.
7 * 7 *
8 * For details on why we do all the strange and hairy things in here 8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/Exporting. 9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */ 10 */
11#include <linux/exportfs.h> 11#include <linux/exportfs.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a7..a99e54318c3 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
339 * Extended attribut handlers 339 * Extended attribut handlers
340 */ 340 */
341static size_t 341static size_t
342ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 342ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
343 const char *name, size_t name_len) 343 const char *name, size_t name_len, int type)
344{ 344{
345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
346 346
347 if (!test_opt(inode->i_sb, POSIX_ACL)) 347 if (!test_opt(dentry->d_sb, POSIX_ACL))
348 return 0; 348 return 0;
349 if (list && size <= list_size) 349 if (list && size <= list_size)
350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
352} 352}
353 353
354static size_t 354static size_t
355ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 355ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
356 const char *name, size_t name_len) 356 const char *name, size_t name_len, int type)
357{ 357{
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 359
360 if (!test_opt(inode->i_sb, POSIX_ACL)) 360 if (!test_opt(dentry->d_sb, POSIX_ACL))
361 return 0; 361 return 0;
362 if (list && size <= list_size) 362 if (list && size <= list_size)
363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
365} 365}
366 366
367static int 367static int
368ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 368ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
369 size_t size, int type)
369{ 370{
370 struct posix_acl *acl; 371 struct posix_acl *acl;
371 int error; 372 int error;
372 373
373 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (strcmp(name, "") != 0)
375 return -EINVAL;
376 if (!test_opt(dentry->d_sb, POSIX_ACL))
374 return -EOPNOTSUPP; 377 return -EOPNOTSUPP;
375 378
376 acl = ext2_get_acl(inode, type); 379 acl = ext2_get_acl(dentry->d_inode, type);
377 if (IS_ERR(acl)) 380 if (IS_ERR(acl))
378 return PTR_ERR(acl); 381 return PTR_ERR(acl);
379 if (acl == NULL) 382 if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
385} 388}
386 389
387static int 390static int
388ext2_xattr_get_acl_access(struct inode *inode, const char *name, 391ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
389 void *buffer, size_t size) 392 size_t size, int flags, int type)
390{
391 if (strcmp(name, "") != 0)
392 return -EINVAL;
393 return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int
397ext2_xattr_get_acl_default(struct inode *inode, const char *name,
398 void *buffer, size_t size)
399{
400 if (strcmp(name, "") != 0)
401 return -EINVAL;
402 return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
403}
404
405static int
406ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
407 size_t size)
408{ 393{
409 struct posix_acl *acl; 394 struct posix_acl *acl;
410 int error; 395 int error;
411 396
412 if (!test_opt(inode->i_sb, POSIX_ACL)) 397 if (strcmp(name, "") != 0)
398 return -EINVAL;
399 if (!test_opt(dentry->d_sb, POSIX_ACL))
413 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
414 if (!is_owner_or_cap(inode)) 401 if (!is_owner_or_cap(dentry->d_inode))
415 return -EPERM; 402 return -EPERM;
416 403
417 if (value) { 404 if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
426 } else 413 } else
427 acl = NULL; 414 acl = NULL;
428 415
429 error = ext2_set_acl(inode, type, acl); 416 error = ext2_set_acl(dentry->d_inode, type, acl);
430 417
431release_and_out: 418release_and_out:
432 posix_acl_release(acl); 419 posix_acl_release(acl);
433 return error; 420 return error;
434} 421}
435 422
436static int
437ext2_xattr_set_acl_access(struct inode *inode, const char *name,
438 const void *value, size_t size, int flags)
439{
440 if (strcmp(name, "") != 0)
441 return -EINVAL;
442 return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
443}
444
445static int
446ext2_xattr_set_acl_default(struct inode *inode, const char *name,
447 const void *value, size_t size, int flags)
448{
449 if (strcmp(name, "") != 0)
450 return -EINVAL;
451 return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
452}
453
454struct xattr_handler ext2_xattr_acl_access_handler = { 423struct xattr_handler ext2_xattr_acl_access_handler = {
455 .prefix = POSIX_ACL_XATTR_ACCESS, 424 .prefix = POSIX_ACL_XATTR_ACCESS,
425 .flags = ACL_TYPE_ACCESS,
456 .list = ext2_xattr_list_acl_access, 426 .list = ext2_xattr_list_acl_access,
457 .get = ext2_xattr_get_acl_access, 427 .get = ext2_xattr_get_acl,
458 .set = ext2_xattr_set_acl_access, 428 .set = ext2_xattr_set_acl,
459}; 429};
460 430
461struct xattr_handler ext2_xattr_acl_default_handler = { 431struct xattr_handler ext2_xattr_acl_default_handler = {
462 .prefix = POSIX_ACL_XATTR_DEFAULT, 432 .prefix = POSIX_ACL_XATTR_DEFAULT,
433 .flags = ACL_TYPE_DEFAULT,
463 .list = ext2_xattr_list_acl_default, 434 .list = ext2_xattr_list_acl_default,
464 .get = ext2_xattr_get_acl_default, 435 .get = ext2_xattr_get_acl,
465 .set = ext2_xattr_set_acl_default, 436 .set = ext2_xattr_set_acl,
466}; 437};
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6cde970b0a1..7516957273e 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -353,8 +353,8 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
353 * ext2_find_entry() 353 * ext2_find_entry()
354 * 354 *
355 * finds an entry in the specified directory with the wanted name. It 355 * finds an entry in the specified directory with the wanted name. It
356 * returns the page in which the entry was found, and the entry itself 356 * returns the page in which the entry was found (as a parameter - res_page),
357 * (as a parameter - res_dir). Page is returned mapped and unlocked. 357 * and the entry itself. Page is returned mapped and unlocked.
358 * Entry is guaranteed to be valid. 358 * Entry is guaranteed to be valid.
359 */ 359 */
360struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, 360struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
@@ -721,5 +721,5 @@ const struct file_operations ext2_dir_operations = {
721#ifdef CONFIG_COMPAT 721#ifdef CONFIG_COMPAT
722 .compat_ioctl = ext2_compat_ioctl, 722 .compat_ioctl = ext2_compat_ioctl,
723#endif 723#endif
724 .fsync = simple_fsync, 724 .fsync = ext2_fsync,
725}; 725};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a8a8e27a06..061914add3c 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -142,7 +142,7 @@ struct dentry *ext2_get_parent(struct dentry *child);
142/* super.c */ 142/* super.c */
143extern void ext2_error (struct super_block *, const char *, const char *, ...) 143extern void ext2_error (struct super_block *, const char *, const char *, ...)
144 __attribute__ ((format (printf, 3, 4))); 144 __attribute__ ((format (printf, 3, 4)));
145extern void ext2_warning (struct super_block *, const char *, const char *, ...) 145extern void ext2_msg(struct super_block *, const char *, const char *, ...)
146 __attribute__ ((format (printf, 3, 4))); 146 __attribute__ ((format (printf, 3, 4)));
147extern void ext2_update_dynamic_rev (struct super_block *sb); 147extern void ext2_update_dynamic_rev (struct super_block *sb);
148extern void ext2_write_super (struct super_block *); 148extern void ext2_write_super (struct super_block *);
@@ -155,6 +155,7 @@ extern void ext2_write_super (struct super_block *);
155extern const struct file_operations ext2_dir_operations; 155extern const struct file_operations ext2_dir_operations;
156 156
157/* file.c */ 157/* file.c */
158extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync);
158extern const struct inode_operations ext2_file_inode_operations; 159extern const struct inode_operations ext2_file_inode_operations;
159extern const struct file_operations ext2_file_operations; 160extern const struct file_operations ext2_file_operations;
160extern const struct file_operations ext2_xip_file_operations; 161extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a2f3afd1a1c..586e3589d4c 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/pagemap.h>
22#include "ext2.h" 23#include "ext2.h"
23#include "xattr.h" 24#include "xattr.h"
24#include "acl.h" 25#include "acl.h"
@@ -38,6 +39,22 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
38 return 0; 39 return 0;
39} 40}
40 41
42int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
43{
44 int ret;
45 struct super_block *sb = dentry->d_inode->i_sb;
46 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
47
48 ret = simple_fsync(file, dentry, datasync);
49 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
50 /* We don't really know where the IO error happened... */
51 ext2_error(sb, __func__,
52 "detected IO error when writing metadata buffers");
53 ret = -EIO;
54 }
55 return ret;
56}
57
41/* 58/*
42 * We have mostly NULL's here: the current defaults are ok for 59 * We have mostly NULL's here: the current defaults are ok for
43 * the ext2 filesystem. 60 * the ext2 filesystem.
@@ -55,7 +72,7 @@ const struct file_operations ext2_file_operations = {
55 .mmap = generic_file_mmap, 72 .mmap = generic_file_mmap,
56 .open = generic_file_open, 73 .open = generic_file_open,
57 .release = ext2_release_file, 74 .release = ext2_release_file,
58 .fsync = simple_fsync, 75 .fsync = ext2_fsync,
59 .splice_read = generic_file_splice_read, 76 .splice_read = generic_file_splice_read,
60 .splice_write = generic_file_splice_write, 77 .splice_write = generic_file_splice_write,
61}; 78};
@@ -72,7 +89,7 @@ const struct file_operations ext2_xip_file_operations = {
72 .mmap = xip_file_mmap, 89 .mmap = xip_file_mmap,
73 .open = generic_file_open, 90 .open = generic_file_open,
74 .release = ext2_release_file, 91 .release = ext2_release_file,
75 .fsync = simple_fsync, 92 .fsync = ext2_fsync,
76}; 93};
77#endif 94#endif
78 95
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index ade634076d0..71b032c65a0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -137,7 +137,8 @@ static int ext2_block_to_path(struct inode *inode,
137 int final = 0; 137 int final = 0;
138 138
139 if (i_block < 0) { 139 if (i_block < 0) {
140 ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); 140 ext2_msg(inode->i_sb, KERN_WARNING,
141 "warning: %s: block < 0", __func__);
141 } else if (i_block < direct_blocks) { 142 } else if (i_block < direct_blocks) {
142 offsets[n++] = i_block; 143 offsets[n++] = i_block;
143 final = direct_blocks; 144 final = direct_blocks;
@@ -157,7 +158,8 @@ static int ext2_block_to_path(struct inode *inode,
157 offsets[n++] = i_block & (ptrs - 1); 158 offsets[n++] = i_block & (ptrs - 1);
158 final = ptrs; 159 final = ptrs;
159 } else { 160 } else {
160 ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); 161 ext2_msg(inode->i_sb, KERN_WARNING,
162 "warning: %s: block is too big", __func__);
161 } 163 }
162 if (boundary) 164 if (boundary)
163 *boundary = final - 1 - (i_block & (ptrs - 1)); 165 *boundary = final - 1 - (i_block & (ptrs - 1));
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1a9ffee47d5..f9cb54a585c 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -58,27 +58,27 @@ void ext2_error (struct super_block * sb, const char * function,
58 } 58 }
59 59
60 va_start(args, fmt); 60 va_start(args, fmt);
61 printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); 61 printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function);
62 vprintk(fmt, args); 62 vprintk(fmt, args);
63 printk("\n"); 63 printk("\n");
64 va_end(args); 64 va_end(args);
65 65
66 if (test_opt(sb, ERRORS_PANIC)) 66 if (test_opt(sb, ERRORS_PANIC))
67 panic("EXT2-fs panic from previous error\n"); 67 panic("EXT2-fs: panic from previous error\n");
68 if (test_opt(sb, ERRORS_RO)) { 68 if (test_opt(sb, ERRORS_RO)) {
69 printk("Remounting filesystem read-only\n"); 69 ext2_msg(sb, KERN_CRIT,
70 "error: remounting filesystem read-only");
70 sb->s_flags |= MS_RDONLY; 71 sb->s_flags |= MS_RDONLY;
71 } 72 }
72} 73}
73 74
74void ext2_warning (struct super_block * sb, const char * function, 75void ext2_msg(struct super_block *sb, const char *prefix,
75 const char * fmt, ...) 76 const char *fmt, ...)
76{ 77{
77 va_list args; 78 va_list args;
78 79
79 va_start(args, fmt); 80 va_start(args, fmt);
80 printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", 81 printk("%sEXT2-fs (%s): ", prefix, sb->s_id);
81 sb->s_id, function);
82 vprintk(fmt, args); 82 vprintk(fmt, args);
83 printk("\n"); 83 printk("\n");
84 va_end(args); 84 va_end(args);
@@ -91,9 +91,9 @@ void ext2_update_dynamic_rev(struct super_block *sb)
91 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) 91 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
92 return; 92 return;
93 93
94 ext2_warning(sb, __func__, 94 ext2_msg(sb, KERN_WARNING,
95 "updating to rev %d because of new feature flag, " 95 "warning: updating to rev %d because of "
96 "running e2fsck is recommended", 96 "new feature flag, running e2fsck is recommended",
97 EXT2_DYNAMIC_REV); 97 EXT2_DYNAMIC_REV);
98 98
99 es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); 99 es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
@@ -419,10 +419,10 @@ static const match_table_t tokens = {
419 {Opt_err, NULL} 419 {Opt_err, NULL}
420}; 420};
421 421
422static int parse_options (char * options, 422static int parse_options(char *options, struct super_block *sb)
423 struct ext2_sb_info *sbi)
424{ 423{
425 char * p; 424 char *p;
425 struct ext2_sb_info *sbi = EXT2_SB(sb);
426 substring_t args[MAX_OPT_ARGS]; 426 substring_t args[MAX_OPT_ARGS];
427 int option; 427 int option;
428 428
@@ -505,7 +505,8 @@ static int parse_options (char * options,
505#else 505#else
506 case Opt_user_xattr: 506 case Opt_user_xattr:
507 case Opt_nouser_xattr: 507 case Opt_nouser_xattr:
508 printk("EXT2 (no)user_xattr options not supported\n"); 508 ext2_msg(sb, KERN_INFO, "(no)user_xattr options"
509 "not supported");
509 break; 510 break;
510#endif 511#endif
511#ifdef CONFIG_EXT2_FS_POSIX_ACL 512#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -518,14 +519,15 @@ static int parse_options (char * options,
518#else 519#else
519 case Opt_acl: 520 case Opt_acl:
520 case Opt_noacl: 521 case Opt_noacl:
521 printk("EXT2 (no)acl options not supported\n"); 522 ext2_msg(sb, KERN_INFO,
523 "(no)acl options not supported");
522 break; 524 break;
523#endif 525#endif
524 case Opt_xip: 526 case Opt_xip:
525#ifdef CONFIG_EXT2_FS_XIP 527#ifdef CONFIG_EXT2_FS_XIP
526 set_opt (sbi->s_mount_opt, XIP); 528 set_opt (sbi->s_mount_opt, XIP);
527#else 529#else
528 printk("EXT2 xip option not supported\n"); 530 ext2_msg(sb, KERN_INFO, "xip option not supported");
529#endif 531#endif
530 break; 532 break;
531 533
@@ -542,19 +544,18 @@ static int parse_options (char * options,
542 case Opt_quota: 544 case Opt_quota:
543 case Opt_usrquota: 545 case Opt_usrquota:
544 case Opt_grpquota: 546 case Opt_grpquota:
545 printk(KERN_ERR 547 ext2_msg(sb, KERN_INFO,
546 "EXT2-fs: quota operations not supported.\n"); 548 "quota operations not supported");
547
548 break; 549 break;
549#endif 550#endif
550 551
551 case Opt_reservation: 552 case Opt_reservation:
552 set_opt(sbi->s_mount_opt, RESERVATION); 553 set_opt(sbi->s_mount_opt, RESERVATION);
553 printk("reservations ON\n"); 554 ext2_msg(sb, KERN_INFO, "reservations ON");
554 break; 555 break;
555 case Opt_noreservation: 556 case Opt_noreservation:
556 clear_opt(sbi->s_mount_opt, RESERVATION); 557 clear_opt(sbi->s_mount_opt, RESERVATION);
557 printk("reservations OFF\n"); 558 ext2_msg(sb, KERN_INFO, "reservations OFF");
558 break; 559 break;
559 case Opt_ignore: 560 case Opt_ignore:
560 break; 561 break;
@@ -573,34 +574,40 @@ static int ext2_setup_super (struct super_block * sb,
573 struct ext2_sb_info *sbi = EXT2_SB(sb); 574 struct ext2_sb_info *sbi = EXT2_SB(sb);
574 575
575 if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { 576 if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
576 printk ("EXT2-fs warning: revision level too high, " 577 ext2_msg(sb, KERN_ERR,
577 "forcing read-only mode\n"); 578 "error: revision level too high, "
579 "forcing read-only mode");
578 res = MS_RDONLY; 580 res = MS_RDONLY;
579 } 581 }
580 if (read_only) 582 if (read_only)
581 return res; 583 return res;
582 if (!(sbi->s_mount_state & EXT2_VALID_FS)) 584 if (!(sbi->s_mount_state & EXT2_VALID_FS))
583 printk ("EXT2-fs warning: mounting unchecked fs, " 585 ext2_msg(sb, KERN_WARNING,
584 "running e2fsck is recommended\n"); 586 "warning: mounting unchecked fs, "
587 "running e2fsck is recommended");
585 else if ((sbi->s_mount_state & EXT2_ERROR_FS)) 588 else if ((sbi->s_mount_state & EXT2_ERROR_FS))
586 printk ("EXT2-fs warning: mounting fs with errors, " 589 ext2_msg(sb, KERN_WARNING,
587 "running e2fsck is recommended\n"); 590 "warning: mounting fs with errors, "
591 "running e2fsck is recommended");
588 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 592 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
589 le16_to_cpu(es->s_mnt_count) >= 593 le16_to_cpu(es->s_mnt_count) >=
590 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 594 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
591 printk ("EXT2-fs warning: maximal mount count reached, " 595 ext2_msg(sb, KERN_WARNING,
592 "running e2fsck is recommended\n"); 596 "warning: maximal mount count reached, "
597 "running e2fsck is recommended");
593 else if (le32_to_cpu(es->s_checkinterval) && 598 else if (le32_to_cpu(es->s_checkinterval) &&
594 (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) 599 (le32_to_cpu(es->s_lastcheck) +
595 printk ("EXT2-fs warning: checktime reached, " 600 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
596 "running e2fsck is recommended\n"); 601 ext2_msg(sb, KERN_WARNING,
602 "warning: checktime reached, "
603 "running e2fsck is recommended");
597 if (!le16_to_cpu(es->s_max_mnt_count)) 604 if (!le16_to_cpu(es->s_max_mnt_count))
598 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 605 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
599 le16_add_cpu(&es->s_mnt_count, 1); 606 le16_add_cpu(&es->s_mnt_count, 1);
600 ext2_write_super(sb); 607 ext2_write_super(sb);
601 if (test_opt (sb, DEBUG)) 608 if (test_opt (sb, DEBUG))
602 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " 609 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
603 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 610 "bpg=%lu, ipg=%lu, mo=%04lx]",
604 EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, 611 EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
605 sbi->s_frag_size, 612 sbi->s_frag_size,
606 sbi->s_groups_count, 613 sbi->s_groups_count,
@@ -767,7 +774,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
767 */ 774 */
768 blocksize = sb_min_blocksize(sb, BLOCK_SIZE); 775 blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
769 if (!blocksize) { 776 if (!blocksize) {
770 printk ("EXT2-fs: unable to set blocksize\n"); 777 ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
771 goto failed_sbi; 778 goto failed_sbi;
772 } 779 }
773 780
@@ -783,7 +790,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
783 } 790 }
784 791
785 if (!(bh = sb_bread(sb, logic_sb_block))) { 792 if (!(bh = sb_bread(sb, logic_sb_block))) {
786 printk ("EXT2-fs: unable to read superblock\n"); 793 ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
787 goto failed_sbi; 794 goto failed_sbi;
788 } 795 }
789 /* 796 /*
@@ -826,7 +833,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
826 833
827 set_opt(sbi->s_mount_opt, RESERVATION); 834 set_opt(sbi->s_mount_opt, RESERVATION);
828 835
829 if (!parse_options ((char *) data, sbi)) 836 if (!parse_options((char *) data, sb))
830 goto failed_mount; 837 goto failed_mount;
831 838
832 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 839 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -840,8 +847,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
840 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || 847 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
841 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 848 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
842 EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U))) 849 EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
843 printk("EXT2-fs warning: feature flags set on rev 0 fs, " 850 ext2_msg(sb, KERN_WARNING,
844 "running e2fsck is recommended\n"); 851 "warning: feature flags set on rev 0 fs, "
852 "running e2fsck is recommended");
845 /* 853 /*
846 * Check feature flags regardless of the revision level, since we 854 * Check feature flags regardless of the revision level, since we
847 * previously didn't change the revision level when setting the flags, 855 * previously didn't change the revision level when setting the flags,
@@ -849,16 +857,16 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
849 */ 857 */
850 features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); 858 features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
851 if (features) { 859 if (features) {
852 printk("EXT2-fs: %s: couldn't mount because of " 860 ext2_msg(sb, KERN_ERR, "error: couldn't mount because of "
853 "unsupported optional features (%x).\n", 861 "unsupported optional features (%x)",
854 sb->s_id, le32_to_cpu(features)); 862 le32_to_cpu(features));
855 goto failed_mount; 863 goto failed_mount;
856 } 864 }
857 if (!(sb->s_flags & MS_RDONLY) && 865 if (!(sb->s_flags & MS_RDONLY) &&
858 (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ 866 (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
859 printk("EXT2-fs: %s: couldn't mount RDWR because of " 867 ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
860 "unsupported optional features (%x).\n", 868 "unsupported optional features (%x)",
861 sb->s_id, le32_to_cpu(features)); 869 le32_to_cpu(features));
862 goto failed_mount; 870 goto failed_mount;
863 } 871 }
864 872
@@ -866,7 +874,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
866 874
867 if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { 875 if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) {
868 if (!silent) 876 if (!silent)
869 printk("XIP: Unsupported blocksize\n"); 877 ext2_msg(sb, KERN_ERR,
878 "error: unsupported blocksize for xip");
870 goto failed_mount; 879 goto failed_mount;
871 } 880 }
872 881
@@ -875,7 +884,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
875 brelse(bh); 884 brelse(bh);
876 885
877 if (!sb_set_blocksize(sb, blocksize)) { 886 if (!sb_set_blocksize(sb, blocksize)) {
878 printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n"); 887 ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
879 goto failed_sbi; 888 goto failed_sbi;
880 } 889 }
881 890
@@ -883,14 +892,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
883 offset = (sb_block*BLOCK_SIZE) % blocksize; 892 offset = (sb_block*BLOCK_SIZE) % blocksize;
884 bh = sb_bread(sb, logic_sb_block); 893 bh = sb_bread(sb, logic_sb_block);
885 if(!bh) { 894 if(!bh) {
886 printk("EXT2-fs: Couldn't read superblock on " 895 ext2_msg(sb, KERN_ERR, "error: couldn't read"
887 "2nd try.\n"); 896 "superblock on 2nd try");
888 goto failed_sbi; 897 goto failed_sbi;
889 } 898 }
890 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); 899 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
891 sbi->s_es = es; 900 sbi->s_es = es;
892 if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { 901 if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
893 printk ("EXT2-fs: Magic mismatch, very weird !\n"); 902 ext2_msg(sb, KERN_ERR, "error: magic mismatch");
894 goto failed_mount; 903 goto failed_mount;
895 } 904 }
896 } 905 }
@@ -906,7 +915,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
906 if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) || 915 if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
907 !is_power_of_2(sbi->s_inode_size) || 916 !is_power_of_2(sbi->s_inode_size) ||
908 (sbi->s_inode_size > blocksize)) { 917 (sbi->s_inode_size > blocksize)) {
909 printk ("EXT2-fs: unsupported inode size: %d\n", 918 ext2_msg(sb, KERN_ERR,
919 "error: unsupported inode size: %d",
910 sbi->s_inode_size); 920 sbi->s_inode_size);
911 goto failed_mount; 921 goto failed_mount;
912 } 922 }
@@ -943,29 +953,33 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
943 953
944 if (sb->s_blocksize != bh->b_size) { 954 if (sb->s_blocksize != bh->b_size) {
945 if (!silent) 955 if (!silent)
946 printk ("VFS: Unsupported blocksize on dev " 956 ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
947 "%s.\n", sb->s_id);
948 goto failed_mount; 957 goto failed_mount;
949 } 958 }
950 959
951 if (sb->s_blocksize != sbi->s_frag_size) { 960 if (sb->s_blocksize != sbi->s_frag_size) {
952 printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", 961 ext2_msg(sb, KERN_ERR,
962 "error: fragsize %lu != blocksize %lu"
963 "(not supported yet)",
953 sbi->s_frag_size, sb->s_blocksize); 964 sbi->s_frag_size, sb->s_blocksize);
954 goto failed_mount; 965 goto failed_mount;
955 } 966 }
956 967
957 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { 968 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
958 printk ("EXT2-fs: #blocks per group too big: %lu\n", 969 ext2_msg(sb, KERN_ERR,
970 "error: #blocks per group too big: %lu",
959 sbi->s_blocks_per_group); 971 sbi->s_blocks_per_group);
960 goto failed_mount; 972 goto failed_mount;
961 } 973 }
962 if (sbi->s_frags_per_group > sb->s_blocksize * 8) { 974 if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
963 printk ("EXT2-fs: #fragments per group too big: %lu\n", 975 ext2_msg(sb, KERN_ERR,
976 "error: #fragments per group too big: %lu",
964 sbi->s_frags_per_group); 977 sbi->s_frags_per_group);
965 goto failed_mount; 978 goto failed_mount;
966 } 979 }
967 if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { 980 if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
968 printk ("EXT2-fs: #inodes per group too big: %lu\n", 981 ext2_msg(sb, KERN_ERR,
982 "error: #inodes per group too big: %lu",
969 sbi->s_inodes_per_group); 983 sbi->s_inodes_per_group);
970 goto failed_mount; 984 goto failed_mount;
971 } 985 }
@@ -979,13 +993,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
979 EXT2_DESC_PER_BLOCK(sb); 993 EXT2_DESC_PER_BLOCK(sb);
980 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); 994 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
981 if (sbi->s_group_desc == NULL) { 995 if (sbi->s_group_desc == NULL) {
982 printk ("EXT2-fs: not enough memory\n"); 996 ext2_msg(sb, KERN_ERR, "error: not enough memory");
983 goto failed_mount; 997 goto failed_mount;
984 } 998 }
985 bgl_lock_init(sbi->s_blockgroup_lock); 999 bgl_lock_init(sbi->s_blockgroup_lock);
986 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); 1000 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
987 if (!sbi->s_debts) { 1001 if (!sbi->s_debts) {
988 printk ("EXT2-fs: not enough memory\n"); 1002 ext2_msg(sb, KERN_ERR, "error: not enough memory");
989 goto failed_mount_group_desc; 1003 goto failed_mount_group_desc;
990 } 1004 }
991 for (i = 0; i < db_count; i++) { 1005 for (i = 0; i < db_count; i++) {
@@ -994,12 +1008,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
994 if (!sbi->s_group_desc[i]) { 1008 if (!sbi->s_group_desc[i]) {
995 for (j = 0; j < i; j++) 1009 for (j = 0; j < i; j++)
996 brelse (sbi->s_group_desc[j]); 1010 brelse (sbi->s_group_desc[j]);
997 printk ("EXT2-fs: unable to read group descriptors\n"); 1011 ext2_msg(sb, KERN_ERR,
1012 "error: unable to read group descriptors");
998 goto failed_mount_group_desc; 1013 goto failed_mount_group_desc;
999 } 1014 }
1000 } 1015 }
1001 if (!ext2_check_descriptors (sb)) { 1016 if (!ext2_check_descriptors (sb)) {
1002 printk ("EXT2-fs: group descriptors corrupted!\n"); 1017 ext2_msg(sb, KERN_ERR, "group descriptors corrupted");
1003 goto failed_mount2; 1018 goto failed_mount2;
1004 } 1019 }
1005 sbi->s_gdb_count = db_count; 1020 sbi->s_gdb_count = db_count;
@@ -1032,7 +1047,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1032 ext2_count_dirs(sb)); 1047 ext2_count_dirs(sb));
1033 } 1048 }
1034 if (err) { 1049 if (err) {
1035 printk(KERN_ERR "EXT2-fs: insufficient memory\n"); 1050 ext2_msg(sb, KERN_ERR, "error: insufficient memory");
1036 goto failed_mount3; 1051 goto failed_mount3;
1037 } 1052 }
1038 /* 1053 /*
@@ -1048,27 +1063,28 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1048 } 1063 }
1049 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1064 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1050 iput(root); 1065 iput(root);
1051 printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); 1066 ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
1052 goto failed_mount3; 1067 goto failed_mount3;
1053 } 1068 }
1054 1069
1055 sb->s_root = d_alloc_root(root); 1070 sb->s_root = d_alloc_root(root);
1056 if (!sb->s_root) { 1071 if (!sb->s_root) {
1057 iput(root); 1072 iput(root);
1058 printk(KERN_ERR "EXT2-fs: get root inode failed\n"); 1073 ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1059 ret = -ENOMEM; 1074 ret = -ENOMEM;
1060 goto failed_mount3; 1075 goto failed_mount3;
1061 } 1076 }
1062 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1077 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1063 ext2_warning(sb, __func__, 1078 ext2_msg(sb, KERN_WARNING,
1064 "mounting ext3 filesystem as ext2"); 1079 "warning: mounting ext3 filesystem as ext2");
1065 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1080 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1066 return 0; 1081 return 0;
1067 1082
1068cantfind_ext2: 1083cantfind_ext2:
1069 if (!silent) 1084 if (!silent)
1070 printk("VFS: Can't find an ext2 filesystem on dev %s.\n", 1085 ext2_msg(sb, KERN_ERR,
1071 sb->s_id); 1086 "error: can't find an ext2 filesystem on dev %s.",
1087 sb->s_id);
1072 goto failed_mount; 1088 goto failed_mount;
1073failed_mount3: 1089failed_mount3:
1074 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1090 percpu_counter_destroy(&sbi->s_freeblocks_counter);
@@ -1089,9 +1105,30 @@ failed_sbi:
1089 return ret; 1105 return ret;
1090} 1106}
1091 1107
1108static void ext2_clear_super_error(struct super_block *sb)
1109{
1110 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1111
1112 if (buffer_write_io_error(sbh)) {
1113 /*
1114 * Oh, dear. A previous attempt to write the
1115 * superblock failed. This could happen because the
1116 * USB device was yanked out. Or it could happen to
1117 * be a transient write error and maybe the block will
1118 * be remapped. Nothing we can do but to retry the
1119 * write and hope for the best.
1120 */
1121 printk(KERN_ERR "EXT2-fs: %s previous I/O error to "
1122 "superblock detected", sb->s_id);
1123 clear_buffer_write_io_error(sbh);
1124 set_buffer_uptodate(sbh);
1125 }
1126}
1127
1092static void ext2_commit_super (struct super_block * sb, 1128static void ext2_commit_super (struct super_block * sb,
1093 struct ext2_super_block * es) 1129 struct ext2_super_block * es)
1094{ 1130{
1131 ext2_clear_super_error(sb);
1095 es->s_wtime = cpu_to_le32(get_seconds()); 1132 es->s_wtime = cpu_to_le32(get_seconds());
1096 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1133 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1097 sb->s_dirt = 0; 1134 sb->s_dirt = 0;
@@ -1099,6 +1136,7 @@ static void ext2_commit_super (struct super_block * sb,
1099 1136
1100static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) 1137static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1101{ 1138{
1139 ext2_clear_super_error(sb);
1102 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1140 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1103 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1141 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1104 es->s_wtime = cpu_to_le32(get_seconds()); 1142 es->s_wtime = cpu_to_le32(get_seconds());
@@ -1121,8 +1159,24 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1121static int ext2_sync_fs(struct super_block *sb, int wait) 1159static int ext2_sync_fs(struct super_block *sb, int wait)
1122{ 1160{
1123 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 1161 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1162 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1124 1163
1125 lock_kernel(); 1164 lock_kernel();
1165 if (buffer_write_io_error(sbh)) {
1166 /*
1167 * Oh, dear. A previous attempt to write the
1168 * superblock failed. This could happen because the
1169 * USB device was yanked out. Or it could happen to
1170 * be a transient write error and maybe the block will
1171 * be remapped. Nothing we can do but to retry the
1172 * write and hope for the best.
1173 */
1174 ext2_msg(sb, KERN_ERR,
1175 "previous I/O error to superblock detected\n");
1176 clear_buffer_write_io_error(sbh);
1177 set_buffer_uptodate(sbh);
1178 }
1179
1126 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { 1180 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1127 ext2_debug("setting valid to 0\n"); 1181 ext2_debug("setting valid to 0\n");
1128 es->s_state &= cpu_to_le16(~EXT2_VALID_FS); 1182 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
@@ -1170,7 +1224,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1170 /* 1224 /*
1171 * Allow the "check" option to be passed as a remount option. 1225 * Allow the "check" option to be passed as a remount option.
1172 */ 1226 */
1173 if (!parse_options (data, sbi)) { 1227 if (!parse_options(data, sb)) {
1174 err = -EINVAL; 1228 err = -EINVAL;
1175 goto restore_opts; 1229 goto restore_opts;
1176 } 1230 }
@@ -1182,7 +1236,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1182 EXT2_MOUNT_XIP if not */ 1236 EXT2_MOUNT_XIP if not */
1183 1237
1184 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { 1238 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
1185 printk("XIP: Unsupported blocksize\n"); 1239 ext2_msg(sb, KERN_WARNING,
1240 "warning: unsupported blocksize for xip");
1186 err = -EINVAL; 1241 err = -EINVAL;
1187 goto restore_opts; 1242 goto restore_opts;
1188 } 1243 }
@@ -1191,8 +1246,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1191 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1246 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1192 (old_mount_opt & EXT2_MOUNT_XIP)) && 1247 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1193 invalidate_inodes(sb)) { 1248 invalidate_inodes(sb)) {
1194 ext2_warning(sb, __func__, "refusing change of xip flag " 1249 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1195 "with busy inodes while remounting"); 1250 "xip flag with busy inodes while remounting");
1196 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; 1251 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
1197 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; 1252 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1198 } 1253 }
@@ -1216,9 +1271,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1216 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, 1271 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
1217 ~EXT2_FEATURE_RO_COMPAT_SUPP); 1272 ~EXT2_FEATURE_RO_COMPAT_SUPP);
1218 if (ret) { 1273 if (ret) {
1219 printk("EXT2-fs: %s: couldn't remount RDWR because of " 1274 ext2_msg(sb, KERN_WARNING,
1220 "unsupported optional features (%x).\n", 1275 "warning: couldn't remount RDWR because of "
1221 sb->s_id, le32_to_cpu(ret)); 1276 "unsupported optional features (%x).",
1277 le32_to_cpu(ret));
1222 err = -EROFS; 1278 err = -EROFS;
1223 goto restore_opts; 1279 goto restore_opts;
1224 } 1280 }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d..904f00642f8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
60#include <linux/mbcache.h> 60#include <linux/mbcache.h>
61#include <linux/quotaops.h> 61#include <linux/quotaops.h>
62#include <linux/rwsem.h> 62#include <linux/rwsem.h>
63#include <linux/security.h>
63#include "ext2.h" 64#include "ext2.h"
64#include "xattr.h" 65#include "xattr.h"
65#include "acl.h" 66#include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
249 * used / required on success. 250 * used / required on success.
250 */ 251 */
251static int 252static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 253ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
253{ 254{
255 struct inode *inode = dentry->d_inode;
254 struct buffer_head *bh = NULL; 256 struct buffer_head *bh = NULL;
255 struct ext2_xattr_entry *entry; 257 struct ext2_xattr_entry *entry;
256 char *end; 258 char *end;
@@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
300 ext2_xattr_handler(entry->e_name_index); 302 ext2_xattr_handler(entry->e_name_index);
301 303
302 if (handler) { 304 if (handler) {
303 size_t size = handler->list(inode, buffer, rest, 305 size_t size = handler->list(dentry, buffer, rest,
304 entry->e_name, 306 entry->e_name,
305 entry->e_name_len); 307 entry->e_name_len,
308 handler->flags);
306 if (buffer) { 309 if (buffer) {
307 if (size > rest) { 310 if (size > rest) {
308 error = -ERANGE; 311 error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
330ssize_t 333ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) 334ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{ 335{
333 return ext2_xattr_list(dentry->d_inode, buffer, size); 336 return ext2_xattr_list(dentry, buffer, size);
334} 337}
335 338
336/* 339/*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb..c8155845ac0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
11#include "xattr.h" 11#include "xattr.h"
12 12
13static size_t 13static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len, int type)
16{ 16{
17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
26} 26}
27 27
28static int 28static int
29ext2_xattr_security_get(struct inode *inode, const char *name, 29ext2_xattr_security_get(struct dentry *dentry, const char *name,
30 void *buffer, size_t size) 30 void *buffer, size_t size, int type)
31{ 31{
32 if (strcmp(name, "") == 0) 32 if (strcmp(name, "") == 0)
33 return -EINVAL; 33 return -EINVAL;
34 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, 34 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
35 buffer, size); 35 buffer, size);
36} 36}
37 37
38static int 38static int
39ext2_xattr_security_set(struct inode *inode, const char *name, 39ext2_xattr_security_set(struct dentry *dentry, const char *name,
40 const void *value, size_t size, int flags) 40 const void *value, size_t size, int flags, int type)
41{ 41{
42 if (strcmp(name, "") == 0) 42 if (strcmp(name, "") == 0)
43 return -EINVAL; 43 return -EINVAL;
44 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, 44 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
45 value, size, flags); 45 value, size, flags);
46} 46}
47 47
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9..2a26d71f477 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext2_xattr_trusted_get(struct inode *inode, const char *name, 34ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, 39 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
40 buffer, size); 40 buffer, size);
41} 41}
42 42
43static int 43static int
44ext2_xattr_trusted_set(struct inode *inode, const char *name, 44ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 45 const void *value, size_t size, int flags, int type)
46{ 46{
47 if (strcmp(name, "") == 0) 47 if (strcmp(name, "") == 0)
48 return -EINVAL; 48 return -EINVAL;
49 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, 49 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
50 value, size, flags); 50 value, size, flags);
51} 51}
52 52
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62..3f6caf3684b 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(dentry->d_sb, XATTR_USER))
22 return 0; 22 return 0;
23 23
24 if (list && total_len <= list_size) { 24 if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
30} 30}
31 31
32static int 32static int
33ext2_xattr_user_get(struct inode *inode, const char *name, 33ext2_xattr_user_get(struct dentry *dentry, const char *name,
34 void *buffer, size_t size) 34 void *buffer, size_t size, int type)
35{ 35{
36 if (strcmp(name, "") == 0) 36 if (strcmp(name, "") == 0)
37 return -EINVAL; 37 return -EINVAL;
38 if (!test_opt(inode->i_sb, XATTR_USER)) 38 if (!test_opt(dentry->d_sb, XATTR_USER))
39 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
40 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); 40 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
41 name, buffer, size);
41} 42}
42 43
43static int 44static int
44ext2_xattr_user_set(struct inode *inode, const char *name, 45ext2_xattr_user_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
46{ 47{
47 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
48 return -EINVAL; 49 return -EINVAL;
49 if (!test_opt(inode->i_sb, XATTR_USER)) 50 if (!test_opt(dentry->d_sb, XATTR_USER))
50 return -EOPNOTSUPP; 51 return -EOPNOTSUPP;
51 52
52 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, 53 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext2_xattr_user_handler = { 57struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index c18fbf3e406..322a56b2dfb 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -69,8 +69,9 @@ void ext2_xip_verify_sb(struct super_block *sb)
69 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && 69 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
70 !sb->s_bdev->bd_disk->fops->direct_access) { 70 !sb->s_bdev->bd_disk->fops->direct_access) {
71 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); 71 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
72 ext2_warning(sb, __func__, 72 ext2_msg(sb, KERN_WARNING,
73 "ignoring xip option - not supported by bdev"); 73 "warning: ignoring xip option - "
74 "not supported by bdev");
74 } 75 }
75} 76}
76 77
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5..82ba3415866 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
366 * Extended attribute handlers 366 * Extended attribute handlers
367 */ 367 */
368static size_t 368static size_t
369ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 369ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
370 const char *name, size_t name_len) 370 const char *name, size_t name_len, int type)
371{ 371{
372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
373 373
374 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (!test_opt(dentry->d_sb, POSIX_ACL))
375 return 0; 375 return 0;
376 if (list && size <= list_len) 376 if (list && size <= list_len)
377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
379} 379}
380 380
381static size_t 381static size_t
382ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 382ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
383 const char *name, size_t name_len) 383 const char *name, size_t name_len, int type)
384{ 384{
385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
386 386
387 if (!test_opt(inode->i_sb, POSIX_ACL)) 387 if (!test_opt(dentry->d_sb, POSIX_ACL))
388 return 0; 388 return 0;
389 if (list && size <= list_len) 389 if (list && size <= list_len)
390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
392} 392}
393 393
394static int 394static int
395ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 395ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
396 size_t size, int type)
396{ 397{
397 struct posix_acl *acl; 398 struct posix_acl *acl;
398 int error; 399 int error;
399 400
400 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (strcmp(name, "") != 0)
402 return -EINVAL;
403 if (!test_opt(dentry->d_sb, POSIX_ACL))
401 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
402 405
403 acl = ext3_get_acl(inode, type); 406 acl = ext3_get_acl(dentry->d_inode, type);
404 if (IS_ERR(acl)) 407 if (IS_ERR(acl))
405 return PTR_ERR(acl); 408 return PTR_ERR(acl);
406 if (acl == NULL) 409 if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
412} 415}
413 416
414static int 417static int
415ext3_xattr_get_acl_access(struct inode *inode, const char *name, 418ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
416 void *buffer, size_t size) 419 size_t size, int flags, int type)
417{
418 if (strcmp(name, "") != 0)
419 return -EINVAL;
420 return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
421}
422
423static int
424ext3_xattr_get_acl_default(struct inode *inode, const char *name,
425 void *buffer, size_t size)
426{
427 if (strcmp(name, "") != 0)
428 return -EINVAL;
429 return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
430}
431
432static int
433ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
434 size_t size)
435{ 420{
421 struct inode *inode = dentry->d_inode;
436 handle_t *handle; 422 handle_t *handle;
437 struct posix_acl *acl; 423 struct posix_acl *acl;
438 int error, retries = 0; 424 int error, retries = 0;
439 425
426 if (strcmp(name, "") != 0)
427 return -EINVAL;
440 if (!test_opt(inode->i_sb, POSIX_ACL)) 428 if (!test_opt(inode->i_sb, POSIX_ACL))
441 return -EOPNOTSUPP; 429 return -EOPNOTSUPP;
442 if (!is_owner_or_cap(inode)) 430 if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
468 return error; 456 return error;
469} 457}
470 458
471static int
472ext3_xattr_set_acl_access(struct inode *inode, const char *name,
473 const void *value, size_t size, int flags)
474{
475 if (strcmp(name, "") != 0)
476 return -EINVAL;
477 return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
478}
479
480static int
481ext3_xattr_set_acl_default(struct inode *inode, const char *name,
482 const void *value, size_t size, int flags)
483{
484 if (strcmp(name, "") != 0)
485 return -EINVAL;
486 return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
487}
488
489struct xattr_handler ext3_xattr_acl_access_handler = { 459struct xattr_handler ext3_xattr_acl_access_handler = {
490 .prefix = POSIX_ACL_XATTR_ACCESS, 460 .prefix = POSIX_ACL_XATTR_ACCESS,
461 .flags = ACL_TYPE_ACCESS,
491 .list = ext3_xattr_list_acl_access, 462 .list = ext3_xattr_list_acl_access,
492 .get = ext3_xattr_get_acl_access, 463 .get = ext3_xattr_get_acl,
493 .set = ext3_xattr_set_acl_access, 464 .set = ext3_xattr_set_acl,
494}; 465};
495 466
496struct xattr_handler ext3_xattr_acl_default_handler = { 467struct xattr_handler ext3_xattr_acl_default_handler = {
497 .prefix = POSIX_ACL_XATTR_DEFAULT, 468 .prefix = POSIX_ACL_XATTR_DEFAULT,
469 .flags = ACL_TYPE_DEFAULT,
498 .list = ext3_xattr_list_acl_default, 470 .list = ext3_xattr_list_acl_default,
499 .get = ext3_xattr_get_acl_default, 471 .get = ext3_xattr_get_acl,
500 .set = ext3_xattr_set_acl_default, 472 .set = ext3_xattr_set_acl,
501}; 473};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 354ed3b47b3..455e6e6e5cb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
970 if (max_blocks > DIO_MAX_BLOCKS) 970 if (max_blocks > DIO_MAX_BLOCKS)
971 max_blocks = DIO_MAX_BLOCKS; 971 max_blocks = DIO_MAX_BLOCKS;
972 handle = ext3_journal_start(inode, DIO_CREDITS + 972 handle = ext3_journal_start(inode, DIO_CREDITS +
973 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); 973 EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
974 if (IS_ERR(handle)) { 974 if (IS_ERR(handle)) {
975 ret = PTR_ERR(handle); 975 ret = PTR_ERR(handle);
976 goto out; 976 goto out;
@@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
1151 return ext3_journal_get_write_access(handle, bh); 1151 return ext3_journal_get_write_access(handle, bh);
1152} 1152}
1153 1153
1154/*
1155 * Truncate blocks that were not used by write. We have to truncate the
1156 * pagecache as well so that corresponding buffers get properly unmapped.
1157 */
1158static void ext3_truncate_failed_write(struct inode *inode)
1159{
1160 truncate_inode_pages(inode->i_mapping, inode->i_size);
1161 ext3_truncate(inode);
1162}
1163
1154static int ext3_write_begin(struct file *file, struct address_space *mapping, 1164static int ext3_write_begin(struct file *file, struct address_space *mapping,
1155 loff_t pos, unsigned len, unsigned flags, 1165 loff_t pos, unsigned len, unsigned flags,
1156 struct page **pagep, void **fsdata) 1166 struct page **pagep, void **fsdata)
@@ -1209,7 +1219,7 @@ write_begin_failed:
1209 unlock_page(page); 1219 unlock_page(page);
1210 page_cache_release(page); 1220 page_cache_release(page);
1211 if (pos + len > inode->i_size) 1221 if (pos + len > inode->i_size)
1212 ext3_truncate(inode); 1222 ext3_truncate_failed_write(inode);
1213 } 1223 }
1214 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1224 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1215 goto retry; 1225 goto retry;
@@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
1304 page_cache_release(page); 1314 page_cache_release(page);
1305 1315
1306 if (pos + len > inode->i_size) 1316 if (pos + len > inode->i_size)
1307 ext3_truncate(inode); 1317 ext3_truncate_failed_write(inode);
1308 return ret ? ret : copied; 1318 return ret ? ret : copied;
1309} 1319}
1310 1320
@@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
1330 page_cache_release(page); 1340 page_cache_release(page);
1331 1341
1332 if (pos + len > inode->i_size) 1342 if (pos + len > inode->i_size)
1333 ext3_truncate(inode); 1343 ext3_truncate_failed_write(inode);
1334 return ret ? ret : copied; 1344 return ret ? ret : copied;
1335} 1345}
1336 1346
@@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
1383 page_cache_release(page); 1393 page_cache_release(page);
1384 1394
1385 if (pos + len > inode->i_size) 1395 if (pos + len > inode->i_size)
1386 ext3_truncate(inode); 1396 ext3_truncate_failed_write(inode);
1387 return ret ? ret : copied; 1397 return ret ? ret : copied;
1388} 1398}
1389 1399
@@ -2033,7 +2043,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth,
2033 int k, err; 2043 int k, err;
2034 2044
2035 *top = 0; 2045 *top = 0;
2036 /* Make k index the deepest non-null offest + 1 */ 2046 /* Make k index the deepest non-null offset + 1 */
2037 for (k = depth; k > 1 && !offsets[k-1]; k--) 2047 for (k = depth; k > 1 && !offsets[k-1]; k--)
2038 ; 2048 ;
2039 partial = ext3_get_branch(inode, k, offsets, chain, &err); 2049 partial = ext3_get_branch(inode, k, offsets, chain, &err);
@@ -3136,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3136 3146
3137 /* (user+group)*(old+new) structure, inode write (sb, 3147 /* (user+group)*(old+new) structure, inode write (sb,
3138 * inode block, ? - but truncate inode update has it) */ 3148 * inode block, ? - but truncate inode update has it) */
3139 handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ 3149 handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
3140 EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 3150 EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
3141 if (IS_ERR(handle)) { 3151 if (IS_ERR(handle)) {
3142 error = PTR_ERR(handle); 3152 error = PTR_ERR(handle);
3143 goto err_out; 3153 goto err_out;
@@ -3229,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3229#ifdef CONFIG_QUOTA 3239#ifdef CONFIG_QUOTA
3230 /* We know that structure was already allocated during vfs_dq_init so 3240 /* We know that structure was already allocated during vfs_dq_init so
3231 * we will be updating only the data blocks + inodes */ 3241 * we will be updating only the data blocks + inodes */
3232 ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); 3242 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
3233#endif 3243#endif
3234 3244
3235 return ret; 3245 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b7..7b0e44f7d66 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1699retry: 1699retry:
1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1702 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1702 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1703 if (IS_ERR(handle)) 1703 if (IS_ERR(handle))
1704 return PTR_ERR(handle); 1704 return PTR_ERR(handle);
1705 1705
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1733retry: 1733retry:
1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1736 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1736 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1737 if (IS_ERR(handle)) 1737 if (IS_ERR(handle))
1738 return PTR_ERR(handle); 1738 return PTR_ERR(handle);
1739 1739
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1769retry: 1769retry:
1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1772 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1773 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1774 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1775 1775
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1920 struct ext3_iloc iloc; 1920 struct ext3_iloc iloc;
1921 int err = 0, rc; 1921 int err = 0, rc;
1922 1922
1923 lock_super(sb); 1923 mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
1924 if (!list_empty(&EXT3_I(inode)->i_orphan)) 1924 if (!list_empty(&EXT3_I(inode)->i_orphan))
1925 goto out_unlock; 1925 goto out_unlock;
1926 1926
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1929 1929
1930 /* @@@ FIXME: Observation from aviro: 1930 /* @@@ FIXME: Observation from aviro:
1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block 1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
1932 * here (on lock_super()), so race with ext3_link() which might bump 1932 * here (on s_orphan_lock), so race with ext3_link() which might bump
1933 * ->i_nlink. For, say it, character device. Not a regular file, 1933 * ->i_nlink. For, say it, character device. Not a regular file,
1934 * not a directory, not a symlink and ->i_nlink > 0. 1934 * not a directory, not a symlink and ->i_nlink > 0.
1935 *
1936 * tytso, 4/25/2009: I'm not sure how that could happen;
1937 * shouldn't the fs core protect us from these sort of
1938 * unlink()/link() races?
1935 */ 1939 */
1936 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1940 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1937 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 1941 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1968 jbd_debug(4, "orphan inode %lu will point to %d\n", 1972 jbd_debug(4, "orphan inode %lu will point to %d\n",
1969 inode->i_ino, NEXT_ORPHAN(inode)); 1973 inode->i_ino, NEXT_ORPHAN(inode));
1970out_unlock: 1974out_unlock:
1971 unlock_super(sb); 1975 mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
1972 ext3_std_error(inode->i_sb, err); 1976 ext3_std_error(inode->i_sb, err);
1973 return err; 1977 return err;
1974} 1978}
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
1986 struct ext3_iloc iloc; 1990 struct ext3_iloc iloc;
1987 int err = 0; 1991 int err = 0;
1988 1992
1989 lock_super(inode->i_sb); 1993 mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
1990 if (list_empty(&ei->i_orphan)) { 1994 if (list_empty(&ei->i_orphan))
1991 unlock_super(inode->i_sb); 1995 goto out;
1992 return 0;
1993 }
1994 1996
1995 ino_next = NEXT_ORPHAN(inode); 1997 ino_next = NEXT_ORPHAN(inode);
1996 prev = ei->i_orphan.prev; 1998 prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
2040out_err: 2042out_err:
2041 ext3_std_error(inode->i_sb, err); 2043 ext3_std_error(inode->i_sb, err);
2042out: 2044out:
2043 unlock_super(inode->i_sb); 2045 mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
2044 return err; 2046 return err;
2045 2047
2046out_brelse: 2048out_brelse:
@@ -2175,7 +2177,7 @@ static int ext3_symlink (struct inode * dir,
2175retry: 2177retry:
2176 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2178 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2177 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2179 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2178 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 2180 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2179 if (IS_ERR(handle)) 2181 if (IS_ERR(handle))
2180 return PTR_ERR(handle); 2182 return PTR_ERR(handle);
2181 2183
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8359e7b3dc8..54351ac7cef 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
209 if (IS_ERR(handle)) 209 if (IS_ERR(handle))
210 return PTR_ERR(handle); 210 return PTR_ERR(handle);
211 211
212 lock_super(sb); 212 mutex_lock(&sbi->s_resize_lock);
213 if (input->group != sbi->s_groups_count) { 213 if (input->group != sbi->s_groups_count) {
214 err = -EBUSY; 214 err = -EBUSY;
215 goto exit_journal; 215 goto exit_journal;
@@ -266,7 +266,7 @@ static int setup_new_group_blocks(struct super_block *sb,
266 goto exit_bh; 266 goto exit_bh;
267 267
268 if (IS_ERR(gdb = bclean(handle, sb, block))) { 268 if (IS_ERR(gdb = bclean(handle, sb, block))) {
269 err = PTR_ERR(bh); 269 err = PTR_ERR(gdb);
270 goto exit_bh; 270 goto exit_bh;
271 } 271 }
272 ext3_journal_dirty_metadata(handle, gdb); 272 ext3_journal_dirty_metadata(handle, gdb);
@@ -324,7 +324,7 @@ exit_bh:
324 brelse(bh); 324 brelse(bh);
325 325
326exit_journal: 326exit_journal:
327 unlock_super(sb); 327 mutex_unlock(&sbi->s_resize_lock);
328 if ((err2 = ext3_journal_stop(handle)) && !err) 328 if ((err2 = ext3_journal_stop(handle)) && !err)
329 err = err2; 329 err = err2;
330 330
@@ -662,11 +662,12 @@ exit_free:
662 * important part is that the new block and inode counts are in the backup 662 * important part is that the new block and inode counts are in the backup
663 * superblocks, and the location of the new group metadata in the GDT backups. 663 * superblocks, and the location of the new group metadata in the GDT backups.
664 * 664 *
665 * We do not need lock_super() for this, because these blocks are not 665 * We do not need take the s_resize_lock for this, because these
666 * otherwise touched by the filesystem code when it is mounted. We don't 666 * blocks are not otherwise touched by the filesystem code when it is
667 * need to worry about last changing from sbi->s_groups_count, because the 667 * mounted. We don't need to worry about last changing from
668 * worst that can happen is that we do not copy the full number of backups 668 * sbi->s_groups_count, because the worst that can happen is that we
669 * at this time. The resize which changed s_groups_count will backup again. 669 * do not copy the full number of backups at this time. The resize
670 * which changed s_groups_count will backup again.
670 */ 671 */
671static void update_backups(struct super_block *sb, 672static void update_backups(struct super_block *sb,
672 int blk_off, char *data, int size) 673 int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
825 goto exit_put; 826 goto exit_put;
826 } 827 }
827 828
828 lock_super(sb); 829 mutex_lock(&sbi->s_resize_lock);
829 if (input->group != sbi->s_groups_count) { 830 if (input->group != sbi->s_groups_count) {
830 ext3_warning(sb, __func__, 831 ext3_warning(sb, __func__,
831 "multiple resizers run on filesystem!"); 832 "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
856 /* 857 /*
857 * OK, now we've set up the new group. Time to make it active. 858 * OK, now we've set up the new group. Time to make it active.
858 * 859 *
859 * Current kernels don't lock all allocations via lock_super(), 860 * We do not lock all allocations via s_resize_lock
860 * so we have to be safe wrt. concurrent accesses the group 861 * so we have to be safe wrt. concurrent accesses the group
861 * data. So we need to be careful to set all of the relevant 862 * data. So we need to be careful to set all of the relevant
862 * group descriptor data etc. *before* we enable the group. 863 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
900 * 901 *
901 * The precise rules we use are: 902 * The precise rules we use are:
902 * 903 *
903 * * Writers of s_groups_count *must* hold lock_super 904 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 905 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 906 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 907 * data and before modifying the groups count
907 * 908 *
908 * * Readers must hold lock_super() over the access 909 * * Readers must hold s_resize_lock over the access
909 * OR 910 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 911 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 912 * and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
936 ext3_journal_dirty_metadata(handle, sbi->s_sbh); 937 ext3_journal_dirty_metadata(handle, sbi->s_sbh);
937 938
938exit_journal: 939exit_journal:
939 unlock_super(sb); 940 mutex_unlock(&sbi->s_resize_lock);
940 if ((err2 = ext3_journal_stop(handle)) && !err) 941 if ((err2 = ext3_journal_stop(handle)) && !err)
941 err = err2; 942 err = err2;
942 if (!err) { 943 if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
973 974
974 /* We don't need to worry about locking wrt other resizers just 975 /* We don't need to worry about locking wrt other resizers just
975 * yet: we're going to revalidate es->s_blocks_count after 976 * yet: we're going to revalidate es->s_blocks_count after
976 * taking lock_super() below. */ 977 * taking the s_resize_lock below. */
977 o_blocks_count = le32_to_cpu(es->s_blocks_count); 978 o_blocks_count = le32_to_cpu(es->s_blocks_count);
978 o_groups_count = EXT3_SB(sb)->s_groups_count; 979 o_groups_count = EXT3_SB(sb)->s_groups_count;
979 980
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1045 goto exit_put; 1046 goto exit_put;
1046 } 1047 }
1047 1048
1048 lock_super(sb); 1049 mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1049 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1050 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1050 ext3_warning(sb, __func__, 1051 ext3_warning(sb, __func__,
1051 "multiple resizers run on filesystem!"); 1052 "multiple resizers run on filesystem!");
1052 unlock_super(sb); 1053 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1053 ext3_journal_stop(handle); 1054 ext3_journal_stop(handle);
1054 err = -EBUSY; 1055 err = -EBUSY;
1055 goto exit_put; 1056 goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1059 EXT3_SB(sb)->s_sbh))) { 1060 EXT3_SB(sb)->s_sbh))) {
1060 ext3_warning(sb, __func__, 1061 ext3_warning(sb, __func__,
1061 "error %d on journal write access", err); 1062 "error %d on journal write access", err);
1062 unlock_super(sb); 1063 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1063 ext3_journal_stop(handle); 1064 ext3_journal_stop(handle);
1064 goto exit_put; 1065 goto exit_put;
1065 } 1066 }
1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1067 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1068 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1068 unlock_super(sb); 1069 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1069 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, 1070 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
1070 o_blocks_count + add); 1071 o_blocks_count + add);
1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1072 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 427496c4767..afa2b569da1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -135,12 +135,24 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
135 if (is_handle_aborted(handle)) 135 if (is_handle_aborted(handle))
136 return; 136 return;
137 137
138 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 138 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
139 caller, errstr, err_fn); 139 caller, errstr, err_fn);
140 140
141 journal_abort_handle(handle); 141 journal_abort_handle(handle);
142} 142}
143 143
144void ext3_msg(struct super_block *sb, const char *prefix,
145 const char *fmt, ...)
146{
147 va_list args;
148
149 va_start(args, fmt);
150 printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
151 vprintk(fmt, args);
152 printk("\n");
153 va_end(args);
154}
155
144/* Deal with the reporting of failure conditions on a filesystem such as 156/* Deal with the reporting of failure conditions on a filesystem such as
145 * inconsistencies detected or read IO failures. 157 * inconsistencies detected or read IO failures.
146 * 158 *
@@ -174,12 +186,13 @@ static void ext3_handle_error(struct super_block *sb)
174 journal_abort(journal, -EIO); 186 journal_abort(journal, -EIO);
175 } 187 }
176 if (test_opt (sb, ERRORS_RO)) { 188 if (test_opt (sb, ERRORS_RO)) {
177 printk (KERN_CRIT "Remounting filesystem read-only\n"); 189 ext3_msg(sb, KERN_CRIT,
190 "error: remounting filesystem read-only");
178 sb->s_flags |= MS_RDONLY; 191 sb->s_flags |= MS_RDONLY;
179 } 192 }
180 ext3_commit_super(sb, es, 1); 193 ext3_commit_super(sb, es, 1);
181 if (test_opt(sb, ERRORS_PANIC)) 194 if (test_opt(sb, ERRORS_PANIC))
182 panic("EXT3-fs (device %s): panic forced after error\n", 195 panic("EXT3-fs (%s): panic forced after error\n",
183 sb->s_id); 196 sb->s_id);
184} 197}
185 198
@@ -247,8 +260,7 @@ void __ext3_std_error (struct super_block * sb, const char * function,
247 return; 260 return;
248 261
249 errstr = ext3_decode_error(sb, errno, nbuf); 262 errstr = ext3_decode_error(sb, errno, nbuf);
250 printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", 263 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
251 sb->s_id, function, errstr);
252 264
253 ext3_handle_error(sb); 265 ext3_handle_error(sb);
254} 266}
@@ -268,21 +280,20 @@ void ext3_abort (struct super_block * sb, const char * function,
268{ 280{
269 va_list args; 281 va_list args;
270 282
271 printk (KERN_CRIT "ext3_abort called.\n");
272
273 va_start(args, fmt); 283 va_start(args, fmt);
274 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 284 printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
275 vprintk(fmt, args); 285 vprintk(fmt, args);
276 printk("\n"); 286 printk("\n");
277 va_end(args); 287 va_end(args);
278 288
279 if (test_opt(sb, ERRORS_PANIC)) 289 if (test_opt(sb, ERRORS_PANIC))
280 panic("EXT3-fs panic from previous error\n"); 290 panic("EXT3-fs: panic from previous error\n");
281 291
282 if (sb->s_flags & MS_RDONLY) 292 if (sb->s_flags & MS_RDONLY)
283 return; 293 return;
284 294
285 printk(KERN_CRIT "Remounting filesystem read-only\n"); 295 ext3_msg(sb, KERN_CRIT,
296 "error: remounting filesystem read-only");
286 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
287 sb->s_flags |= MS_RDONLY; 298 sb->s_flags |= MS_RDONLY;
288 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 299 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
@@ -296,7 +307,7 @@ void ext3_warning (struct super_block * sb, const char * function,
296 va_list args; 307 va_list args;
297 308
298 va_start(args, fmt); 309 va_start(args, fmt);
299 printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", 310 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
300 sb->s_id, function); 311 sb->s_id, function);
301 vprintk(fmt, args); 312 vprintk(fmt, args);
302 printk("\n"); 313 printk("\n");
@@ -310,10 +321,10 @@ void ext3_update_dynamic_rev(struct super_block *sb)
310 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 321 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
311 return; 322 return;
312 323
313 ext3_warning(sb, __func__, 324 ext3_msg(sb, KERN_WARNING,
314 "updating to rev %d because of new feature flag, " 325 "warning: updating to rev %d because of "
315 "running e2fsck is recommended", 326 "new feature flag, running e2fsck is recommended",
316 EXT3_DYNAMIC_REV); 327 EXT3_DYNAMIC_REV);
317 328
318 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 329 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
319 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 330 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
@@ -331,7 +342,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
331/* 342/*
332 * Open the external journal device 343 * Open the external journal device
333 */ 344 */
334static struct block_device *ext3_blkdev_get(dev_t dev) 345static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
335{ 346{
336 struct block_device *bdev; 347 struct block_device *bdev;
337 char b[BDEVNAME_SIZE]; 348 char b[BDEVNAME_SIZE];
@@ -342,8 +353,9 @@ static struct block_device *ext3_blkdev_get(dev_t dev)
342 return bdev; 353 return bdev;
343 354
344fail: 355fail:
345 printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", 356 ext3_msg(sb, "error: failed to open journal device %s: %ld",
346 __bdevname(dev, b), PTR_ERR(bdev)); 357 __bdevname(dev, b), PTR_ERR(bdev));
358
347 return NULL; 359 return NULL;
348} 360}
349 361
@@ -378,13 +390,13 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
378{ 390{
379 struct list_head *l; 391 struct list_head *l;
380 392
381 printk(KERN_ERR "sb orphan head is %d\n", 393 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
382 le32_to_cpu(sbi->s_es->s_last_orphan)); 394 le32_to_cpu(sbi->s_es->s_last_orphan));
383 395
384 printk(KERN_ERR "sb_info orphan list:\n"); 396 ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
385 list_for_each(l, &sbi->s_orphan) { 397 list_for_each(l, &sbi->s_orphan) {
386 struct inode *inode = orphan_list_entry(l); 398 struct inode *inode = orphan_list_entry(l);
387 printk(KERN_ERR " " 399 ext3_msg(sb, KERN_ERR, " "
388 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 400 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
389 inode->i_sb->s_id, inode->i_ino, inode, 401 inode->i_sb->s_id, inode->i_ino, inode,
390 inode->i_mode, inode->i_nlink, 402 inode->i_mode, inode->i_nlink,
@@ -527,9 +539,22 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
527#if defined(CONFIG_QUOTA) 539#if defined(CONFIG_QUOTA)
528 struct ext3_sb_info *sbi = EXT3_SB(sb); 540 struct ext3_sb_info *sbi = EXT3_SB(sb);
529 541
530 if (sbi->s_jquota_fmt) 542 if (sbi->s_jquota_fmt) {
531 seq_printf(seq, ",jqfmt=%s", 543 char *fmtname = "";
532 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 544
545 switch (sbi->s_jquota_fmt) {
546 case QFMT_VFS_OLD:
547 fmtname = "vfsold";
548 break;
549 case QFMT_VFS_V0:
550 fmtname = "vfsv0";
551 break;
552 case QFMT_VFS_V1:
553 fmtname = "vfsv1";
554 break;
555 }
556 seq_printf(seq, ",jqfmt=%s", fmtname);
557 }
533 558
534 if (sbi->s_qf_names[USRQUOTA]) 559 if (sbi->s_qf_names[USRQUOTA])
535 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 560 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -636,6 +661,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
636 if (test_opt(sb, DATA_ERR_ABORT)) 661 if (test_opt(sb, DATA_ERR_ABORT))
637 seq_puts(seq, ",data_err=abort"); 662 seq_puts(seq, ",data_err=abort");
638 663
664 if (test_opt(sb, NOLOAD))
665 seq_puts(seq, ",norecovery");
666
639 ext3_show_quota_options(seq, sb); 667 ext3_show_quota_options(seq, sb);
640 668
641 return 0; 669 return 0;
@@ -787,9 +815,9 @@ enum {
787 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 815 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
788 Opt_data_err_abort, Opt_data_err_ignore, 816 Opt_data_err_abort, Opt_data_err_ignore,
789 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 817 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
790 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 818 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
791 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 819 Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
792 Opt_grpquota 820 Opt_usrquota, Opt_grpquota
793}; 821};
794 822
795static const match_table_t tokens = { 823static const match_table_t tokens = {
@@ -818,6 +846,7 @@ static const match_table_t tokens = {
818 {Opt_reservation, "reservation"}, 846 {Opt_reservation, "reservation"},
819 {Opt_noreservation, "noreservation"}, 847 {Opt_noreservation, "noreservation"},
820 {Opt_noload, "noload"}, 848 {Opt_noload, "noload"},
849 {Opt_noload, "norecovery"},
821 {Opt_nobh, "nobh"}, 850 {Opt_nobh, "nobh"},
822 {Opt_bh, "bh"}, 851 {Opt_bh, "bh"},
823 {Opt_commit, "commit=%u"}, 852 {Opt_commit, "commit=%u"},
@@ -836,6 +865,7 @@ static const match_table_t tokens = {
836 {Opt_grpjquota, "grpjquota=%s"}, 865 {Opt_grpjquota, "grpjquota=%s"},
837 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 866 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
838 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 867 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
868 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
839 {Opt_grpquota, "grpquota"}, 869 {Opt_grpquota, "grpquota"},
840 {Opt_noquota, "noquota"}, 870 {Opt_noquota, "noquota"},
841 {Opt_quota, "quota"}, 871 {Opt_quota, "quota"},
@@ -845,7 +875,7 @@ static const match_table_t tokens = {
845 {Opt_err, NULL}, 875 {Opt_err, NULL},
846}; 876};
847 877
848static ext3_fsblk_t get_sb_block(void **data) 878static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
849{ 879{
850 ext3_fsblk_t sb_block; 880 ext3_fsblk_t sb_block;
851 char *options = (char *) *data; 881 char *options = (char *) *data;
@@ -856,7 +886,7 @@ static ext3_fsblk_t get_sb_block(void **data)
856 /*todo: use simple_strtoll with >32bit ext3 */ 886 /*todo: use simple_strtoll with >32bit ext3 */
857 sb_block = simple_strtoul(options, &options, 0); 887 sb_block = simple_strtoul(options, &options, 0);
858 if (*options && *options != ',') { 888 if (*options && *options != ',') {
859 printk("EXT3-fs: Invalid sb specification: %s\n", 889 ext3_msg(sb, "error: invalid sb specification: %s",
860 (char *) *data); 890 (char *) *data);
861 return 1; 891 return 1;
862 } 892 }
@@ -956,7 +986,8 @@ static int parse_options (char *options, struct super_block *sb,
956#else 986#else
957 case Opt_user_xattr: 987 case Opt_user_xattr:
958 case Opt_nouser_xattr: 988 case Opt_nouser_xattr:
959 printk("EXT3 (no)user_xattr options not supported\n"); 989 ext3_msg(sb, KERN_INFO,
990 "(no)user_xattr options not supported");
960 break; 991 break;
961#endif 992#endif
962#ifdef CONFIG_EXT3_FS_POSIX_ACL 993#ifdef CONFIG_EXT3_FS_POSIX_ACL
@@ -969,7 +1000,8 @@ static int parse_options (char *options, struct super_block *sb,
969#else 1000#else
970 case Opt_acl: 1001 case Opt_acl:
971 case Opt_noacl: 1002 case Opt_noacl:
972 printk("EXT3 (no)acl options not supported\n"); 1003 ext3_msg(sb, KERN_INFO,
1004 "(no)acl options not supported");
973 break; 1005 break;
974#endif 1006#endif
975 case Opt_reservation: 1007 case Opt_reservation:
@@ -985,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb,
985 user to specify an existing inode to be the 1017 user to specify an existing inode to be the
986 journal file. */ 1018 journal file. */
987 if (is_remount) { 1019 if (is_remount) {
988 printk(KERN_ERR "EXT3-fs: cannot specify " 1020 ext3_msg(sb, KERN_ERR, "error: cannot specify "
989 "journal on remount\n"); 1021 "journal on remount");
990 return 0; 1022 return 0;
991 } 1023 }
992 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1024 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
993 break; 1025 break;
994 case Opt_journal_inum: 1026 case Opt_journal_inum:
995 if (is_remount) { 1027 if (is_remount) {
996 printk(KERN_ERR "EXT3-fs: cannot specify " 1028 ext3_msg(sb, KERN_ERR, "error: cannot specify "
997 "journal on remount\n"); 1029 "journal on remount");
998 return 0; 1030 return 0;
999 } 1031 }
1000 if (match_int(&args[0], &option)) 1032 if (match_int(&args[0], &option))
@@ -1003,8 +1035,8 @@ static int parse_options (char *options, struct super_block *sb,
1003 break; 1035 break;
1004 case Opt_journal_dev: 1036 case Opt_journal_dev:
1005 if (is_remount) { 1037 if (is_remount) {
1006 printk(KERN_ERR "EXT3-fs: cannot specify " 1038 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1007 "journal on remount\n"); 1039 "journal on remount");
1008 return 0; 1040 return 0;
1009 } 1041 }
1010 if (match_int(&args[0], &option)) 1042 if (match_int(&args[0], &option))
@@ -1036,12 +1068,11 @@ static int parse_options (char *options, struct super_block *sb,
1036 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 1068 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
1037 == data_opt) 1069 == data_opt)
1038 break; 1070 break;
1039 printk(KERN_ERR 1071 ext3_msg(sb, KERN_ERR,
1040 "EXT3-fs (device %s): Cannot change " 1072 "error: cannot change "
1041 "data mode on remount. The filesystem " 1073 "data mode on remount. The filesystem "
1042 "is mounted in data=%s mode and you " 1074 "is mounted in data=%s mode and you "
1043 "try to remount it in data=%s mode.\n", 1075 "try to remount it in data=%s mode.",
1044 sb->s_id,
1045 data_mode_string(sbi->s_mount_opt & 1076 data_mode_string(sbi->s_mount_opt &
1046 EXT3_MOUNT_DATA_FLAGS), 1077 EXT3_MOUNT_DATA_FLAGS),
1047 data_mode_string(data_opt)); 1078 data_mode_string(data_opt));
@@ -1066,31 +1097,31 @@ static int parse_options (char *options, struct super_block *sb,
1066set_qf_name: 1097set_qf_name:
1067 if (sb_any_quota_loaded(sb) && 1098 if (sb_any_quota_loaded(sb) &&
1068 !sbi->s_qf_names[qtype]) { 1099 !sbi->s_qf_names[qtype]) {
1069 printk(KERN_ERR 1100 ext3_msg(sb, KERN_ERR,
1070 "EXT3-fs: Cannot change journaled " 1101 "error: cannot change journaled "
1071 "quota options when quota turned on.\n"); 1102 "quota options when quota turned on.");
1072 return 0; 1103 return 0;
1073 } 1104 }
1074 qname = match_strdup(&args[0]); 1105 qname = match_strdup(&args[0]);
1075 if (!qname) { 1106 if (!qname) {
1076 printk(KERN_ERR 1107 ext3_msg(sb, KERN_ERR,
1077 "EXT3-fs: not enough memory for " 1108 "error: not enough memory for "
1078 "storing quotafile name.\n"); 1109 "storing quotafile name.");
1079 return 0; 1110 return 0;
1080 } 1111 }
1081 if (sbi->s_qf_names[qtype] && 1112 if (sbi->s_qf_names[qtype] &&
1082 strcmp(sbi->s_qf_names[qtype], qname)) { 1113 strcmp(sbi->s_qf_names[qtype], qname)) {
1083 printk(KERN_ERR 1114 ext3_msg(sb, KERN_ERR,
1084 "EXT3-fs: %s quota file already " 1115 "error: %s quota file already "
1085 "specified.\n", QTYPE2NAME(qtype)); 1116 "specified.", QTYPE2NAME(qtype));
1086 kfree(qname); 1117 kfree(qname);
1087 return 0; 1118 return 0;
1088 } 1119 }
1089 sbi->s_qf_names[qtype] = qname; 1120 sbi->s_qf_names[qtype] = qname;
1090 if (strchr(sbi->s_qf_names[qtype], '/')) { 1121 if (strchr(sbi->s_qf_names[qtype], '/')) {
1091 printk(KERN_ERR 1122 ext3_msg(sb, KERN_ERR,
1092 "EXT3-fs: quotafile must be on " 1123 "error: quotafile must be on "
1093 "filesystem root.\n"); 1124 "filesystem root.");
1094 kfree(sbi->s_qf_names[qtype]); 1125 kfree(sbi->s_qf_names[qtype]);
1095 sbi->s_qf_names[qtype] = NULL; 1126 sbi->s_qf_names[qtype] = NULL;
1096 return 0; 1127 return 0;
@@ -1105,9 +1136,9 @@ set_qf_name:
1105clear_qf_name: 1136clear_qf_name:
1106 if (sb_any_quota_loaded(sb) && 1137 if (sb_any_quota_loaded(sb) &&
1107 sbi->s_qf_names[qtype]) { 1138 sbi->s_qf_names[qtype]) {
1108 printk(KERN_ERR "EXT3-fs: Cannot change " 1139 ext3_msg(sb, KERN_ERR, "error: cannot change "
1109 "journaled quota options when " 1140 "journaled quota options when "
1110 "quota turned on.\n"); 1141 "quota turned on.");
1111 return 0; 1142 return 0;
1112 } 1143 }
1113 /* 1144 /*
@@ -1121,12 +1152,15 @@ clear_qf_name:
1121 goto set_qf_format; 1152 goto set_qf_format;
1122 case Opt_jqfmt_vfsv0: 1153 case Opt_jqfmt_vfsv0:
1123 qfmt = QFMT_VFS_V0; 1154 qfmt = QFMT_VFS_V0;
1155 goto set_qf_format;
1156 case Opt_jqfmt_vfsv1:
1157 qfmt = QFMT_VFS_V1;
1124set_qf_format: 1158set_qf_format:
1125 if (sb_any_quota_loaded(sb) && 1159 if (sb_any_quota_loaded(sb) &&
1126 sbi->s_jquota_fmt != qfmt) { 1160 sbi->s_jquota_fmt != qfmt) {
1127 printk(KERN_ERR "EXT3-fs: Cannot change " 1161 ext3_msg(sb, KERN_ERR, "error: cannot change "
1128 "journaled quota options when " 1162 "journaled quota options when "
1129 "quota turned on.\n"); 1163 "quota turned on.");
1130 return 0; 1164 return 0;
1131 } 1165 }
1132 sbi->s_jquota_fmt = qfmt; 1166 sbi->s_jquota_fmt = qfmt;
@@ -1142,8 +1176,8 @@ set_qf_format:
1142 break; 1176 break;
1143 case Opt_noquota: 1177 case Opt_noquota:
1144 if (sb_any_quota_loaded(sb)) { 1178 if (sb_any_quota_loaded(sb)) {
1145 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1179 ext3_msg(sb, KERN_ERR, "error: cannot change "
1146 "options when quota turned on.\n"); 1180 "quota options when quota turned on.");
1147 return 0; 1181 return 0;
1148 } 1182 }
1149 clear_opt(sbi->s_mount_opt, QUOTA); 1183 clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1154,8 +1188,8 @@ set_qf_format:
1154 case Opt_quota: 1188 case Opt_quota:
1155 case Opt_usrquota: 1189 case Opt_usrquota:
1156 case Opt_grpquota: 1190 case Opt_grpquota:
1157 printk(KERN_ERR 1191 ext3_msg(sb, KERN_ERR,
1158 "EXT3-fs: quota options not supported.\n"); 1192 "error: quota options not supported.");
1159 break; 1193 break;
1160 case Opt_usrjquota: 1194 case Opt_usrjquota:
1161 case Opt_grpjquota: 1195 case Opt_grpjquota:
@@ -1163,9 +1197,10 @@ set_qf_format:
1163 case Opt_offgrpjquota: 1197 case Opt_offgrpjquota:
1164 case Opt_jqfmt_vfsold: 1198 case Opt_jqfmt_vfsold:
1165 case Opt_jqfmt_vfsv0: 1199 case Opt_jqfmt_vfsv0:
1166 printk(KERN_ERR 1200 case Opt_jqfmt_vfsv1:
1167 "EXT3-fs: journaled quota options not " 1201 ext3_msg(sb, KERN_ERR,
1168 "supported.\n"); 1202 "error: journaled quota options not "
1203 "supported.");
1169 break; 1204 break;
1170 case Opt_noquota: 1205 case Opt_noquota:
1171 break; 1206 break;
@@ -1185,8 +1220,9 @@ set_qf_format:
1185 break; 1220 break;
1186 case Opt_resize: 1221 case Opt_resize:
1187 if (!is_remount) { 1222 if (!is_remount) {
1188 printk("EXT3-fs: resize option only available " 1223 ext3_msg(sb, KERN_ERR,
1189 "for remount\n"); 1224 "error: resize option only available "
1225 "for remount");
1190 return 0; 1226 return 0;
1191 } 1227 }
1192 if (match_int(&args[0], &option) != 0) 1228 if (match_int(&args[0], &option) != 0)
@@ -1200,9 +1236,9 @@ set_qf_format:
1200 clear_opt(sbi->s_mount_opt, NOBH); 1236 clear_opt(sbi->s_mount_opt, NOBH);
1201 break; 1237 break;
1202 default: 1238 default:
1203 printk (KERN_ERR 1239 ext3_msg(sb, KERN_ERR,
1204 "EXT3-fs: Unrecognized mount option \"%s\" " 1240 "error: unrecognized mount option \"%s\" "
1205 "or missing value\n", p); 1241 "or missing value", p);
1206 return 0; 1242 return 0;
1207 } 1243 }
1208 } 1244 }
@@ -1220,21 +1256,21 @@ set_qf_format:
1220 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || 1256 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1221 (sbi->s_qf_names[GRPQUOTA] && 1257 (sbi->s_qf_names[GRPQUOTA] &&
1222 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { 1258 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1223 printk(KERN_ERR "EXT3-fs: old and new quota " 1259 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1224 "format mixing.\n"); 1260 "format mixing.");
1225 return 0; 1261 return 0;
1226 } 1262 }
1227 1263
1228 if (!sbi->s_jquota_fmt) { 1264 if (!sbi->s_jquota_fmt) {
1229 printk(KERN_ERR "EXT3-fs: journaled quota format " 1265 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1230 "not specified.\n"); 1266 "not specified.");
1231 return 0; 1267 return 0;
1232 } 1268 }
1233 } else { 1269 } else {
1234 if (sbi->s_jquota_fmt) { 1270 if (sbi->s_jquota_fmt) {
1235 printk(KERN_ERR "EXT3-fs: journaled quota format " 1271 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1236 "specified with no journaling " 1272 "specified with no journaling "
1237 "enabled.\n"); 1273 "enabled.");
1238 return 0; 1274 return 0;
1239 } 1275 }
1240 } 1276 }
@@ -1249,31 +1285,33 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1249 int res = 0; 1285 int res = 0;
1250 1286
1251 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1287 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1252 printk (KERN_ERR "EXT3-fs warning: revision level too high, " 1288 ext3_msg(sb, KERN_ERR,
1253 "forcing read-only mode\n"); 1289 "error: revision level too high, "
1290 "forcing read-only mode");
1254 res = MS_RDONLY; 1291 res = MS_RDONLY;
1255 } 1292 }
1256 if (read_only) 1293 if (read_only)
1257 return res; 1294 return res;
1258 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1295 if (!(sbi->s_mount_state & EXT3_VALID_FS))
1259 printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " 1296 ext3_msg(sb, KERN_WARNING,
1260 "running e2fsck is recommended\n"); 1297 "warning: mounting unchecked fs, "
1298 "running e2fsck is recommended");
1261 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1299 else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1262 printk (KERN_WARNING 1300 ext3_msg(sb, KERN_WARNING,
1263 "EXT3-fs warning: mounting fs with errors, " 1301 "warning: mounting fs with errors, "
1264 "running e2fsck is recommended\n"); 1302 "running e2fsck is recommended");
1265 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1303 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1266 le16_to_cpu(es->s_mnt_count) >= 1304 le16_to_cpu(es->s_mnt_count) >=
1267 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1305 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1268 printk (KERN_WARNING 1306 ext3_msg(sb, KERN_WARNING,
1269 "EXT3-fs warning: maximal mount count reached, " 1307 "warning: maximal mount count reached, "
1270 "running e2fsck is recommended\n"); 1308 "running e2fsck is recommended");
1271 else if (le32_to_cpu(es->s_checkinterval) && 1309 else if (le32_to_cpu(es->s_checkinterval) &&
1272 (le32_to_cpu(es->s_lastcheck) + 1310 (le32_to_cpu(es->s_lastcheck) +
1273 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1311 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1274 printk (KERN_WARNING 1312 ext3_msg(sb, KERN_WARNING,
1275 "EXT3-fs warning: checktime reached, " 1313 "warning: checktime reached, "
1276 "running e2fsck is recommended\n"); 1314 "running e2fsck is recommended");
1277#if 0 1315#if 0
1278 /* @@@ We _will_ want to clear the valid bit if we find 1316 /* @@@ We _will_ want to clear the valid bit if we find
1279 inconsistencies, to force a fsck at reboot. But for 1317 inconsistencies, to force a fsck at reboot. But for
@@ -1290,22 +1328,20 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1290 1328
1291 ext3_commit_super(sb, es, 1); 1329 ext3_commit_super(sb, es, 1);
1292 if (test_opt(sb, DEBUG)) 1330 if (test_opt(sb, DEBUG))
1293 printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " 1331 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1294 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1332 "bpg=%lu, ipg=%lu, mo=%04lx]",
1295 sb->s_blocksize, 1333 sb->s_blocksize,
1296 sbi->s_groups_count, 1334 sbi->s_groups_count,
1297 EXT3_BLOCKS_PER_GROUP(sb), 1335 EXT3_BLOCKS_PER_GROUP(sb),
1298 EXT3_INODES_PER_GROUP(sb), 1336 EXT3_INODES_PER_GROUP(sb),
1299 sbi->s_mount_opt); 1337 sbi->s_mount_opt);
1300 1338
1301 printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
1302 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1339 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1303 char b[BDEVNAME_SIZE]; 1340 char b[BDEVNAME_SIZE];
1304 1341 ext3_msg(sb, KERN_INFO, "using external journal on %s",
1305 printk("external journal on %s\n",
1306 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1342 bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1307 } else { 1343 } else {
1308 printk("internal journal\n"); 1344 ext3_msg(sb, KERN_INFO, "using internal journal");
1309 } 1345 }
1310 return res; 1346 return res;
1311} 1347}
@@ -1399,8 +1435,8 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1399 } 1435 }
1400 1436
1401 if (bdev_read_only(sb->s_bdev)) { 1437 if (bdev_read_only(sb->s_bdev)) {
1402 printk(KERN_ERR "EXT3-fs: write access " 1438 ext3_msg(sb, KERN_ERR, "error: write access "
1403 "unavailable, skipping orphan cleanup.\n"); 1439 "unavailable, skipping orphan cleanup.");
1404 return; 1440 return;
1405 } 1441 }
1406 1442
@@ -1414,8 +1450,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1414 } 1450 }
1415 1451
1416 if (s_flags & MS_RDONLY) { 1452 if (s_flags & MS_RDONLY) {
1417 printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", 1453 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1418 sb->s_id);
1419 sb->s_flags &= ~MS_RDONLY; 1454 sb->s_flags &= ~MS_RDONLY;
1420 } 1455 }
1421#ifdef CONFIG_QUOTA 1456#ifdef CONFIG_QUOTA
@@ -1426,9 +1461,9 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1426 if (EXT3_SB(sb)->s_qf_names[i]) { 1461 if (EXT3_SB(sb)->s_qf_names[i]) {
1427 int ret = ext3_quota_on_mount(sb, i); 1462 int ret = ext3_quota_on_mount(sb, i);
1428 if (ret < 0) 1463 if (ret < 0)
1429 printk(KERN_ERR 1464 ext3_msg(sb, KERN_ERR,
1430 "EXT3-fs: Cannot turn on journaled " 1465 "error: cannot turn on journaled "
1431 "quota: error %d\n", ret); 1466 "quota: %d", ret);
1432 } 1467 }
1433 } 1468 }
1434#endif 1469#endif
@@ -1466,11 +1501,11 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1466#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1501#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1467 1502
1468 if (nr_orphans) 1503 if (nr_orphans)
1469 printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", 1504 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1470 sb->s_id, PLURAL(nr_orphans)); 1505 PLURAL(nr_orphans));
1471 if (nr_truncates) 1506 if (nr_truncates)
1472 printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", 1507 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1473 sb->s_id, PLURAL(nr_truncates)); 1508 PLURAL(nr_truncates));
1474#ifdef CONFIG_QUOTA 1509#ifdef CONFIG_QUOTA
1475 /* Turn quotas off */ 1510 /* Turn quotas off */
1476 for (i = 0; i < MAXQUOTAS; i++) { 1511 for (i = 0; i < MAXQUOTAS; i++) {
@@ -1554,7 +1589,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1554 struct ext3_super_block *es = NULL; 1589 struct ext3_super_block *es = NULL;
1555 struct ext3_sb_info *sbi; 1590 struct ext3_sb_info *sbi;
1556 ext3_fsblk_t block; 1591 ext3_fsblk_t block;
1557 ext3_fsblk_t sb_block = get_sb_block(&data); 1592 ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1558 ext3_fsblk_t logic_sb_block; 1593 ext3_fsblk_t logic_sb_block;
1559 unsigned long offset = 0; 1594 unsigned long offset = 0;
1560 unsigned int journal_inum = 0; 1595 unsigned int journal_inum = 0;
@@ -1590,7 +1625,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1590 1625
1591 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1626 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1592 if (!blocksize) { 1627 if (!blocksize) {
1593 printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); 1628 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1594 goto out_fail; 1629 goto out_fail;
1595 } 1630 }
1596 1631
@@ -1606,7 +1641,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1606 } 1641 }
1607 1642
1608 if (!(bh = sb_bread(sb, logic_sb_block))) { 1643 if (!(bh = sb_bread(sb, logic_sb_block))) {
1609 printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); 1644 ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1610 goto out_fail; 1645 goto out_fail;
1611 } 1646 }
1612 /* 1647 /*
@@ -1665,9 +1700,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1665 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1700 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1666 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1701 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1667 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1702 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1668 printk(KERN_WARNING 1703 ext3_msg(sb, KERN_WARNING,
1669 "EXT3-fs warning: feature flags set on rev 0 fs, " 1704 "warning: feature flags set on rev 0 fs, "
1670 "running e2fsck is recommended\n"); 1705 "running e2fsck is recommended");
1671 /* 1706 /*
1672 * Check feature flags regardless of the revision level, since we 1707 * Check feature flags regardless of the revision level, since we
1673 * previously didn't change the revision level when setting the flags, 1708 * previously didn't change the revision level when setting the flags,
@@ -1675,25 +1710,25 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1675 */ 1710 */
1676 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1711 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1677 if (features) { 1712 if (features) {
1678 printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " 1713 ext3_msg(sb, KERN_ERR,
1679 "unsupported optional features (%x).\n", 1714 "error: couldn't mount because of unsupported "
1680 sb->s_id, le32_to_cpu(features)); 1715 "optional features (%x)", le32_to_cpu(features));
1681 goto failed_mount; 1716 goto failed_mount;
1682 } 1717 }
1683 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1718 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1684 if (!(sb->s_flags & MS_RDONLY) && features) { 1719 if (!(sb->s_flags & MS_RDONLY) && features) {
1685 printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " 1720 ext3_msg(sb, KERN_ERR,
1686 "unsupported optional features (%x).\n", 1721 "error: couldn't mount RDWR because of unsupported "
1687 sb->s_id, le32_to_cpu(features)); 1722 "optional features (%x)", le32_to_cpu(features));
1688 goto failed_mount; 1723 goto failed_mount;
1689 } 1724 }
1690 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1725 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1691 1726
1692 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1727 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1693 blocksize > EXT3_MAX_BLOCK_SIZE) { 1728 blocksize > EXT3_MAX_BLOCK_SIZE) {
1694 printk(KERN_ERR 1729 ext3_msg(sb, KERN_ERR,
1695 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", 1730 "error: couldn't mount because of unsupported "
1696 blocksize, sb->s_id); 1731 "filesystem blocksize %d", blocksize);
1697 goto failed_mount; 1732 goto failed_mount;
1698 } 1733 }
1699 1734
@@ -1704,30 +1739,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1704 * than the hardware sectorsize for the machine. 1739 * than the hardware sectorsize for the machine.
1705 */ 1740 */
1706 if (blocksize < hblock) { 1741 if (blocksize < hblock) {
1707 printk(KERN_ERR "EXT3-fs: blocksize %d too small for " 1742 ext3_msg(sb, KERN_ERR,
1708 "device blocksize %d.\n", blocksize, hblock); 1743 "error: fsblocksize %d too small for "
1744 "hardware sectorsize %d", blocksize, hblock);
1709 goto failed_mount; 1745 goto failed_mount;
1710 } 1746 }
1711 1747
1712 brelse (bh); 1748 brelse (bh);
1713 if (!sb_set_blocksize(sb, blocksize)) { 1749 if (!sb_set_blocksize(sb, blocksize)) {
1714 printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n", 1750 ext3_msg(sb, KERN_ERR,
1715 blocksize); 1751 "error: bad blocksize %d", blocksize);
1716 goto out_fail; 1752 goto out_fail;
1717 } 1753 }
1718 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1754 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1719 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1755 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1720 bh = sb_bread(sb, logic_sb_block); 1756 bh = sb_bread(sb, logic_sb_block);
1721 if (!bh) { 1757 if (!bh) {
1722 printk(KERN_ERR 1758 ext3_msg(sb, KERN_ERR,
1723 "EXT3-fs: Can't read superblock on 2nd try.\n"); 1759 "error: can't read superblock on 2nd try");
1724 goto failed_mount; 1760 goto failed_mount;
1725 } 1761 }
1726 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1762 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
1727 sbi->s_es = es; 1763 sbi->s_es = es;
1728 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1764 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1729 printk (KERN_ERR 1765 ext3_msg(sb, KERN_ERR,
1730 "EXT3-fs: Magic mismatch, very weird !\n"); 1766 "error: magic mismatch");
1731 goto failed_mount; 1767 goto failed_mount;
1732 } 1768 }
1733 } 1769 }
@@ -1743,8 +1779,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1743 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1779 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1744 (!is_power_of_2(sbi->s_inode_size)) || 1780 (!is_power_of_2(sbi->s_inode_size)) ||
1745 (sbi->s_inode_size > blocksize)) { 1781 (sbi->s_inode_size > blocksize)) {
1746 printk (KERN_ERR 1782 ext3_msg(sb, KERN_ERR,
1747 "EXT3-fs: unsupported inode size: %d\n", 1783 "error: unsupported inode size: %d",
1748 sbi->s_inode_size); 1784 sbi->s_inode_size);
1749 goto failed_mount; 1785 goto failed_mount;
1750 } 1786 }
@@ -1752,8 +1788,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1752 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1788 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1753 le32_to_cpu(es->s_log_frag_size); 1789 le32_to_cpu(es->s_log_frag_size);
1754 if (blocksize != sbi->s_frag_size) { 1790 if (blocksize != sbi->s_frag_size) {
1755 printk(KERN_ERR 1791 ext3_msg(sb, KERN_ERR,
1756 "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", 1792 "error: fragsize %lu != blocksize %u (unsupported)",
1757 sbi->s_frag_size, blocksize); 1793 sbi->s_frag_size, blocksize);
1758 goto failed_mount; 1794 goto failed_mount;
1759 } 1795 }
@@ -1789,31 +1825,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1789 } 1825 }
1790 1826
1791 if (sbi->s_blocks_per_group > blocksize * 8) { 1827 if (sbi->s_blocks_per_group > blocksize * 8) {
1792 printk (KERN_ERR 1828 ext3_msg(sb, KERN_ERR,
1793 "EXT3-fs: #blocks per group too big: %lu\n", 1829 "#blocks per group too big: %lu",
1794 sbi->s_blocks_per_group); 1830 sbi->s_blocks_per_group);
1795 goto failed_mount; 1831 goto failed_mount;
1796 } 1832 }
1797 if (sbi->s_frags_per_group > blocksize * 8) { 1833 if (sbi->s_frags_per_group > blocksize * 8) {
1798 printk (KERN_ERR 1834 ext3_msg(sb, KERN_ERR,
1799 "EXT3-fs: #fragments per group too big: %lu\n", 1835 "error: #fragments per group too big: %lu",
1800 sbi->s_frags_per_group); 1836 sbi->s_frags_per_group);
1801 goto failed_mount; 1837 goto failed_mount;
1802 } 1838 }
1803 if (sbi->s_inodes_per_group > blocksize * 8) { 1839 if (sbi->s_inodes_per_group > blocksize * 8) {
1804 printk (KERN_ERR 1840 ext3_msg(sb, KERN_ERR,
1805 "EXT3-fs: #inodes per group too big: %lu\n", 1841 "error: #inodes per group too big: %lu",
1806 sbi->s_inodes_per_group); 1842 sbi->s_inodes_per_group);
1807 goto failed_mount; 1843 goto failed_mount;
1808 } 1844 }
1809 1845
1810 if (le32_to_cpu(es->s_blocks_count) > 1846 if (le32_to_cpu(es->s_blocks_count) >
1811 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1847 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1812 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1848 ext3_msg(sb, KERN_ERR,
1813 " too large to mount safely\n", sb->s_id); 1849 "error: filesystem is too large to mount safely");
1814 if (sizeof(sector_t) < 8) 1850 if (sizeof(sector_t) < 8)
1815 printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not " 1851 ext3_msg(sb, KERN_ERR,
1816 "enabled\n"); 1852 "error: CONFIG_LBDAF not enabled");
1817 goto failed_mount; 1853 goto failed_mount;
1818 } 1854 }
1819 1855
@@ -1827,7 +1863,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1827 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1863 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1828 GFP_KERNEL); 1864 GFP_KERNEL);
1829 if (sbi->s_group_desc == NULL) { 1865 if (sbi->s_group_desc == NULL) {
1830 printk (KERN_ERR "EXT3-fs: not enough memory\n"); 1866 ext3_msg(sb, KERN_ERR,
1867 "error: not enough memory");
1831 goto failed_mount; 1868 goto failed_mount;
1832 } 1869 }
1833 1870
@@ -1837,14 +1874,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1837 block = descriptor_loc(sb, logic_sb_block, i); 1874 block = descriptor_loc(sb, logic_sb_block, i);
1838 sbi->s_group_desc[i] = sb_bread(sb, block); 1875 sbi->s_group_desc[i] = sb_bread(sb, block);
1839 if (!sbi->s_group_desc[i]) { 1876 if (!sbi->s_group_desc[i]) {
1840 printk (KERN_ERR "EXT3-fs: " 1877 ext3_msg(sb, KERN_ERR,
1841 "can't read group descriptor %d\n", i); 1878 "error: can't read group descriptor %d", i);
1842 db_count = i; 1879 db_count = i;
1843 goto failed_mount2; 1880 goto failed_mount2;
1844 } 1881 }
1845 } 1882 }
1846 if (!ext3_check_descriptors (sb)) { 1883 if (!ext3_check_descriptors (sb)) {
1847 printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); 1884 ext3_msg(sb, KERN_ERR,
1885 "error: group descriptors corrupted");
1848 goto failed_mount2; 1886 goto failed_mount2;
1849 } 1887 }
1850 sbi->s_gdb_count = db_count; 1888 sbi->s_gdb_count = db_count;
@@ -1862,7 +1900,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1862 ext3_count_dirs(sb)); 1900 ext3_count_dirs(sb));
1863 } 1901 }
1864 if (err) { 1902 if (err) {
1865 printk(KERN_ERR "EXT3-fs: insufficient memory\n"); 1903 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1866 goto failed_mount3; 1904 goto failed_mount3;
1867 } 1905 }
1868 1906
@@ -1890,6 +1928,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1890 sb->dq_op = &ext3_quota_operations; 1928 sb->dq_op = &ext3_quota_operations;
1891#endif 1929#endif
1892 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1930 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1931 mutex_init(&sbi->s_orphan_lock);
1932 mutex_init(&sbi->s_resize_lock);
1893 1933
1894 sb->s_root = NULL; 1934 sb->s_root = NULL;
1895 1935
@@ -1910,9 +1950,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1910 goto failed_mount3; 1950 goto failed_mount3;
1911 } else { 1951 } else {
1912 if (!silent) 1952 if (!silent)
1913 printk (KERN_ERR 1953 ext3_msg(sb, KERN_ERR,
1914 "ext3: No journal on filesystem on %s\n", 1954 "error: no journal found. "
1915 sb->s_id); 1955 "mounting ext3 over ext2?");
1916 goto failed_mount3; 1956 goto failed_mount3;
1917 } 1957 }
1918 1958
@@ -1934,8 +1974,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1934 case EXT3_MOUNT_WRITEBACK_DATA: 1974 case EXT3_MOUNT_WRITEBACK_DATA:
1935 if (!journal_check_available_features 1975 if (!journal_check_available_features
1936 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1976 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
1937 printk(KERN_ERR "EXT3-fs: Journal does not support " 1977 ext3_msg(sb, KERN_ERR,
1938 "requested data journaling mode\n"); 1978 "error: journal does not support "
1979 "requested data journaling mode");
1939 goto failed_mount4; 1980 goto failed_mount4;
1940 } 1981 }
1941 default: 1982 default:
@@ -1944,8 +1985,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1944 1985
1945 if (test_opt(sb, NOBH)) { 1986 if (test_opt(sb, NOBH)) {
1946 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { 1987 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
1947 printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " 1988 ext3_msg(sb, KERN_WARNING,
1948 "its supported only with writeback mode\n"); 1989 "warning: ignoring nobh option - "
1990 "it is supported only with writeback mode");
1949 clear_opt(sbi->s_mount_opt, NOBH); 1991 clear_opt(sbi->s_mount_opt, NOBH);
1950 } 1992 }
1951 } 1993 }
@@ -1956,39 +1998,32 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1956 1998
1957 root = ext3_iget(sb, EXT3_ROOT_INO); 1999 root = ext3_iget(sb, EXT3_ROOT_INO);
1958 if (IS_ERR(root)) { 2000 if (IS_ERR(root)) {
1959 printk(KERN_ERR "EXT3-fs: get root inode failed\n"); 2001 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
1960 ret = PTR_ERR(root); 2002 ret = PTR_ERR(root);
1961 goto failed_mount4; 2003 goto failed_mount4;
1962 } 2004 }
1963 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2005 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1964 iput(root); 2006 iput(root);
1965 printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); 2007 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
1966 goto failed_mount4; 2008 goto failed_mount4;
1967 } 2009 }
1968 sb->s_root = d_alloc_root(root); 2010 sb->s_root = d_alloc_root(root);
1969 if (!sb->s_root) { 2011 if (!sb->s_root) {
1970 printk(KERN_ERR "EXT3-fs: get root dentry failed\n"); 2012 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
1971 iput(root); 2013 iput(root);
1972 ret = -ENOMEM; 2014 ret = -ENOMEM;
1973 goto failed_mount4; 2015 goto failed_mount4;
1974 } 2016 }
1975 2017
1976 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2018 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1977 /* 2019
1978 * akpm: core read_super() calls in here with the superblock locked.
1979 * That deadlocks, because orphan cleanup needs to lock the superblock
1980 * in numerous places. Here we just pop the lock - it's relatively
1981 * harmless, because we are now ready to accept write_super() requests,
1982 * and aviro says that's the only reason for hanging onto the
1983 * superblock lock.
1984 */
1985 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2020 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
1986 ext3_orphan_cleanup(sb, es); 2021 ext3_orphan_cleanup(sb, es);
1987 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2022 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
1988 if (needs_recovery) 2023 if (needs_recovery)
1989 printk (KERN_INFO "EXT3-fs: recovery complete.\n"); 2024 ext3_msg(sb, KERN_INFO, "recovery complete");
1990 ext3_mark_recovery_complete(sb, es); 2025 ext3_mark_recovery_complete(sb, es);
1991 printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", 2026 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
1992 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2027 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
1993 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2028 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1994 "writeback"); 2029 "writeback");
@@ -1998,7 +2033,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1998 2033
1999cantfind_ext3: 2034cantfind_ext3:
2000 if (!silent) 2035 if (!silent)
2001 printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n", 2036 ext3_msg(sb, KERN_INFO,
2037 "error: can't find ext3 filesystem on dev %s.",
2002 sb->s_id); 2038 sb->s_id);
2003 goto failed_mount; 2039 goto failed_mount;
2004 2040
@@ -2066,27 +2102,27 @@ static journal_t *ext3_get_journal(struct super_block *sb,
2066 2102
2067 journal_inode = ext3_iget(sb, journal_inum); 2103 journal_inode = ext3_iget(sb, journal_inum);
2068 if (IS_ERR(journal_inode)) { 2104 if (IS_ERR(journal_inode)) {
2069 printk(KERN_ERR "EXT3-fs: no journal found.\n"); 2105 ext3_msg(sb, KERN_ERR, "error: no journal found");
2070 return NULL; 2106 return NULL;
2071 } 2107 }
2072 if (!journal_inode->i_nlink) { 2108 if (!journal_inode->i_nlink) {
2073 make_bad_inode(journal_inode); 2109 make_bad_inode(journal_inode);
2074 iput(journal_inode); 2110 iput(journal_inode);
2075 printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); 2111 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2076 return NULL; 2112 return NULL;
2077 } 2113 }
2078 2114
2079 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2115 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2080 journal_inode, journal_inode->i_size); 2116 journal_inode, journal_inode->i_size);
2081 if (!S_ISREG(journal_inode->i_mode)) { 2117 if (!S_ISREG(journal_inode->i_mode)) {
2082 printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); 2118 ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2083 iput(journal_inode); 2119 iput(journal_inode);
2084 return NULL; 2120 return NULL;
2085 } 2121 }
2086 2122
2087 journal = journal_init_inode(journal_inode); 2123 journal = journal_init_inode(journal_inode);
2088 if (!journal) { 2124 if (!journal) {
2089 printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); 2125 ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2090 iput(journal_inode); 2126 iput(journal_inode);
2091 return NULL; 2127 return NULL;
2092 } 2128 }
@@ -2108,13 +2144,13 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2108 struct ext3_super_block * es; 2144 struct ext3_super_block * es;
2109 struct block_device *bdev; 2145 struct block_device *bdev;
2110 2146
2111 bdev = ext3_blkdev_get(j_dev); 2147 bdev = ext3_blkdev_get(j_dev, sb);
2112 if (bdev == NULL) 2148 if (bdev == NULL)
2113 return NULL; 2149 return NULL;
2114 2150
2115 if (bd_claim(bdev, sb)) { 2151 if (bd_claim(bdev, sb)) {
2116 printk(KERN_ERR 2152 ext3_msg(sb, KERN_ERR,
2117 "EXT3: failed to claim external journal device.\n"); 2153 "error: failed to claim external journal device");
2118 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2154 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2119 return NULL; 2155 return NULL;
2120 } 2156 }
@@ -2122,8 +2158,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2122 blocksize = sb->s_blocksize; 2158 blocksize = sb->s_blocksize;
2123 hblock = bdev_logical_block_size(bdev); 2159 hblock = bdev_logical_block_size(bdev);
2124 if (blocksize < hblock) { 2160 if (blocksize < hblock) {
2125 printk(KERN_ERR 2161 ext3_msg(sb, KERN_ERR,
2126 "EXT3-fs: blocksize too small for journal device.\n"); 2162 "error: blocksize too small for journal device");
2127 goto out_bdev; 2163 goto out_bdev;
2128 } 2164 }
2129 2165
@@ -2131,8 +2167,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2131 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 2167 offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2132 set_blocksize(bdev, blocksize); 2168 set_blocksize(bdev, blocksize);
2133 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2169 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2134 printk(KERN_ERR "EXT3-fs: couldn't read superblock of " 2170 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2135 "external journal\n"); 2171 "external journal");
2136 goto out_bdev; 2172 goto out_bdev;
2137 } 2173 }
2138 2174
@@ -2140,14 +2176,14 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2140 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2176 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2141 !(le32_to_cpu(es->s_feature_incompat) & 2177 !(le32_to_cpu(es->s_feature_incompat) &
2142 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2178 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2143 printk(KERN_ERR "EXT3-fs: external journal has " 2179 ext3_msg(sb, KERN_ERR, "error: external journal has "
2144 "bad superblock\n"); 2180 "bad superblock");
2145 brelse(bh); 2181 brelse(bh);
2146 goto out_bdev; 2182 goto out_bdev;
2147 } 2183 }
2148 2184
2149 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2185 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2150 printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); 2186 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2151 brelse(bh); 2187 brelse(bh);
2152 goto out_bdev; 2188 goto out_bdev;
2153 } 2189 }
@@ -2159,19 +2195,21 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2159 journal = journal_init_dev(bdev, sb->s_bdev, 2195 journal = journal_init_dev(bdev, sb->s_bdev,
2160 start, len, blocksize); 2196 start, len, blocksize);
2161 if (!journal) { 2197 if (!journal) {
2162 printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); 2198 ext3_msg(sb, KERN_ERR,
2199 "error: failed to create device journal");
2163 goto out_bdev; 2200 goto out_bdev;
2164 } 2201 }
2165 journal->j_private = sb; 2202 journal->j_private = sb;
2166 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2203 ll_rw_block(READ, 1, &journal->j_sb_buffer);
2167 wait_on_buffer(journal->j_sb_buffer); 2204 wait_on_buffer(journal->j_sb_buffer);
2168 if (!buffer_uptodate(journal->j_sb_buffer)) { 2205 if (!buffer_uptodate(journal->j_sb_buffer)) {
2169 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); 2206 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2170 goto out_journal; 2207 goto out_journal;
2171 } 2208 }
2172 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2209 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2173 printk(KERN_ERR "EXT3-fs: External journal has more than one " 2210 ext3_msg(sb, KERN_ERR,
2174 "user (unsupported) - %d\n", 2211 "error: external journal has more than one "
2212 "user (unsupported) - %d",
2175 be32_to_cpu(journal->j_superblock->s_nr_users)); 2213 be32_to_cpu(journal->j_superblock->s_nr_users));
2176 goto out_journal; 2214 goto out_journal;
2177 } 2215 }
@@ -2197,8 +2235,8 @@ static int ext3_load_journal(struct super_block *sb,
2197 2235
2198 if (journal_devnum && 2236 if (journal_devnum &&
2199 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2237 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2200 printk(KERN_INFO "EXT3-fs: external journal device major/minor " 2238 ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2201 "numbers have changed\n"); 2239 "numbers have changed");
2202 journal_dev = new_decode_dev(journal_devnum); 2240 journal_dev = new_decode_dev(journal_devnum);
2203 } else 2241 } else
2204 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2242 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -2213,21 +2251,21 @@ static int ext3_load_journal(struct super_block *sb,
2213 2251
2214 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2252 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2215 if (sb->s_flags & MS_RDONLY) { 2253 if (sb->s_flags & MS_RDONLY) {
2216 printk(KERN_INFO "EXT3-fs: INFO: recovery " 2254 ext3_msg(sb, KERN_INFO,
2217 "required on readonly filesystem.\n"); 2255 "recovery required on readonly filesystem");
2218 if (really_read_only) { 2256 if (really_read_only) {
2219 printk(KERN_ERR "EXT3-fs: write access " 2257 ext3_msg(sb, KERN_ERR, "error: write access "
2220 "unavailable, cannot proceed.\n"); 2258 "unavailable, cannot proceed");
2221 return -EROFS; 2259 return -EROFS;
2222 } 2260 }
2223 printk (KERN_INFO "EXT3-fs: write access will " 2261 ext3_msg(sb, KERN_INFO,
2224 "be enabled during recovery.\n"); 2262 "write access will be enabled during recovery");
2225 } 2263 }
2226 } 2264 }
2227 2265
2228 if (journal_inum && journal_dev) { 2266 if (journal_inum && journal_dev) {
2229 printk(KERN_ERR "EXT3-fs: filesystem has both journal " 2267 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2230 "and inode journals!\n"); 2268 "and inode journals");
2231 return -EINVAL; 2269 return -EINVAL;
2232 } 2270 }
2233 2271
@@ -2242,7 +2280,7 @@ static int ext3_load_journal(struct super_block *sb,
2242 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2280 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2243 err = journal_update_format(journal); 2281 err = journal_update_format(journal);
2244 if (err) { 2282 if (err) {
2245 printk(KERN_ERR "EXT3-fs: error updating journal.\n"); 2283 ext3_msg(sb, KERN_ERR, "error updating journal");
2246 journal_destroy(journal); 2284 journal_destroy(journal);
2247 return err; 2285 return err;
2248 } 2286 }
@@ -2254,7 +2292,7 @@ static int ext3_load_journal(struct super_block *sb,
2254 err = journal_load(journal); 2292 err = journal_load(journal);
2255 2293
2256 if (err) { 2294 if (err) {
2257 printk(KERN_ERR "EXT3-fs: error loading journal.\n"); 2295 ext3_msg(sb, KERN_ERR, "error loading journal");
2258 journal_destroy(journal); 2296 journal_destroy(journal);
2259 return err; 2297 return err;
2260 } 2298 }
@@ -2273,16 +2311,17 @@ static int ext3_load_journal(struct super_block *sb,
2273 return 0; 2311 return 0;
2274} 2312}
2275 2313
2276static int ext3_create_journal(struct super_block * sb, 2314static int ext3_create_journal(struct super_block *sb,
2277 struct ext3_super_block * es, 2315 struct ext3_super_block *es,
2278 unsigned int journal_inum) 2316 unsigned int journal_inum)
2279{ 2317{
2280 journal_t *journal; 2318 journal_t *journal;
2281 int err; 2319 int err;
2282 2320
2283 if (sb->s_flags & MS_RDONLY) { 2321 if (sb->s_flags & MS_RDONLY) {
2284 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 2322 ext3_msg(sb, KERN_ERR,
2285 "create journal.\n"); 2323 "error: readonly filesystem when trying to "
2324 "create journal");
2286 return -EROFS; 2325 return -EROFS;
2287 } 2326 }
2288 2327
@@ -2290,12 +2329,12 @@ static int ext3_create_journal(struct super_block * sb,
2290 if (!journal) 2329 if (!journal)
2291 return -EINVAL; 2330 return -EINVAL;
2292 2331
2293 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", 2332 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
2294 journal_inum); 2333 journal_inum);
2295 2334
2296 err = journal_create(journal); 2335 err = journal_create(journal);
2297 if (err) { 2336 if (err) {
2298 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 2337 ext3_msg(sb, KERN_ERR, "error creating journal");
2299 journal_destroy(journal); 2338 journal_destroy(journal);
2300 return -EIO; 2339 return -EIO;
2301 } 2340 }
@@ -2359,13 +2398,11 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2359 if (journal_flush(journal) < 0) 2398 if (journal_flush(journal) < 0)
2360 goto out; 2399 goto out;
2361 2400
2362 lock_super(sb);
2363 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2401 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2364 sb->s_flags & MS_RDONLY) { 2402 sb->s_flags & MS_RDONLY) {
2365 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2403 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2366 ext3_commit_super(sb, es, 1); 2404 ext3_commit_super(sb, es, 1);
2367 } 2405 }
2368 unlock_super(sb);
2369 2406
2370out: 2407out:
2371 journal_unlock_updates(journal); 2408 journal_unlock_updates(journal);
@@ -2376,8 +2413,8 @@ out:
2376 * has recorded an error from a previous lifetime, move that error to the 2413 * has recorded an error from a previous lifetime, move that error to the
2377 * main filesystem now. 2414 * main filesystem now.
2378 */ 2415 */
2379static void ext3_clear_journal_err(struct super_block * sb, 2416static void ext3_clear_journal_err(struct super_block *sb,
2380 struct ext3_super_block * es) 2417 struct ext3_super_block *es)
2381{ 2418{
2382 journal_t *journal; 2419 journal_t *journal;
2383 int j_errno; 2420 int j_errno;
@@ -2557,21 +2594,15 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2557 (sbi->s_mount_state & EXT3_VALID_FS)) 2594 (sbi->s_mount_state & EXT3_VALID_FS))
2558 es->s_state = cpu_to_le16(sbi->s_mount_state); 2595 es->s_state = cpu_to_le16(sbi->s_mount_state);
2559 2596
2560 /*
2561 * We have to unlock super so that we can wait for
2562 * transactions.
2563 */
2564 unlock_super(sb);
2565 ext3_mark_recovery_complete(sb, es); 2597 ext3_mark_recovery_complete(sb, es);
2566 lock_super(sb);
2567 } else { 2598 } else {
2568 __le32 ret; 2599 __le32 ret;
2569 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2600 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2570 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2601 ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2571 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2602 ext3_msg(sb, KERN_WARNING,
2572 "remount RDWR because of unsupported " 2603 "warning: couldn't remount RDWR "
2573 "optional features (%x).\n", 2604 "because of unsupported optional "
2574 sb->s_id, le32_to_cpu(ret)); 2605 "features (%x)", le32_to_cpu(ret));
2575 err = -EROFS; 2606 err = -EROFS;
2576 goto restore_opts; 2607 goto restore_opts;
2577 } 2608 }
@@ -2582,11 +2613,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2582 * require a full umount/remount for now. 2613 * require a full umount/remount for now.
2583 */ 2614 */
2584 if (es->s_last_orphan) { 2615 if (es->s_last_orphan) {
2585 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2616 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2586 "remount RDWR because of unprocessed " 2617 "remount RDWR because of unprocessed "
2587 "orphan inode list. Please " 2618 "orphan inode list. Please "
2588 "umount/remount instead.\n", 2619 "umount/remount instead.");
2589 sb->s_id);
2590 err = -EINVAL; 2620 err = -EINVAL;
2591 goto restore_opts; 2621 goto restore_opts;
2592 } 2622 }
@@ -2686,13 +2716,11 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2686 buf->f_bsize = sb->s_blocksize; 2716 buf->f_bsize = sb->s_blocksize;
2687 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; 2717 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2688 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 2718 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2689 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
2690 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2719 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2691 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2720 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2692 buf->f_bavail = 0; 2721 buf->f_bavail = 0;
2693 buf->f_files = le32_to_cpu(es->s_inodes_count); 2722 buf->f_files = le32_to_cpu(es->s_inodes_count);
2694 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 2723 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
2695 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
2696 buf->f_namelen = EXT3_NAME_LEN; 2724 buf->f_namelen = EXT3_NAME_LEN;
2697 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2725 fsid = le64_to_cpup((void *)es->s_uuid) ^
2698 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2726 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -2837,9 +2865,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2837 if (EXT3_SB(sb)->s_qf_names[type]) { 2865 if (EXT3_SB(sb)->s_qf_names[type]) {
2838 /* Quotafile not of fs root? */ 2866 /* Quotafile not of fs root? */
2839 if (path.dentry->d_parent != sb->s_root) 2867 if (path.dentry->d_parent != sb->s_root)
2840 printk(KERN_WARNING 2868 ext3_msg(sb, KERN_WARNING,
2841 "EXT3-fs: Quota file not on filesystem root. " 2869 "warning: Quota file not on filesystem root. "
2842 "Journaled quota will not work.\n"); 2870 "Journaled quota will not work.");
2843 } 2871 }
2844 2872
2845 /* 2873 /*
@@ -2921,8 +2949,9 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2921 handle_t *handle = journal_current_handle(); 2949 handle_t *handle = journal_current_handle();
2922 2950
2923 if (!handle) { 2951 if (!handle) {
2924 printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)" 2952 ext3_msg(sb, KERN_WARNING,
2925 " cancelled because transaction is not started.\n", 2953 "warning: quota write (off=%llu, len=%llu)"
2954 " cancelled because transaction is not started.",
2926 (unsigned long long)off, (unsigned long long)len); 2955 (unsigned long long)off, (unsigned long long)len);
2927 return -EIO; 2956 return -EIO;
2928 } 2957 }
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 545e37c4b91..66895ccf76c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer, 102static int ext3_xattr_list(struct dentry *dentry, char *buffer,
103 size_t buffer_size); 103 size_t buffer_size);
104 104
105static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
147ssize_t 147ssize_t
148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
149{ 149{
150 return ext3_xattr_list(dentry->d_inode, buffer, size); 150 return ext3_xattr_list(dentry, buffer, size);
151} 151}
152 152
153static int 153static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
332} 332}
333 333
334static int 334static int
335ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, 335ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
336 char *buffer, size_t buffer_size) 336 char *buffer, size_t buffer_size)
337{ 337{
338 size_t rest = buffer_size; 338 size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
342 ext3_xattr_handler(entry->e_name_index); 342 ext3_xattr_handler(entry->e_name_index);
343 343
344 if (handler) { 344 if (handler) {
345 size_t size = handler->list(inode, buffer, rest, 345 size_t size = handler->list(dentry, buffer, rest,
346 entry->e_name, 346 entry->e_name,
347 entry->e_name_len); 347 entry->e_name_len,
348 handler->flags);
348 if (buffer) { 349 if (buffer) {
349 if (size > rest) 350 if (size > rest)
350 return -ERANGE; 351 return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
357} 358}
358 359
359static int 360static int
360ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 361ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
361{ 362{
363 struct inode *inode = dentry->d_inode;
362 struct buffer_head *bh = NULL; 364 struct buffer_head *bh = NULL;
363 int error; 365 int error;
364 366
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
383 goto cleanup; 385 goto cleanup;
384 } 386 }
385 ext3_xattr_cache_insert(bh); 387 ext3_xattr_cache_insert(bh);
386 error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 388 error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
387 389
388cleanup: 390cleanup:
389 brelse(bh); 391 brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
392} 394}
393 395
394static int 396static int
395ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 397ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
396{ 398{
399 struct inode *inode = dentry->d_inode;
397 struct ext3_xattr_ibody_header *header; 400 struct ext3_xattr_ibody_header *header;
398 struct ext3_inode *raw_inode; 401 struct ext3_inode *raw_inode;
399 struct ext3_iloc iloc; 402 struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
411 error = ext3_xattr_check_names(IFIRST(header), end); 414 error = ext3_xattr_check_names(IFIRST(header), end);
412 if (error) 415 if (error)
413 goto cleanup; 416 goto cleanup;
414 error = ext3_xattr_list_entries(inode, IFIRST(header), 417 error = ext3_xattr_list_entries(dentry, IFIRST(header),
415 buffer, buffer_size); 418 buffer, buffer_size);
416 419
417cleanup: 420cleanup:
@@ -430,12 +433,12 @@ cleanup:
430 * used / required on success. 433 * used / required on success.
431 */ 434 */
432static int 435static int
433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 436ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
434{ 437{
435 int i_error, b_error; 438 int i_error, b_error;
436 439
437 down_read(&EXT3_I(inode)->xattr_sem); 440 down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
438 i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); 441 i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
439 if (i_error < 0) { 442 if (i_error < 0) {
440 b_error = 0; 443 b_error = 0;
441 } else { 444 } else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
443 buffer += i_error; 446 buffer += i_error;
444 buffer_size -= i_error; 447 buffer_size -= i_error;
445 } 448 }
446 b_error = ext3_xattr_block_list(inode, buffer, buffer_size); 449 b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
447 if (b_error < 0) 450 if (b_error < 0)
448 i_error = 0; 451 i_error = 0;
449 } 452 }
450 up_read(&EXT3_I(inode)->xattr_sem); 453 up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
451 return i_error + b_error; 454 return i_error + b_error;
452} 455}
453 456
@@ -960,6 +963,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
960 if (error) 963 if (error)
961 goto cleanup; 964 goto cleanup;
962 965
966 error = ext3_journal_get_write_access(handle, is.iloc.bh);
967 if (error)
968 goto cleanup;
969
963 if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { 970 if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
964 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); 971 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
965 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 972 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
@@ -985,9 +992,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
985 if (flags & XATTR_CREATE) 992 if (flags & XATTR_CREATE)
986 goto cleanup; 993 goto cleanup;
987 } 994 }
988 error = ext3_journal_get_write_access(handle, is.iloc.bh);
989 if (error)
990 goto cleanup;
991 if (!value) { 995 if (!value) {
992 if (!is.s.not_found) 996 if (!is.s.not_found)
993 error = ext3_xattr_ibody_set(handle, inode, &i, &is); 997 error = ext3_xattr_ibody_set(handle, inode, &i, &is);
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf..474348788dd 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext3_xattr_security_get(struct inode *inode, const char *name, 31ext3_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, 36 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext3_xattr_security_set(struct inode *inode, const char *name, 41ext3_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, 46 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4..e5562845ed9 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext3_xattr_trusted_get(struct inode *inode, const char *name, 35ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
36 void *buffer, size_t size) 36 void *buffer, size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, 40 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext3_xattr_trusted_set(struct inode *inode, const char *name, 45ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, 50 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
51 value, size, flags); 51 value, size, flags);
52} 52}
53 53
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b..3bcfe9ee0a6 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext3_xattr_user_get(struct inode *inode, const char *name, 34ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
35 void *buffer, size_t size) 35 size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); 41 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext3_xattr_user_set(struct inode *inode, const char *name, 46ext3_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, 53 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext3_xattr_user_handler = { 57struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9f2d45d75b1..9ed1bb1f319 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -26,6 +26,17 @@ config EXT4_FS
26 26
27 If unsure, say N. 27 If unsure, say N.
28 28
29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
32 depends on EXT3_FS=n || EXT2_FS=n
33 default y
34 help
35 Allow the ext4 file system driver code to be used for ext2 or
36 ext3 file system mounts. This allows users to reduce their
37 compiled kernel size by using one file system driver for
38 ext2, ext3, and ext4 file systems.
39
29config EXT4_FS_XATTR 40config EXT4_FS_XATTR
30 bool "Ext4 extended attributes" 41 bool "Ext4 extended attributes"
31 depends on EXT4_FS 42 depends on EXT4_FS
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b..8a2a29d35a6 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
364 * Extended attribute handlers 364 * Extended attribute handlers
365 */ 365 */
366static size_t 366static size_t
367ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 367ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
368 const char *name, size_t name_len) 368 const char *name, size_t name_len, int type)
369{ 369{
370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
371 371
372 if (!test_opt(inode->i_sb, POSIX_ACL)) 372 if (!test_opt(dentry->d_sb, POSIX_ACL))
373 return 0; 373 return 0;
374 if (list && size <= list_len) 374 if (list && size <= list_len)
375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
377} 377}
378 378
379static size_t 379static size_t
380ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 380ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
381 const char *name, size_t name_len) 381 const char *name, size_t name_len, int type)
382{ 382{
383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
384 384
385 if (!test_opt(inode->i_sb, POSIX_ACL)) 385 if (!test_opt(dentry->d_sb, POSIX_ACL))
386 return 0; 386 return 0;
387 if (list && size <= list_len) 387 if (list && size <= list_len)
388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
390} 390}
391 391
392static int 392static int
393ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 393ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
394 size_t size, int type)
394{ 395{
395 struct posix_acl *acl; 396 struct posix_acl *acl;
396 int error; 397 int error;
397 398
398 if (!test_opt(inode->i_sb, POSIX_ACL)) 399 if (strcmp(name, "") != 0)
400 return -EINVAL;
401 if (!test_opt(dentry->d_sb, POSIX_ACL))
399 return -EOPNOTSUPP; 402 return -EOPNOTSUPP;
400 403
401 acl = ext4_get_acl(inode, type); 404 acl = ext4_get_acl(dentry->d_inode, type);
402 if (IS_ERR(acl)) 405 if (IS_ERR(acl))
403 return PTR_ERR(acl); 406 return PTR_ERR(acl);
404 if (acl == NULL) 407 if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
410} 413}
411 414
412static int 415static int
413ext4_xattr_get_acl_access(struct inode *inode, const char *name, 416ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
414 void *buffer, size_t size) 417 size_t size, int flags, int type)
415{
416 if (strcmp(name, "") != 0)
417 return -EINVAL;
418 return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
419}
420
421static int
422ext4_xattr_get_acl_default(struct inode *inode, const char *name,
423 void *buffer, size_t size)
424{
425 if (strcmp(name, "") != 0)
426 return -EINVAL;
427 return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
428}
429
430static int
431ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
432 size_t size)
433{ 418{
419 struct inode *inode = dentry->d_inode;
434 handle_t *handle; 420 handle_t *handle;
435 struct posix_acl *acl; 421 struct posix_acl *acl;
436 int error, retries = 0; 422 int error, retries = 0;
437 423
424 if (strcmp(name, "") != 0)
425 return -EINVAL;
438 if (!test_opt(inode->i_sb, POSIX_ACL)) 426 if (!test_opt(inode->i_sb, POSIX_ACL))
439 return -EOPNOTSUPP; 427 return -EOPNOTSUPP;
440 if (!is_owner_or_cap(inode)) 428 if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
466 return error; 454 return error;
467} 455}
468 456
469static int
470ext4_xattr_set_acl_access(struct inode *inode, const char *name,
471 const void *value, size_t size, int flags)
472{
473 if (strcmp(name, "") != 0)
474 return -EINVAL;
475 return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
476}
477
478static int
479ext4_xattr_set_acl_default(struct inode *inode, const char *name,
480 const void *value, size_t size, int flags)
481{
482 if (strcmp(name, "") != 0)
483 return -EINVAL;
484 return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
485}
486
487struct xattr_handler ext4_xattr_acl_access_handler = { 457struct xattr_handler ext4_xattr_acl_access_handler = {
488 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS,
489 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
490 .get = ext4_xattr_get_acl_access, 461 .get = ext4_xattr_get_acl,
491 .set = ext4_xattr_set_acl_access, 462 .set = ext4_xattr_set_acl,
492}; 463};
493 464
494struct xattr_handler ext4_xattr_acl_default_handler = { 465struct xattr_handler ext4_xattr_acl_default_handler = {
495 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT,
496 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
497 .get = ext4_xattr_get_acl_default, 469 .get = ext4_xattr_get_acl,
498 .set = ext4_xattr_set_acl_default, 470 .set = ext4_xattr_set_acl,
499}; 471};
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d0418980f8..22bc7435d91 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,44 +499,6 @@ error_return:
499} 499}
500 500
501/** 501/**
502 * ext4_free_blocks() -- Free given blocks and update quota
503 * @handle: handle for this transaction
504 * @inode: inode
505 * @block: start physical block to free
506 * @count: number of blocks to count
507 * @metadata: Are these metadata blocks
508 */
509void ext4_free_blocks(handle_t *handle, struct inode *inode,
510 ext4_fsblk_t block, unsigned long count,
511 int metadata)
512{
513 struct super_block *sb;
514 unsigned long dquot_freed_blocks;
515
516 /* this isn't the right place to decide whether block is metadata
517 * inode.c/extents.c knows better, but for safety ... */
518 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
519 metadata = 1;
520
521 /* We need to make sure we don't reuse
522 * block released untill the transaction commit.
523 * writeback mode have weak data consistency so
524 * don't force data as metadata when freeing block
525 * for writeback mode.
526 */
527 if (metadata == 0 && !ext4_should_writeback_data(inode))
528 metadata = 1;
529
530 sb = inode->i_sb;
531
532 ext4_mb_free_blocks(handle, inode, block, count,
533 metadata, &dquot_freed_blocks);
534 if (dquot_freed_blocks)
535 vfs_dq_free_block(inode, dquot_freed_blocks);
536 return;
537}
538
539/**
540 * ext4_has_free_blocks() 502 * ext4_has_free_blocks()
541 * @sbi: in-core super block structure. 503 * @sbi: in-core super block structure.
542 * @nblocks: number of needed blocks 504 * @nblocks: number of needed blocks
@@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
761static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 723static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
762 ext4_group_t group) 724 ext4_group_t group)
763{ 725{
764 return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; 726 if (!ext4_bg_has_super(sb, group))
727 return 0;
728
729 if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
730 return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
731 else
732 return EXT4_SB(sb)->s_gdb_count;
765} 733}
766 734
767/** 735/**
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 50784ef0756..a60ab9aad57 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
@@ -160,7 +159,7 @@ int ext4_setup_system_zone(struct super_block *sb)
160 if (ext4_bg_has_super(sb, i) && 159 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0))) 160 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i), 161 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1); 162 ext4_bg_num_gdb(sb, i) + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL); 163 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); 164 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret) 165 if (ret)
@@ -228,6 +227,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
228 struct rb_node *n = sbi->system_blks.rb_node; 227 struct rb_node *n = sbi->system_blks.rb_node;
229 228
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 229 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
230 (start_blk + count < start_blk) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es))) 231 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0; 232 return 0;
233 while (n) { 233 while (n) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8825515eedd..af7b62699ea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -376,6 +376,12 @@ struct ext4_new_group_data {
376 EXT4_GET_BLOCKS_DIO_CREATE_EXT) 376 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
377 377
378/* 378/*
379 * Flags used by ext4_free_blocks
380 */
381#define EXT4_FREE_BLOCKS_METADATA 0x0001
382#define EXT4_FREE_BLOCKS_FORGET 0x0002
383
384/*
379 * ioctl commands 385 * ioctl commands
380 */ 386 */
381#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS 387#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
@@ -693,16 +699,29 @@ struct ext4_inode_info {
693 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
694 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
695 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
696 704
697 /* on-disk additional length */ 705 /* on-disk additional length */
698 __u16 i_extra_isize; 706 __u16 i_extra_isize;
699 707
700 spinlock_t i_block_reservation_lock; 708 spinlock_t i_block_reservation_lock;
709#ifdef CONFIG_QUOTA
710 /* quota space reservation, managed internally by quota code */
711 qsize_t i_reserved_quota;
712#endif
701 713
702 /* completed async DIOs that might need unwritten extents handling */ 714 /* completed async DIOs that might need unwritten extents handling */
703 struct list_head i_aio_dio_complete_list; 715 struct list_head i_aio_dio_complete_list;
704 /* current io_end structure for async DIO write*/ 716 /* current io_end structure for async DIO write*/
705 ext4_io_end_t *cur_aio_dio; 717 ext4_io_end_t *cur_aio_dio;
718
719 /*
720 * Transactions that contain inode's metadata needed to complete
721 * fsync and fdatasync, respectively.
722 */
723 tid_t i_sync_tid;
724 tid_t i_datasync_tid;
706}; 725};
707 726
708/* 727/*
@@ -750,6 +769,7 @@ struct ext4_inode_info {
750#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 769#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
751#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 770#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
752#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 771#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
772#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
753 773
754#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 774#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
755#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 775#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1324,8 +1344,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1324 ext4_fsblk_t goal, unsigned long *count, int *errp); 1344 ext4_fsblk_t goal, unsigned long *count, int *errp);
1325extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1345extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1326extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1346extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1327extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1328 ext4_fsblk_t block, unsigned long count, int metadata);
1329extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1347extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1330 ext4_fsblk_t block, unsigned long count); 1348 ext4_fsblk_t block, unsigned long count);
1331extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1349extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1384,16 +1402,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
1384extern void ext4_discard_preallocations(struct inode *); 1402extern void ext4_discard_preallocations(struct inode *);
1385extern int __init init_ext4_mballoc(void); 1403extern int __init init_ext4_mballoc(void);
1386extern void exit_ext4_mballoc(void); 1404extern void exit_ext4_mballoc(void);
1387extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1405extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1388 ext4_fsblk_t, unsigned long, int, unsigned long *); 1406 struct buffer_head *bh, ext4_fsblk_t block,
1407 unsigned long count, int flags);
1389extern int ext4_mb_add_groupinfo(struct super_block *sb, 1408extern int ext4_mb_add_groupinfo(struct super_block *sb,
1390 ext4_group_t i, struct ext4_group_desc *desc); 1409 ext4_group_t i, struct ext4_group_desc *desc);
1391extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1410extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
1392extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1411extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
1393 ext4_group_t, int); 1412 ext4_group_t, int);
1394/* inode.c */ 1413/* inode.c */
1395int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1396 struct buffer_head *bh, ext4_fsblk_t blocknr);
1397struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1414struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1398 ext4_lblk_t, int, int *); 1415 ext4_lblk_t, int, int *);
1399struct buffer_head *ext4_bread(handle_t *, struct inode *, 1416struct buffer_head *ext4_bread(handle_t *, struct inode *,
@@ -1424,7 +1441,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1424extern int ext4_block_truncate_page(handle_t *handle, 1441extern int ext4_block_truncate_page(handle_t *handle,
1425 struct address_space *mapping, loff_t from); 1442 struct address_space *mapping, loff_t from);
1426extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1443extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1427extern qsize_t ext4_get_reserved_space(struct inode *inode); 1444extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1428extern int flush_aio_dio_completed_IO(struct inode *inode); 1445extern int flush_aio_dio_completed_IO(struct inode *inode);
1429/* ioctl.c */ 1446/* ioctl.c */
1430extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1447extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e8..bdb6ce7e2eb 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6a9409920de..b57e5c711b6 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -4,6 +4,8 @@
4 4
5#include "ext4_jbd2.h" 5#include "ext4_jbd2.h"
6 6
7#include <trace/events/ext4.h>
8
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 9int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 10 struct buffer_head *bh)
9{ 11{
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
32 return err; 34 return err;
33} 35}
34 36
35int __ext4_journal_forget(const char *where, handle_t *handle, 37/*
36 struct buffer_head *bh) 38 * The ext4 forget function must perform a revoke if we are freeing data
39 * which has been journaled. Metadata (eg. indirect blocks) must be
40 * revoked in all cases.
41 *
42 * "bh" may be NULL: a metadata block may have been freed from memory
43 * but there may still be a record of it in the journal, and that record
44 * still needs to be revoked.
45 *
46 * If the handle isn't valid we're not journaling, but we still need to
47 * call into ext4_journal_revoke() to put the buffer head.
48 */
49int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
50 struct inode *inode, struct buffer_head *bh,
51 ext4_fsblk_t blocknr)
37{ 52{
38 int err = 0; 53 int err;
39 54
40 if (ext4_handle_valid(handle)) { 55 might_sleep();
41 err = jbd2_journal_forget(handle, bh); 56
42 if (err) 57 trace_ext4_forget(inode, is_metadata, blocknr);
43 ext4_journal_abort_handle(where, __func__, bh, 58 BUFFER_TRACE(bh, "enter");
44 handle, err); 59
45 } 60 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
46 else 61 "data mode %x\n",
62 bh, is_metadata, inode->i_mode,
63 test_opt(inode->i_sb, DATA_FLAGS));
64
65 /* In the no journal case, we can just do a bforget and return */
66 if (!ext4_handle_valid(handle)) {
47 bforget(bh); 67 bforget(bh);
48 return err; 68 return 0;
49} 69 }
50 70
51int __ext4_journal_revoke(const char *where, handle_t *handle, 71 /* Never use the revoke function if we are doing full data
52 ext4_fsblk_t blocknr, struct buffer_head *bh) 72 * journaling: there is no need to, and a V1 superblock won't
53{ 73 * support it. Otherwise, only skip the revoke on un-journaled
54 int err = 0; 74 * data blocks. */
55 75
56 if (ext4_handle_valid(handle)) { 76 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
57 err = jbd2_journal_revoke(handle, blocknr, bh); 77 (!is_metadata && !ext4_should_journal_data(inode))) {
58 if (err) 78 if (bh) {
59 ext4_journal_abort_handle(where, __func__, bh, 79 BUFFER_TRACE(bh, "call jbd2_journal_forget");
60 handle, err); 80 err = jbd2_journal_forget(handle, bh);
81 if (err)
82 ext4_journal_abort_handle(where, __func__, bh,
83 handle, err);
84 return err;
85 }
86 return 0;
61 } 87 }
62 else 88
63 bforget(bh); 89 /*
90 * data!=journal && (is_metadata || should_journal_data(inode))
91 */
92 BUFFER_TRACE(bh, "call jbd2_journal_revoke");
93 err = jbd2_journal_revoke(handle, blocknr, bh);
94 if (err) {
95 ext4_journal_abort_handle(where, __func__, bh, handle, err);
96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err);
98 }
99 BUFFER_TRACE(bh, "exit");
64 return err; 100 return err;
65} 101}
66 102
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a2865980342..05eca817d70 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -49,7 +49,7 @@
49 49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ 50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
53 53
54/* 54/*
55 * Define the number of metadata blocks we need to account to modify data. 55 * Define the number of metadata blocks we need to account to modify data.
@@ -57,7 +57,7 @@
57 * This include super block, inode block, quota blocks and xattr blocks 57 * This include super block, inode block, quota blocks and xattr blocks
58 */ 58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
@@ -92,6 +92,7 @@
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) 94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
95
95#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ 96#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
96 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) 97 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
97#else 98#else
@@ -99,6 +100,9 @@
99#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 100#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
100#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 101#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
101#endif 102#endif
103#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
102 106
103int 107int
104ext4_mark_iloc_dirty(handle_t *handle, 108ext4_mark_iloc_dirty(handle_t *handle,
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
116int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); 120int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
117 121
118/* 122/*
119 * Wrapper functions with which ext4 calls into JBD. The intent here is 123 * Wrapper functions with which ext4 calls into JBD.
120 * to allow these to be turned into appropriate stubs so ext4 can control
121 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
122 * been done yet.
123 */ 124 */
124
125void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, const char *err_fn,
126 struct buffer_head *bh, handle_t *handle, int err); 126 struct buffer_head *bh, handle_t *handle, int err);
127 127
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */ 134int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
135int __ext4_journal_forget(const char *where, handle_t *handle, 135 struct inode *inode, struct buffer_head *bh,
136 struct buffer_head *bh); 136 ext4_fsblk_t blocknr);
137
138/* When called with an invalid handle, this will still do a put on the BH */
139int __ext4_journal_revoke(const char *where, handle_t *handle,
140 ext4_fsblk_t blocknr, struct buffer_head *bh);
141 137
142int __ext4_journal_get_create_access(const char *where, 138int __ext4_journal_get_create_access(const char *where,
143 handle_t *handle, struct buffer_head *bh); 139 handle_t *handle, struct buffer_head *bh);
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
149 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
150#define ext4_journal_get_write_access(handle, bh) \ 146#define ext4_journal_get_write_access(handle, bh) \
151 __ext4_journal_get_write_access(__func__, (handle), (bh)) 147 __ext4_journal_get_write_access(__func__, (handle), (bh))
152#define ext4_journal_revoke(handle, blocknr, bh) \ 148#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
153 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) 149 __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
150 (block_nr))
154#define ext4_journal_get_create_access(handle, bh) \ 151#define ext4_journal_get_create_access(handle, bh) \
155 __ext4_journal_get_create_access(__func__, (handle), (bh)) 152 __ext4_journal_get_create_access(__func__, (handle), (bh))
156#define ext4_journal_forget(handle, bh) \
157 __ext4_journal_forget(__func__, (handle), (bh))
158#define ext4_handle_dirty_metadata(handle, inode, bh) \ 153#define ext4_handle_dirty_metadata(handle, inode, bh) \
159 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) 154 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
160 155
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254 return 0; 249 return 0;
255} 250}
256 251
252static inline void ext4_update_inode_fsync_trans(handle_t *handle,
253 struct inode *inode,
254 int datasync)
255{
256 struct ext4_inode_info *ei = EXT4_I(inode);
257
258 if (ext4_handle_valid(handle)) {
259 ei->i_sync_tid = handle->h_transaction->t_tid;
260 if (datasync)
261 ei->i_datasync_tid = handle->h_transaction->t_tid;
262 }
263}
264
257/* super.c */ 265/* super.c */
258int ext4_force_commit(struct super_block *sb); 266int ext4_force_commit(struct super_block *sb);
259 267
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 715264b4bae..7d7b74e9468 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -1007,7 +1022,8 @@ cleanup:
1007 for (i = 0; i < depth; i++) { 1022 for (i = 0; i < depth; i++) {
1008 if (!ablocks[i]) 1023 if (!ablocks[i])
1009 continue; 1024 continue;
1010 ext4_free_blocks(handle, inode, ablocks[i], 1, 1); 1025 ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
1026 EXT4_FREE_BLOCKS_METADATA);
1011 } 1027 }
1012 } 1028 }
1013 kfree(ablocks); 1029 kfree(ablocks);
@@ -1761,7 +1777,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1761 while (block < last && block != EXT_MAX_BLOCK) { 1777 while (block < last && block != EXT_MAX_BLOCK) {
1762 num = last - block; 1778 num = last - block;
1763 /* find extent for this block */ 1779 /* find extent for this block */
1780 down_read(&EXT4_I(inode)->i_data_sem);
1764 path = ext4_ext_find_extent(inode, block, path); 1781 path = ext4_ext_find_extent(inode, block, path);
1782 up_read(&EXT4_I(inode)->i_data_sem);
1765 if (IS_ERR(path)) { 1783 if (IS_ERR(path)) {
1766 err = PTR_ERR(path); 1784 err = PTR_ERR(path);
1767 path = NULL; 1785 path = NULL;
@@ -1957,7 +1975,6 @@ errout:
1957static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 1975static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1958 struct ext4_ext_path *path) 1976 struct ext4_ext_path *path)
1959{ 1977{
1960 struct buffer_head *bh;
1961 int err; 1978 int err;
1962 ext4_fsblk_t leaf; 1979 ext4_fsblk_t leaf;
1963 1980
@@ -1973,9 +1990,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1973 if (err) 1990 if (err)
1974 return err; 1991 return err;
1975 ext_debug("index is empty, remove it, free block %llu\n", leaf); 1992 ext_debug("index is empty, remove it, free block %llu\n", leaf);
1976 bh = sb_find_get_block(inode->i_sb, leaf); 1993 ext4_free_blocks(handle, inode, 0, leaf, 1,
1977 ext4_forget(handle, 1, inode, bh, leaf); 1994 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1978 ext4_free_blocks(handle, inode, leaf, 1, 1);
1979 return err; 1995 return err;
1980} 1996}
1981 1997
@@ -2042,12 +2058,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2042 struct ext4_extent *ex, 2058 struct ext4_extent *ex,
2043 ext4_lblk_t from, ext4_lblk_t to) 2059 ext4_lblk_t from, ext4_lblk_t to)
2044{ 2060{
2045 struct buffer_head *bh;
2046 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2061 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2047 int i, metadata = 0; 2062 int flags = EXT4_FREE_BLOCKS_FORGET;
2048 2063
2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2064 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2050 metadata = 1; 2065 flags |= EXT4_FREE_BLOCKS_METADATA;
2051#ifdef EXTENTS_STATS 2066#ifdef EXTENTS_STATS
2052 { 2067 {
2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2068 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2087,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2072 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2087 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2073 start = ext_pblock(ex) + ee_len - num; 2088 start = ext_pblock(ex) + ee_len - num;
2074 ext_debug("free last %u blocks starting %llu\n", num, start); 2089 ext_debug("free last %u blocks starting %llu\n", num, start);
2075 for (i = 0; i < num; i++) { 2090 ext4_free_blocks(handle, inode, 0, start, num, flags);
2076 bh = sb_find_get_block(inode->i_sb, start + i);
2077 ext4_forget(handle, 0, inode, bh, start + i);
2078 }
2079 ext4_free_blocks(handle, inode, start, num, metadata);
2080 } else if (from == le32_to_cpu(ex->ee_block) 2091 } else if (from == le32_to_cpu(ex->ee_block)
2081 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2092 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2082 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2093 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2178,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2167 correct_index = 1; 2178 correct_index = 1;
2168 credits += (ext_depth(inode)) + 1; 2179 credits += (ext_depth(inode)) + 1;
2169 } 2180 }
2170 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2181 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2171 2182
2172 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2183 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
2173 if (err) 2184 if (err)
@@ -3027,6 +3038,14 @@ out:
3027 return err; 3038 return err;
3028} 3039}
3029 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3030static int 3049static int
3031ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3032 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3064,6 +3083,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3064 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3083 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
3065 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3084 ret = ext4_convert_unwritten_extents_dio(handle, inode,
3066 path); 3085 path);
3086 if (ret >= 0)
3087 ext4_update_inode_fsync_trans(handle, inode, 1);
3067 goto out2; 3088 goto out2;
3068 } 3089 }
3069 /* buffered IO case */ 3090 /* buffered IO case */
@@ -3091,6 +3112,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3091 ret = ext4_ext_convert_to_initialized(handle, inode, 3112 ret = ext4_ext_convert_to_initialized(handle, inode,
3092 path, iblock, 3113 path, iblock,
3093 max_blocks); 3114 max_blocks);
3115 if (ret >= 0)
3116 ext4_update_inode_fsync_trans(handle, inode, 1);
3094out: 3117out:
3095 if (ret <= 0) { 3118 if (ret <= 0) {
3096 err = ret; 3119 err = ret;
@@ -3098,6 +3121,18 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 }
3101map_out: 3136map_out:
3102 set_buffer_mapped(bh_result); 3137 set_buffer_mapped(bh_result);
3103out1: 3138out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3225 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3226 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3227 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3228 if (path[depth].p_ext == NULL && depth != 0) {
3229 ext4_error(inode->i_sb, __func__, "bad extent address "
3230 "inode: %lu, iblock: %d, depth: %d",
3231 inode->i_ino, iblock, depth);
3232 err = -EIO;
3233 goto out2;
3234 }
3194 eh = path[depth].p_hdr; 3235 eh = path[depth].p_hdr;
3195 3236
3196 ex = path[depth].p_ext; 3237 ex = path[depth].p_ext;
@@ -3319,8 +3360,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3319 /* not a good idea to call discard here directly, 3360 /* not a good idea to call discard here directly,
3320 * but otherwise we'd need to call it every free() */ 3361 * but otherwise we'd need to call it every free() */
3321 ext4_discard_preallocations(inode); 3362 ext4_discard_preallocations(inode);
3322 ext4_free_blocks(handle, inode, ext_pblock(&newex), 3363 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
3323 ext4_ext_get_actual_len(&newex), 0); 3364 ext4_ext_get_actual_len(&newex), 0);
3324 goto out2; 3365 goto out2;
3325 } 3366 }
3326 3367
@@ -3329,10 +3370,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3329 allocated = ext4_ext_get_actual_len(&newex); 3370 allocated = ext4_ext_get_actual_len(&newex);
3330 set_buffer_new(bh_result); 3371 set_buffer_new(bh_result);
3331 3372
3332 /* Cache only when it is _not_ an uninitialized extent */ 3373 /*
3333 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 3374 * Cache the extent and update transaction to commit on fdatasync only
3375 * when it is _not_ an uninitialized extent.
3376 */
3377 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3334 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3378 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
3335 EXT4_EXT_CACHE_EXTENT); 3379 EXT4_EXT_CACHE_EXTENT);
3380 ext4_update_inode_fsync_trans(handle, inode, 1);
3381 } else
3382 ext4_update_inode_fsync_trans(handle, inode, 0);
3336out: 3383out:
3337 if (allocated > max_blocks) 3384 if (allocated > max_blocks)
3338 allocated = max_blocks; 3385 allocated = max_blocks;
@@ -3720,10 +3767,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3720 * Walk the extent tree gathering extent information. 3767 * Walk the extent tree gathering extent information.
3721 * ext4_ext_fiemap_cb will push extents back to user. 3768 * ext4_ext_fiemap_cb will push extents back to user.
3722 */ 3769 */
3723 down_read(&EXT4_I(inode)->i_data_sem);
3724 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3770 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3725 ext4_ext_fiemap_cb, fieinfo); 3771 ext4_ext_fiemap_cb, fieinfo);
3726 up_read(&EXT4_I(inode)->i_data_sem);
3727 } 3772 }
3728 3773
3729 return error; 3774 return error;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2b1531266ee..98bd140aad0 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -51,25 +51,30 @@
51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) 51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct ext4_inode_info *ei = EXT4_I(inode);
54 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 55 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
55 int err, ret = 0; 56 int ret;
57 tid_t commit_tid;
56 58
57 J_ASSERT(ext4_journal_current_handle() == NULL); 59 J_ASSERT(ext4_journal_current_handle() == NULL);
58 60
59 trace_ext4_sync_file(file, dentry, datasync); 61 trace_ext4_sync_file(file, dentry, datasync);
60 62
63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0;
65
61 ret = flush_aio_dio_completed_IO(inode); 66 ret = flush_aio_dio_completed_IO(inode);
62 if (ret < 0) 67 if (ret < 0)
63 goto out; 68 return ret;
69
70 if (!journal)
71 return simple_fsync(file, dentry, datasync);
72
64 /* 73 /*
65 * data=writeback: 74 * data=writeback,ordered:
66 * The caller's filemap_fdatawrite()/wait will sync the data. 75 * The caller's filemap_fdatawrite()/wait will sync the data.
67 * sync_inode() will sync the metadata 76 * Metadata is in the journal, we wait for proper transaction to
68 * 77 * commit here.
69 * data=ordered:
70 * The caller's filemap_fdatawrite() will write the data and
71 * sync_inode() will write the inode if it is dirty. Then the caller's
72 * filemap_fdatawait() will wait on the pages.
73 * 78 *
74 * data=journal: 79 * data=journal:
75 * filemap_fdatawrite won't do anything (the buffers are clean). 80 * filemap_fdatawrite won't do anything (the buffers are clean).
@@ -79,32 +84,25 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
79 * (they were dirtied by commit). But that's OK - the blocks are 84 * (they were dirtied by commit). But that's OK - the blocks are
80 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
81 */ 86 */
82 if (ext4_should_journal_data(inode)) { 87 if (ext4_should_journal_data(inode))
83 ret = ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
84 goto out;
85 }
86 89
87 if (!journal) 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
88 ret = sync_mapping_buffers(inode->i_mapping); 91 if (jbd2_log_start_commit(journal, commit_tid)) {
89 92 /*
90 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 93 * When the journal is on a different device than the
91 goto out; 94 * fs data disk, we need to issue the barrier in
92 95 * writeback mode. (In ordered mode, the jbd2 layer
93 /* 96 * will take care of issuing the barrier. In
94 * The VFS has written the file data. If the inode is unaltered 97 * data=journal, all of the data blocks are written to
95 * then we need not start a commit. 98 * the journal device.)
96 */ 99 */
97 if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { 100 if (ext4_should_writeback_data(inode) &&
98 struct writeback_control wbc = { 101 (journal->j_fs_dev != journal->j_dev) &&
99 .sync_mode = WB_SYNC_ALL, 102 (journal->j_flags & JBD2_BARRIER))
100 .nr_to_write = 0, /* sys_fsync did this */ 103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
101 }; 104 jbd2_log_wait_commit(journal, commit_tid);
102 err = sync_inode(inode, &wbc); 105 } else if (journal->j_flags & JBD2_BARRIER)
103 if (ret == 0)
104 ret = err;
105 }
106out:
107 if (journal && (journal->j_flags & JBD2_BARRIER))
108 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
109 return ret; 107 return ret;
110} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c8caa51add..c818972c830 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,58 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71} 71}
72 72
73/* 73/*
74 * The ext4 forget function must perform a revoke if we are freeing data
75 * which has been journaled. Metadata (eg. indirect blocks) must be
76 * revoked in all cases.
77 *
78 * "bh" may be NULL: a metadata block may have been freed from memory
79 * but there may still be a record of it in the journal, and that record
80 * still needs to be revoked.
81 *
82 * If the handle isn't valid we're not journaling, but we still need to
83 * call into ext4_journal_revoke() to put the buffer head.
84 */
85int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
86 struct buffer_head *bh, ext4_fsblk_t blocknr)
87{
88 int err;
89
90 might_sleep();
91
92 BUFFER_TRACE(bh, "enter");
93
94 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
95 "data mode %x\n",
96 bh, is_metadata, inode->i_mode,
97 test_opt(inode->i_sb, DATA_FLAGS));
98
99 /* Never use the revoke function if we are doing full data
100 * journaling: there is no need to, and a V1 superblock won't
101 * support it. Otherwise, only skip the revoke on un-journaled
102 * data blocks. */
103
104 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
105 (!is_metadata && !ext4_should_journal_data(inode))) {
106 if (bh) {
107 BUFFER_TRACE(bh, "call jbd2_journal_forget");
108 return ext4_journal_forget(handle, bh);
109 }
110 return 0;
111 }
112
113 /*
114 * data!=journal && (is_metadata || should_journal_data(inode))
115 */
116 BUFFER_TRACE(bh, "call ext4_journal_revoke");
117 err = ext4_journal_revoke(handle, blocknr, bh);
118 if (err)
119 ext4_abort(inode->i_sb, __func__,
120 "error %d when attempting revoke", err);
121 BUFFER_TRACE(bh, "exit");
122 return err;
123}
124
125/*
126 * Work out how many blocks we need to proceed with the next chunk of a 74 * Work out how many blocks we need to proceed with the next chunk of a
127 * truncate transaction. 75 * truncate transaction.
128 */ 76 */
@@ -721,7 +669,7 @@ allocated:
721 return ret; 669 return ret;
722failed_out: 670failed_out:
723 for (i = 0; i < index; i++) 671 for (i = 0; i < index; i++)
724 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 672 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
725 return ret; 673 return ret;
726} 674}
727 675
@@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
817 return err; 765 return err;
818failed: 766failed:
819 /* Allocation failed, free what we already allocated */ 767 /* Allocation failed, free what we already allocated */
768 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
820 for (i = 1; i <= n ; i++) { 769 for (i = 1; i <= n ; i++) {
821 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 770 /*
822 ext4_journal_forget(handle, branch[i].bh); 771 * branch[i].bh is newly allocated, so there is no
772 * need to revoke the block, which is why we don't
773 * need to set EXT4_FREE_BLOCKS_METADATA.
774 */
775 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
776 EXT4_FREE_BLOCKS_FORGET);
823 } 777 }
824 for (i = 0; i < indirect_blks; i++) 778 for (i = n+1; i < indirect_blks; i++)
825 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 779 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
826 780
827 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 781 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
828 782
829 return err; 783 return err;
830} 784}
@@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
903 857
904err_out: 858err_out:
905 for (i = 1; i <= num; i++) { 859 for (i = 1; i <= num; i++) {
906 BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); 860 /*
907 ext4_journal_forget(handle, where[i].bh); 861 * branch[i].bh is newly allocated, so there is no
908 ext4_free_blocks(handle, inode, 862 * need to revoke the block, which is why we don't
909 le32_to_cpu(where[i-1].key), 1, 0); 863 * need to set EXT4_FREE_BLOCKS_METADATA.
864 */
865 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
866 EXT4_FREE_BLOCKS_FORGET);
910 } 867 }
911 ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); 868 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
869 blks, 0);
912 870
913 return err; 871 return err;
914} 872}
@@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
1021 if (!err) 979 if (!err)
1022 err = ext4_splice_branch(handle, inode, iblock, 980 err = ext4_splice_branch(handle, inode, iblock,
1023 partial, indirect_blks, count); 981 partial, indirect_blks, count);
1024 else 982 if (err)
1025 goto cleanup; 983 goto cleanup;
1026 984
1027 set_buffer_new(bh_result); 985 set_buffer_new(bh_result);
986
987 ext4_update_inode_fsync_trans(handle, inode, 1);
1028got_it: 988got_it:
1029 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 989 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
1030 if (count > blocks_to_boundary) 990 if (count > blocks_to_boundary)
@@ -1043,83 +1003,94 @@ out:
1043 return err; 1003 return err;
1044} 1004}
1045 1005
1046qsize_t ext4_get_reserved_space(struct inode *inode) 1006#ifdef CONFIG_QUOTA
1007qsize_t *ext4_get_reserved_space(struct inode *inode)
1047{ 1008{
1048 unsigned long long total; 1009 return &EXT4_I(inode)->i_reserved_quota;
1049
1050 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1051 total = EXT4_I(inode)->i_reserved_data_blocks +
1052 EXT4_I(inode)->i_reserved_meta_blocks;
1053 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1054
1055 return total;
1056} 1010}
1011#endif
1012
1057/* 1013/*
1058 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1059 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1060 */ 1016 */
1061static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1062{ 1019{
1063 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1064 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1065 1022 int blk_bits;
1066 /* number of new indirect blocks needed */
1067 ind_blks = (blocks + icap - 1) / icap;
1068 1023
1069 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1070 1026
1071 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1072 1028
1073 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1074} 1038}
1075 1039
1076/* 1040/*
1077 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1078 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1079 */ 1043 */
1080static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1081{ 1045{
1082 if (!blocks)
1083 return 0;
1084
1085 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1086 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1087 1048
1088 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1089} 1050}
1090 1051
1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1091static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056static void ext4_da_update_reserve_space(struct inode *inode, int used)
1092{ 1057{
1093 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1094 int total, mdb, mdb_free; 1059 struct ext4_inode_info *ei = EXT4_I(inode);
1095 1060 int mdb_free = 0;
1096 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1061
1097 /* recalculate the number of metablocks still need to be reserved */ 1062 spin_lock(&ei->i_block_reservation_lock);
1098 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1063 if (unlikely(used > ei->i_reserved_data_blocks)) {
1099 mdb = ext4_calc_metadata_amount(inode, total); 1064 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1100 1065 "with only %d reserved data blocks\n",
1101 /* figure out how many metablocks to release */ 1066 __func__, inode->i_ino, used,
1102 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1067 ei->i_reserved_data_blocks);
1103 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1068 WARN_ON(1);
1104 1069 used = ei->i_reserved_data_blocks;
1105 if (mdb_free) { 1070 }
1106 /* Account for allocated meta_blocks */ 1071
1107 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1072 /* Update per-inode reservations */
1108 1073 ei->i_reserved_data_blocks -= used;
1109 /* update fs dirty blocks counter */ 1074 used += ei->i_allocated_meta_blocks;
1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1076 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078
1079 if (ei->i_reserved_data_blocks == 0) {
1080 /*
1081 * We can release all of the reserved metadata blocks
1082 * only when we have written all of the delayed
1083 * allocation blocks.
1084 */
1085 mdb_free = ei->i_reserved_meta_blocks;
1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1110 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1111 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1112 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1113 } 1089 }
1114
1115 /* update per-inode reservations */
1116 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1117 EXT4_I(inode)->i_reserved_data_blocks -= used;
1118 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1119 1091
1120 /* 1092 /* Update quota subsystem */
1121 * free those over-booking quota for metadata blocks 1093 vfs_dq_claim_block(inode, used);
1122 */
1123 if (mdb_free) 1094 if (mdb_free)
1124 vfs_dq_release_reservation_block(inode, mdb_free); 1095 vfs_dq_release_reservation_block(inode, mdb_free);
1125 1096
@@ -1128,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1128 * there aren't any writers on the inode, we can discard the 1099 * there aren't any writers on the inode, we can discard the
1129 * inode's preallocations. 1100 * inode's preallocations.
1130 */ 1101 */
1131 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1102 if ((ei->i_reserved_data_blocks == 0) &&
1103 (atomic_read(&inode->i_writecount) == 0))
1132 ext4_discard_preallocations(inode); 1104 ext4_discard_preallocations(inode);
1133} 1105}
1134 1106
@@ -1534,6 +1506,16 @@ static int do_journal_get_write_access(handle_t *handle,
1534 return ext4_journal_get_write_access(handle, bh); 1506 return ext4_journal_get_write_access(handle, bh);
1535} 1507}
1536 1508
1509/*
1510 * Truncate blocks that were not used by write. We have to truncate the
1511 * pagecache as well so that corresponding buffers get properly unmapped.
1512 */
1513static void ext4_truncate_failed_write(struct inode *inode)
1514{
1515 truncate_inode_pages(inode->i_mapping, inode->i_size);
1516 ext4_truncate(inode);
1517}
1518
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1519static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1520 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1521 struct page **pagep, void **fsdata)
@@ -1599,7 +1581,7 @@ retry:
1599 1581
1600 ext4_journal_stop(handle); 1582 ext4_journal_stop(handle);
1601 if (pos + len > inode->i_size) { 1583 if (pos + len > inode->i_size) {
1602 ext4_truncate(inode); 1584 ext4_truncate_failed_write(inode);
1603 /* 1585 /*
1604 * If truncate failed early the inode might 1586 * If truncate failed early the inode might
1605 * still be on the orphan list; we need to 1587 * still be on the orphan list; we need to
@@ -1709,7 +1691,7 @@ static int ext4_ordered_write_end(struct file *file,
1709 ret = ret2; 1691 ret = ret2;
1710 1692
1711 if (pos + len > inode->i_size) { 1693 if (pos + len > inode->i_size) {
1712 ext4_truncate(inode); 1694 ext4_truncate_failed_write(inode);
1713 /* 1695 /*
1714 * If truncate failed early the inode might still be 1696 * If truncate failed early the inode might still be
1715 * on the orphan list; we need to make sure the inode 1697 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1733,7 @@ static int ext4_writeback_write_end(struct file *file,
1751 ret = ret2; 1733 ret = ret2;
1752 1734
1753 if (pos + len > inode->i_size) { 1735 if (pos + len > inode->i_size) {
1754 ext4_truncate(inode); 1736 ext4_truncate_failed_write(inode);
1755 /* 1737 /*
1756 * If truncate failed early the inode might still be 1738 * If truncate failed early the inode might still be
1757 * on the orphan list; we need to make sure the inode 1739 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1796,7 @@ static int ext4_journalled_write_end(struct file *file,
1814 if (!ret) 1796 if (!ret)
1815 ret = ret2; 1797 ret = ret2;
1816 if (pos + len > inode->i_size) { 1798 if (pos + len > inode->i_size) {
1817 ext4_truncate(inode); 1799 ext4_truncate_failed_write(inode);
1818 /* 1800 /*
1819 * If truncate failed early the inode might still be 1801 * If truncate failed early the inode might still be
1820 * on the orphan list; we need to make sure the inode 1802 * on the orphan list; we need to make sure the inode
@@ -1827,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1827 return ret ? ret : copied; 1809 return ret ? ret : copied;
1828} 1810}
1829 1811
1830static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1831{ 1816{
1832 int retries = 0; 1817 int retries = 0;
1833 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1834 unsigned long md_needed, mdblocks, total = 0; 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1820 unsigned long md_needed, md_reserved;
1835 1821
1836 /* 1822 /*
1837 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1839,86 +1825,90 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1839 * worse case is one extent per block 1825 * worse case is one extent per block
1840 */ 1826 */
1841repeat: 1827repeat:
1842 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1843 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1844 mdblocks = ext4_calc_metadata_amount(inode, total); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1845 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1831 spin_unlock(&ei->i_block_reservation_lock);
1846
1847 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1848 total = md_needed + nrblocks;
1849 1832
1850 /* 1833 /*
1851 * Make quota reservation here to prevent quota overflow 1834 * Make quota reservation here to prevent quota overflow
1852 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1853 * time. 1836 * time.
1854 */ 1837 */
1855 if (vfs_dq_reserve_block(inode, total)) { 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1856 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1857 return -EDQUOT; 1847 return -EDQUOT;
1858 } 1848 }
1859 1849
1860 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1861 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1862 vfs_dq_release_reservation_block(inode, total);
1863 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1864 yield(); 1856 yield();
1865 goto repeat; 1857 goto repeat;
1866 } 1858 }
1867 return -ENOSPC; 1859 return -ENOSPC;
1868 } 1860 }
1869 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1861 spin_lock(&ei->i_block_reservation_lock);
1870 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1862 ei->i_reserved_data_blocks++;
1863 ei->i_reserved_meta_blocks += md_needed;
1864 spin_unlock(&ei->i_block_reservation_lock);
1871 1865
1872 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1873 return 0; /* success */ 1866 return 0; /* success */
1874} 1867}
1875 1868
1876static void ext4_da_release_space(struct inode *inode, int to_free) 1869static void ext4_da_release_space(struct inode *inode, int to_free)
1877{ 1870{
1878 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1879 int total, mdb, mdb_free, release; 1872 struct ext4_inode_info *ei = EXT4_I(inode);
1880 1873
1881 if (!to_free) 1874 if (!to_free)
1882 return; /* Nothing to release, exit */ 1875 return; /* Nothing to release, exit */
1883 1876
1884 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1877 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1885 1878
1886 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1879 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1887 /* 1880 /*
1888 * if there is no reserved blocks, but we try to free some 1881 * if there aren't enough reserved blocks, then the
1889 * then the counter is messed up somewhere. 1882 * counter is messed up somewhere. Since this
1890 * but since this function is called from invalidate 1883 * function is called from invalidate page, it's
1891 * page, it's harmless to return without any action 1884 * harmless to return without any action.
1892 */ 1885 */
1893 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1886 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1894 "blocks for inode %lu, but there is no reserved " 1887 "ino %lu, to_free %d with only %d reserved "
1895 "data blocks\n", to_free, inode->i_ino); 1888 "data blocks\n", inode->i_ino, to_free,
1896 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1889 ei->i_reserved_data_blocks);
1897 return; 1890 WARN_ON(1);
1891 to_free = ei->i_reserved_data_blocks;
1898 } 1892 }
1893 ei->i_reserved_data_blocks -= to_free;
1899 1894
1900 /* recalculate the number of metablocks still need to be reserved */ 1895 if (ei->i_reserved_data_blocks == 0) {
1901 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1896 /*
1902 mdb = ext4_calc_metadata_amount(inode, total); 1897 * We can release all of the reserved metadata blocks
1903 1898 * only when we have written all of the delayed
1904 /* figure out how many metablocks to release */ 1899 * allocation blocks.
1905 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1900 */
1906 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1901 to_free += ei->i_reserved_meta_blocks;
1907 1902 ei->i_reserved_meta_blocks = 0;
1908 release = to_free + mdb_free; 1903 ei->i_da_metadata_calc_len = 0;
1909 1904 }
1910 /* update fs dirty blocks counter for truncate case */
1911 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1912 1905
1913 /* update per-inode reservations */ 1906 /* update fs dirty blocks counter */
1914 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1907 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1915 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1916 1908
1917 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1918 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1919 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1909 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1920 1910
1921 vfs_dq_release_reservation_block(inode, release); 1911 vfs_dq_release_reservation_block(inode, to_free);
1922} 1912}
1923 1913
1924static void ext4_da_page_release_reservation(struct page *page, 1914static void ext4_da_page_release_reservation(struct page *page,
@@ -2524,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2524 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2525 * is it OK? 2515 * is it OK?
2526 */ 2516 */
2527 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2528 if (ret) 2518 if (ret)
2529 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2530 return ret; 2520 return ret;
@@ -2600,7 +2590,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
2600} 2590}
2601 2591
2602static int __ext4_journalled_writepage(struct page *page, 2592static int __ext4_journalled_writepage(struct page *page,
2603 struct writeback_control *wbc,
2604 unsigned int len) 2593 unsigned int len)
2605{ 2594{
2606 struct address_space *mapping = page->mapping; 2595 struct address_space *mapping = page->mapping;
@@ -2758,7 +2747,7 @@ static int ext4_writepage(struct page *page,
2758 * doesn't seem much point in redirtying the page here. 2747 * doesn't seem much point in redirtying the page here.
2759 */ 2748 */
2760 ClearPageChecked(page); 2749 ClearPageChecked(page);
2761 return __ext4_journalled_writepage(page, wbc, len); 2750 return __ext4_journalled_writepage(page, len);
2762 } 2751 }
2763 2752
2764 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2753 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2788,7 +2777,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2788 * number of contiguous block. So we will limit 2777 * number of contiguous block. So we will limit
2789 * number of contiguous block to a sane value 2778 * number of contiguous block to a sane value
2790 */ 2779 */
2791 if (!(inode->i_flags & EXT4_EXTENTS_FL) && 2780 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
2792 (max_blocks > EXT4_MAX_TRANS_DATA)) 2781 (max_blocks > EXT4_MAX_TRANS_DATA))
2793 max_blocks = EXT4_MAX_TRANS_DATA; 2782 max_blocks = EXT4_MAX_TRANS_DATA;
2794 2783
@@ -2933,7 +2922,7 @@ retry:
2933 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, 2922 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2934 &mpd); 2923 &mpd);
2935 /* 2924 /*
2936 * If we have a contigous extent of pages and we 2925 * If we have a contiguous extent of pages and we
2937 * haven't done the I/O yet, map the blocks and submit 2926 * haven't done the I/O yet, map the blocks and submit
2938 * them for I/O. 2927 * them for I/O.
2939 */ 2928 */
@@ -2999,8 +2988,7 @@ retry:
2999out_writepages: 2988out_writepages:
3000 if (!no_nrwrite_index_update) 2989 if (!no_nrwrite_index_update)
3001 wbc->no_nrwrite_index_update = 0; 2990 wbc->no_nrwrite_index_update = 0;
3002 if (wbc->nr_to_write > nr_to_writebump) 2991 wbc->nr_to_write -= nr_to_writebump;
3003 wbc->nr_to_write -= nr_to_writebump;
3004 wbc->range_start = range_start; 2992 wbc->range_start = range_start;
3005 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2993 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
3006 return ret; 2994 return ret;
@@ -3025,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
3025 if (2 * free_blocks < 3 * dirty_blocks || 3013 if (2 * free_blocks < 3 * dirty_blocks ||
3026 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3014 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
3027 /* 3015 /*
3028 * free block count is less that 150% of dirty blocks 3016 * free block count is less than 150% of dirty blocks
3029 * or free blocks is less that watermark 3017 * or free blocks is less than watermark
3030 */ 3018 */
3031 return 1; 3019 return 1;
3032 } 3020 }
3021 /*
3022 * Even if we don't switch but are nearing capacity,
3023 * start pushing delalloc when 1/2 of free blocks are dirty.
3024 */
3025 if (free_blocks < 2 * dirty_blocks)
3026 writeback_inodes_sb_if_idle(sb);
3027
3033 return 0; 3028 return 0;
3034} 3029}
3035 3030
@@ -3091,7 +3086,7 @@ retry:
3091 * i_size_read because we hold i_mutex. 3086 * i_size_read because we hold i_mutex.
3092 */ 3087 */
3093 if (pos + len > inode->i_size) 3088 if (pos + len > inode->i_size)
3094 ext4_truncate(inode); 3089 ext4_truncate_failed_write(inode);
3095 } 3090 }
3096 3091
3097 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3092 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4064,7 +4059,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
4064 int k, err; 4059 int k, err;
4065 4060
4066 *top = 0; 4061 *top = 0;
4067 /* Make k index the deepest non-null offest + 1 */ 4062 /* Make k index the deepest non-null offset + 1 */
4068 for (k = depth; k > 1 && !offsets[k-1]; k--) 4063 for (k = depth; k > 1 && !offsets[k-1]; k--)
4069 ; 4064 ;
4070 partial = ext4_get_branch(inode, k, offsets, chain, &err); 4065 partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4120,6 +4115,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4120 __le32 *last) 4115 __le32 *last)
4121{ 4116{
4122 __le32 *p; 4117 __le32 *p;
4118 int flags = EXT4_FREE_BLOCKS_FORGET;
4119
4120 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4121 flags |= EXT4_FREE_BLOCKS_METADATA;
4122
4123 if (try_to_extend_transaction(handle, inode)) { 4123 if (try_to_extend_transaction(handle, inode)) {
4124 if (bh) { 4124 if (bh) {
4125 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4125 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4134,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 } 4134 }
4135 } 4135 }
4136 4136
4137 /* 4137 for (p = first; p < last; p++)
4138 * Any buffers which are on the journal will be in memory. We 4138 *p = 0;
4139 * find them on the hash table so jbd2_journal_revoke() will
4140 * run jbd2_journal_forget() on them. We've already detached
4141 * each block from the file, so bforget() in
4142 * jbd2_journal_forget() should be safe.
4143 *
4144 * AKPM: turn on bforget in jbd2_journal_forget()!!!
4145 */
4146 for (p = first; p < last; p++) {
4147 u32 nr = le32_to_cpu(*p);
4148 if (nr) {
4149 struct buffer_head *tbh;
4150
4151 *p = 0;
4152 tbh = sb_find_get_block(inode->i_sb, nr);
4153 ext4_forget(handle, 0, inode, tbh, nr);
4154 }
4155 }
4156 4139
4157 ext4_free_blocks(handle, inode, block_to_free, count, 0); 4140 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4158} 4141}
4159 4142
4160/** 4143/**
@@ -4342,7 +4325,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4342 blocks_for_truncate(inode)); 4325 blocks_for_truncate(inode));
4343 } 4326 }
4344 4327
4345 ext4_free_blocks(handle, inode, nr, 1, 1); 4328 ext4_free_blocks(handle, inode, 0, nr, 1,
4329 EXT4_FREE_BLOCKS_METADATA);
4346 4330
4347 if (parent_bh) { 4331 if (parent_bh) {
4348 /* 4332 /*
@@ -4781,8 +4765,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4781 struct ext4_iloc iloc; 4765 struct ext4_iloc iloc;
4782 struct ext4_inode *raw_inode; 4766 struct ext4_inode *raw_inode;
4783 struct ext4_inode_info *ei; 4767 struct ext4_inode_info *ei;
4784 struct buffer_head *bh;
4785 struct inode *inode; 4768 struct inode *inode;
4769 journal_t *journal = EXT4_SB(sb)->s_journal;
4786 long ret; 4770 long ret;
4787 int block; 4771 int block;
4788 4772
@@ -4793,11 +4777,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4793 return inode; 4777 return inode;
4794 4778
4795 ei = EXT4_I(inode); 4779 ei = EXT4_I(inode);
4780 iloc.bh = 0;
4796 4781
4797 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4782 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4798 if (ret < 0) 4783 if (ret < 0)
4799 goto bad_inode; 4784 goto bad_inode;
4800 bh = iloc.bh;
4801 raw_inode = ext4_raw_inode(&iloc); 4785 raw_inode = ext4_raw_inode(&iloc);
4802 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4786 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4803 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4787 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4804,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4820 if (inode->i_mode == 0 || 4804 if (inode->i_mode == 0 ||
4821 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4805 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
4822 /* this inode is deleted */ 4806 /* this inode is deleted */
4823 brelse(bh);
4824 ret = -ESTALE; 4807 ret = -ESTALE;
4825 goto bad_inode; 4808 goto bad_inode;
4826 } 4809 }
@@ -4837,6 +4820,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4837 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4820 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4838 inode->i_size = ext4_isize(raw_inode); 4821 inode->i_size = ext4_isize(raw_inode);
4839 ei->i_disksize = inode->i_size; 4822 ei->i_disksize = inode->i_size;
4823#ifdef CONFIG_QUOTA
4824 ei->i_reserved_quota = 0;
4825#endif
4840 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4826 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4841 ei->i_block_group = iloc.block_group; 4827 ei->i_block_group = iloc.block_group;
4842 ei->i_last_alloc_group = ~0; 4828 ei->i_last_alloc_group = ~0;
@@ -4848,11 +4834,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4848 ei->i_data[block] = raw_inode->i_block[block]; 4834 ei->i_data[block] = raw_inode->i_block[block];
4849 INIT_LIST_HEAD(&ei->i_orphan); 4835 INIT_LIST_HEAD(&ei->i_orphan);
4850 4836
4837 /*
4838 * Set transaction id's of transactions that have to be committed
4839 * to finish f[data]sync. We set them to currently running transaction
4840 * as we cannot be sure that the inode or some of its metadata isn't
4841 * part of the transaction - the inode could have been reclaimed and
4842 * now it is reread from disk.
4843 */
4844 if (journal) {
4845 transaction_t *transaction;
4846 tid_t tid;
4847
4848 spin_lock(&journal->j_state_lock);
4849 if (journal->j_running_transaction)
4850 transaction = journal->j_running_transaction;
4851 else
4852 transaction = journal->j_committing_transaction;
4853 if (transaction)
4854 tid = transaction->t_tid;
4855 else
4856 tid = journal->j_commit_sequence;
4857 spin_unlock(&journal->j_state_lock);
4858 ei->i_sync_tid = tid;
4859 ei->i_datasync_tid = tid;
4860 }
4861
4851 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4862 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4852 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4863 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4853 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4864 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4854 EXT4_INODE_SIZE(inode->i_sb)) { 4865 EXT4_INODE_SIZE(inode->i_sb)) {
4855 brelse(bh);
4856 ret = -EIO; 4866 ret = -EIO;
4857 goto bad_inode; 4867 goto bad_inode;
4858 } 4868 }
@@ -4884,10 +4894,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4884 4894
4885 ret = 0; 4895 ret = 0;
4886 if (ei->i_file_acl && 4896 if (ei->i_file_acl &&
4887 ((ei->i_file_acl < 4897 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4888 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4889 EXT4_SB(sb)->s_gdb_count)) ||
4890 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4891 ext4_error(sb, __func__, 4898 ext4_error(sb, __func__,
4892 "bad extended attribute block %llu in inode #%lu", 4899 "bad extended attribute block %llu in inode #%lu",
4893 ei->i_file_acl, inode->i_ino); 4900 ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4912,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4905 /* Validate block references which are part of inode */ 4912 /* Validate block references which are part of inode */
4906 ret = ext4_check_inode_blockref(inode); 4913 ret = ext4_check_inode_blockref(inode);
4907 } 4914 }
4908 if (ret) { 4915 if (ret)
4909 brelse(bh);
4910 goto bad_inode; 4916 goto bad_inode;
4911 }
4912 4917
4913 if (S_ISREG(inode->i_mode)) { 4918 if (S_ISREG(inode->i_mode)) {
4914 inode->i_op = &ext4_file_inode_operations; 4919 inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4941,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4936 init_special_inode(inode, inode->i_mode, 4941 init_special_inode(inode, inode->i_mode,
4937 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4942 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4938 } else { 4943 } else {
4939 brelse(bh);
4940 ret = -EIO; 4944 ret = -EIO;
4941 ext4_error(inode->i_sb, __func__, 4945 ext4_error(inode->i_sb, __func__,
4942 "bogus i_mode (%o) for inode=%lu", 4946 "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4953,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4949 return inode; 4953 return inode;
4950 4954
4951bad_inode: 4955bad_inode:
4956 brelse(iloc.bh);
4952 iget_failed(inode); 4957 iget_failed(inode);
4953 return ERR_PTR(ret); 4958 return ERR_PTR(ret);
4954} 4959}
@@ -5108,6 +5113,7 @@ static int ext4_do_update_inode(handle_t *handle,
5108 err = rc; 5113 err = rc;
5109 ei->i_state &= ~EXT4_STATE_NEW; 5114 ei->i_state &= ~EXT4_STATE_NEW;
5110 5115
5116 ext4_update_inode_fsync_trans(handle, inode, 0);
5111out_brelse: 5117out_brelse:
5112 brelse(bh); 5118 brelse(bh);
5113 ext4_std_error(inode->i_sb, err); 5119 ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5233,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5227 5233
5228 /* (user+group)*(old+new) structure, inode write (sb, 5234 /* (user+group)*(old+new) structure, inode write (sb,
5229 * inode block, ? - but truncate inode update has it) */ 5235 * inode block, ? - but truncate inode update has it) */
5230 handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ 5236 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
5231 EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 5237 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
5232 if (IS_ERR(handle)) { 5238 if (IS_ERR(handle)) {
5233 error = PTR_ERR(handle); 5239 error = PTR_ERR(handle);
5234 goto err_out; 5240 goto err_out;
@@ -5376,7 +5382,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5376 * worse case, the indexs blocks spread over different block groups 5382 * worse case, the indexs blocks spread over different block groups
5377 * 5383 *
5378 * If datablocks are discontiguous, they are possible to spread over 5384 * If datablocks are discontiguous, they are possible to spread over
5379 * different block groups too. If they are contiugous, with flexbg, 5385 * different block groups too. If they are contiuguous, with flexbg,
5380 * they could still across block group boundary. 5386 * they could still across block group boundary.
5381 * 5387 *
5382 * Also account for superblock, inode, quota and xattr blocks 5388 * Also account for superblock, inode, quota and xattr blocks
@@ -5452,7 +5458,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
5452 * Calculate the journal credits for a chunk of data modification. 5458 * Calculate the journal credits for a chunk of data modification.
5453 * 5459 *
5454 * This is called from DIO, fallocate or whoever calling 5460 * This is called from DIO, fallocate or whoever calling
5455 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. 5461 * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
5456 * 5462 *
5457 * journal buffers for data blocks are not included here, as DIO 5463 * journal buffers for data blocks are not included here, as DIO
5458 * and fallocate do no need to journal data buffers. 5464 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c1cdf613e72..b63d193126d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -221,31 +221,38 @@ setversion_out:
221 struct file *donor_filp; 221 struct file *donor_filp;
222 int err; 222 int err;
223 223
224 if (!(filp->f_mode & FMODE_READ) ||
225 !(filp->f_mode & FMODE_WRITE))
226 return -EBADF;
227
224 if (copy_from_user(&me, 228 if (copy_from_user(&me,
225 (struct move_extent __user *)arg, sizeof(me))) 229 (struct move_extent __user *)arg, sizeof(me)))
226 return -EFAULT; 230 return -EFAULT;
231 me.moved_len = 0;
227 232
228 donor_filp = fget(me.donor_fd); 233 donor_filp = fget(me.donor_fd);
229 if (!donor_filp) 234 if (!donor_filp)
230 return -EBADF; 235 return -EBADF;
231 236
232 if (!capable(CAP_DAC_OVERRIDE)) { 237 if (!(donor_filp->f_mode & FMODE_WRITE)) {
233 if ((current->real_cred->fsuid != inode->i_uid) || 238 err = -EBADF;
234 !(inode->i_mode & S_IRUSR) || 239 goto mext_out;
235 !(donor_filp->f_dentry->d_inode->i_mode &
236 S_IRUSR)) {
237 fput(donor_filp);
238 return -EACCES;
239 }
240 } 240 }
241 241
242 err = mnt_want_write(filp->f_path.mnt);
243 if (err)
244 goto mext_out;
245
242 err = ext4_move_extents(filp, donor_filp, me.orig_start, 246 err = ext4_move_extents(filp, donor_filp, me.orig_start,
243 me.donor_start, me.len, &me.moved_len); 247 me.donor_start, me.len, &me.moved_len);
244 fput(donor_filp); 248 mnt_drop_write(filp->f_path.mnt);
249 if (me.moved_len > 0)
250 file_remove_suid(donor_filp);
245 251
246 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) 252 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
247 return -EFAULT; 253 err = -EFAULT;
248 254mext_out:
255 fput(donor_filp);
249 return err; 256 return err;
250 } 257 }
251 258
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824def..d34afad3e13 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -142,7 +142,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 143 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 145 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 146 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 147 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 148 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2529 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2530 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2531 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2532 struct list_head *l, *ltmp;
2534 2533
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2534 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2558 page_cache_release(e4b.bd_bitmap_page);
2560 } 2559 }
2561 ext4_unlock_group(sb, entry->group); 2560 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2561 if (test_opt(sb, DISCARD)) {
2563 + entry->start_blk 2562 ext4_fsblk_t discard_block;
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2563 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, 2564
2566 entry->count); 2565 discard_block = (ext4_fsblk_t)entry->group *
2567 sb_issue_discard(sb, discard_block, entry->count); 2566 EXT4_BLOCKS_PER_GROUP(sb)
2568 2567 + entry->start_blk
2568 + le32_to_cpu(es->s_first_data_block);
2569 trace_ext4_discard_blocks(sb,
2570 (unsigned long long)discard_block,
2571 entry->count);
2572 sb_issue_discard(sb, discard_block, entry->count);
2573 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2574 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2575 ext4_mb_release_desc(&e4b);
2571 } 2576 }
@@ -2750,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2750 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2751 /* release all the reserved blocks if non delalloc */ 2756 /* release all the reserved blocks if non delalloc */
2752 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2753 else {
2754 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2755 ac->ac_b_ex.fe_len);
2756 /* convert reserved quota blocks to real quota blocks */
2757 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2758 }
2759 2758
2760 if (sbi->s_log_groups_per_flex) { 2759 if (sbi->s_log_groups_per_flex) {
2761 ext4_group_t flex_group = ext4_flex_group(sbi, 2760 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3006,6 +3005,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 3005}
3007 3006
3008/* 3007/*
3008 * Called on failure; free up any blocks from the inode PA for this
3009 * context. We don't need this for MB_GROUP_PA because we only change
3010 * pa_free in ext4_mb_release_context(), but on failure, we've already
3011 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3012 */
3013static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3014{
3015 struct ext4_prealloc_space *pa = ac->ac_pa;
3016 int len;
3017
3018 if (pa && pa->pa_type == MB_INODE_PA) {
3019 len = ac->ac_b_ex.fe_len;
3020 pa->pa_free += len;
3021 }
3022
3023}
3024
3025/*
3009 * use blocks preallocated to inode 3026 * use blocks preallocated to inode
3010 */ 3027 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3028static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -3932,7 +3949,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3932 * per cpu locality group is to reduce the contention between block 3949 * per cpu locality group is to reduce the contention between block
3933 * request from multiple CPUs. 3950 * request from multiple CPUs.
3934 */ 3951 */
3935 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); 3952 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3936 3953
3937 /* we're going to use group allocation */ 3954 /* we're going to use group allocation */
3938 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 3955 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4290,6 +4307,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4307 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4308 goto repeat;
4292 } else if (*errp) { 4309 } else if (*errp) {
4310 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4311 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4312 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4313 ext4_mb_show_ac(ac);
@@ -4422,18 +4440,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4440 return 0;
4423} 4441}
4424 4442
4425/* 4443/**
4426 * Main entry point into mballoc to free blocks 4444 * ext4_free_blocks() -- Free given blocks and update quota
4445 * @handle: handle for this transaction
4446 * @inode: inode
4447 * @block: start physical block to free
4448 * @count: number of blocks to count
4449 * @metadata: Are these metadata blocks
4427 */ 4450 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4451void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4452 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4453 unsigned long count, int flags)
4431{ 4454{
4432 struct buffer_head *bitmap_bh = NULL; 4455 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4456 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4457 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4458 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4459 struct ext4_super_block *es;
4460 unsigned long freed = 0;
4437 unsigned int overflow; 4461 unsigned int overflow;
4438 ext4_grpblk_t bit; 4462 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4463 struct buffer_head *gd_bh;
@@ -4443,13 +4467,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4467 int err = 0;
4444 int ret; 4468 int ret;
4445 4469
4446 *freed = 0; 4470 if (bh) {
4471 if (block)
4472 BUG_ON(block != bh->b_blocknr);
4473 else
4474 block = bh->b_blocknr;
4475 }
4447 4476
4448 sbi = EXT4_SB(sb); 4477 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4478 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4479 if (!ext4_data_block_valid(sbi, block, count)) {
4451 block + count < block ||
4452 block + count > ext4_blocks_count(es)) {
4453 ext4_error(sb, __func__, 4480 ext4_error(sb, __func__,
4454 "Freeing blocks not in datazone - " 4481 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count); 4482 "block = %llu, count = %lu", block, count);
@@ -4457,7 +4484,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4457 } 4484 }
4458 4485
4459 ext4_debug("freeing block %llu\n", block); 4486 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4487 trace_ext4_free_blocks(inode, block, count, flags);
4488
4489 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4490 struct buffer_head *tbh = bh;
4491 int i;
4492
4493 BUG_ON(bh && (count > 1));
4494
4495 for (i = 0; i < count; i++) {
4496 if (!bh)
4497 tbh = sb_find_get_block(inode->i_sb,
4498 block + i);
4499 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4500 inode, tbh, block + i);
4501 }
4502 }
4503
4504 /*
4505 * We need to make sure we don't reuse the freed block until
4506 * after the transaction is committed, which we can do by
4507 * treating the block as metadata, below. We make an
4508 * exception if the inode is to be written in writeback mode
4509 * since writeback mode has weak data consistency guarantees.
4510 */
4511 if (!ext4_should_writeback_data(inode))
4512 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4513
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4514 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4515 if (ac) {
@@ -4533,7 +4585,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4585 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4586 if (err)
4535 goto error_return; 4587 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4588
4589 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4590 struct ext4_free_data *new_entry;
4538 /* 4591 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4592 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4625,7 @@ do_more:
4572 4625
4573 ext4_mb_release_desc(&e4b); 4626 ext4_mb_release_desc(&e4b);
4574 4627
4575 *freed += count; 4628 freed += count;
4576 4629
4577 /* We dirtied the bitmap block */ 4630 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4631 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4645,8 @@ do_more:
4592 } 4645 }
4593 sb->s_dirt = 1; 4646 sb->s_dirt = 1;
4594error_return: 4647error_return:
4648 if (freed)
4649 vfs_dq_free_block(inode, freed);
4595 brelse(bitmap_bh); 4650 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4651 ext4_std_error(sb, err);
4597 if (ac) 4652 if (ac)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc..436521cae45 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b80f3e..81415814b00 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
238 * So allocate a credit of 3. We may update 238 * So allocate a credit of 3. We may update
239 * quota (user and group). 239 * quota (user and group).
240 */ 240 */
241 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 241 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
242 242
243 if (ext4_journal_extend(handle, needed) != 0) 243 if (ext4_journal_extend(handle, needed) != 0)
244 retval = ext4_journal_restart(handle, needed); 244 retval = ext4_journal_restart(handle, needed);
@@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
262 for (i = 0; i < max_entries; i++) { 262 for (i = 0; i < max_entries; i++) {
263 if (tmp_idata[i]) { 263 if (tmp_idata[i]) {
264 extend_credit_for_blkdel(handle, inode); 264 extend_credit_for_blkdel(handle, inode);
265 ext4_free_blocks(handle, inode, 265 ext4_free_blocks(handle, inode, 0,
266 le32_to_cpu(tmp_idata[i]), 1, 1); 266 le32_to_cpu(tmp_idata[i]), 1,
267 EXT4_FREE_BLOCKS_METADATA |
268 EXT4_FREE_BLOCKS_FORGET);
267 } 269 }
268 } 270 }
269 put_bh(bh); 271 put_bh(bh);
270 extend_credit_for_blkdel(handle, inode); 272 extend_credit_for_blkdel(handle, inode);
271 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 273 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
274 EXT4_FREE_BLOCKS_METADATA |
275 EXT4_FREE_BLOCKS_FORGET);
272 return 0; 276 return 0;
273} 277}
274 278
@@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
297 } 301 }
298 put_bh(bh); 302 put_bh(bh);
299 extend_credit_for_blkdel(handle, inode); 303 extend_credit_for_blkdel(handle, inode);
300 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 304 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
305 EXT4_FREE_BLOCKS_METADATA |
306 EXT4_FREE_BLOCKS_FORGET);
301 return 0; 307 return 0;
302} 308}
303 309
@@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
308 /* ei->i_data[EXT4_IND_BLOCK] */ 314 /* ei->i_data[EXT4_IND_BLOCK] */
309 if (i_data[0]) { 315 if (i_data[0]) {
310 extend_credit_for_blkdel(handle, inode); 316 extend_credit_for_blkdel(handle, inode);
311 ext4_free_blocks(handle, inode, 317 ext4_free_blocks(handle, inode, 0,
312 le32_to_cpu(i_data[0]), 1, 1); 318 le32_to_cpu(i_data[0]), 1,
319 EXT4_FREE_BLOCKS_METADATA |
320 EXT4_FREE_BLOCKS_FORGET);
313 } 321 }
314 322
315 /* ei->i_data[EXT4_DIND_BLOCK] */ 323 /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
419 } 427 }
420 put_bh(bh); 428 put_bh(bh);
421 extend_credit_for_blkdel(handle, inode); 429 extend_credit_for_blkdel(handle, inode);
422 ext4_free_blocks(handle, inode, block, 1, 1); 430 ext4_free_blocks(handle, inode, 0, block, 1,
431 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
423 return retval; 432 return retval;
424} 433}
425 434
@@ -477,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode)
477 handle = ext4_journal_start(inode, 486 handle = ext4_journal_start(inode,
478 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 487 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
479 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 488 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
480 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 489 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
481 + 1); 490 + 1);
482 if (IS_ERR(handle)) { 491 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 492 retval = PTR_ERR(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 25b6b145736..82c415be87a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -77,12 +77,14 @@ static int
77mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 77mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
78 struct ext4_extent **extent) 78 struct ext4_extent **extent)
79{ 79{
80 struct ext4_extent_header *eh;
80 int ppos, leaf_ppos = path->p_depth; 81 int ppos, leaf_ppos = path->p_depth;
81 82
82 ppos = leaf_ppos; 83 ppos = leaf_ppos;
83 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 84 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
84 /* leaf block */ 85 /* leaf block */
85 *extent = ++path[ppos].p_ext; 86 *extent = ++path[ppos].p_ext;
87 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
86 return 0; 88 return 0;
87 } 89 }
88 90
@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
119 ext_block_hdr(path[cur_ppos+1].p_bh); 121 ext_block_hdr(path[cur_ppos+1].p_bh);
120 } 122 }
121 123
124 path[leaf_ppos].p_ext = *extent = NULL;
125
126 eh = path[leaf_ppos].p_hdr;
127 if (le16_to_cpu(eh->eh_entries) == 0)
128 /* empty leaf is found */
129 return -ENODATA;
130
122 /* leaf block */ 131 /* leaf block */
123 path[leaf_ppos].p_ext = *extent = 132 path[leaf_ppos].p_ext = *extent =
124 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 133 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
134 path[leaf_ppos].p_block =
135 ext_pblock(path[leaf_ppos].p_ext);
125 return 0; 136 return 0;
126 } 137 }
127 } 138 }
@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
155} 166}
156 167
157/** 168/**
158 * mext_double_down_read - Acquire two inodes' read semaphore 169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
159 *
160 * @orig_inode: original inode structure
161 * @donor_inode: donor inode structure
162 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
163 */
164static void
165mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
166{
167 struct inode *first = orig_inode, *second = donor_inode;
168
169 /*
170 * Use the inode number to provide the stable locking order instead
171 * of its address, because the C language doesn't guarantee you can
172 * compare pointers that don't come from the same array.
173 */
174 if (donor_inode->i_ino < orig_inode->i_ino) {
175 first = donor_inode;
176 second = orig_inode;
177 }
178
179 down_read(&EXT4_I(first)->i_data_sem);
180 down_read(&EXT4_I(second)->i_data_sem);
181}
182
183/**
184 * mext_double_down_write - Acquire two inodes' write semaphore
185 * 170 *
186 * @orig_inode: original inode structure 171 * @orig_inode: original inode structure
187 * @donor_inode: donor inode structure 172 * @donor_inode: donor inode structure
188 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. 173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
189 */ 175 */
190static void 176static void
191mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) 177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
192{ 178{
193 struct inode *first = orig_inode, *second = donor_inode; 179 struct inode *first = orig_inode, *second = donor_inode;
194 180
@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
203 } 189 }
204 190
205 down_write(&EXT4_I(first)->i_data_sem); 191 down_write(&EXT4_I(first)->i_data_sem);
206 down_write(&EXT4_I(second)->i_data_sem); 192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
207} 193}
208 194
209/** 195/**
210 * mext_double_up_read - Release two inodes' read semaphore 196 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
211 * 197 *
212 * @orig_inode: original inode structure to be released its lock first 198 * @orig_inode: original inode structure to be released its lock first
213 * @donor_inode: donor inode structure to be released its lock second 199 * @donor_inode: donor inode structure to be released its lock second
214 * Release read semaphore of two inodes (orig and donor). 200 * Release write lock of i_data_sem of two inodes (orig and donor).
215 */ 201 */
216static void 202static void
217mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) 203double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
218{
219 up_read(&EXT4_I(orig_inode)->i_data_sem);
220 up_read(&EXT4_I(donor_inode)->i_data_sem);
221}
222
223/**
224 * mext_double_up_write - Release two inodes' write semaphore
225 *
226 * @orig_inode: original inode structure to be released its lock first
227 * @donor_inode: donor inode structure to be released its lock second
228 * Release write semaphore of two inodes (orig and donor).
229 */
230static void
231mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
232{ 204{
233 up_write(&EXT4_I(orig_inode)->i_data_sem); 205 up_write(&EXT4_I(orig_inode)->i_data_sem);
234 up_write(&EXT4_I(donor_inode)->i_data_sem); 206 up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -596,7 +568,7 @@ out:
596 * @tmp_oext: the extent that will belong to the donor inode 568 * @tmp_oext: the extent that will belong to the donor inode
597 * @orig_off: block offset of original inode 569 * @orig_off: block offset of original inode
598 * @donor_off: block offset of donor inode 570 * @donor_off: block offset of donor inode
599 * @max_count: the maximun length of extents 571 * @max_count: the maximum length of extents
600 * 572 *
601 * Return 0 on success, or a negative error value on failure. 573 * Return 0 on success, or a negative error value on failure.
602 */ 574 */
@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
661 * @donor_inode: donor inode 633 * @donor_inode: donor inode
662 * @from: block offset of orig_inode 634 * @from: block offset of orig_inode
663 * @count: block count to be replaced 635 * @count: block count to be replaced
636 * @err: pointer to save return value
664 * 637 *
665 * Replace original inode extents and donor inode extents page by page. 638 * Replace original inode extents and donor inode extents page by page.
666 * We implement this replacement in the following three steps: 639 * We implement this replacement in the following three steps:
@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
671 * 3. Change the block information of donor inode to point at the saved 644 * 3. Change the block information of donor inode to point at the saved
672 * original inode blocks in the dummy extents. 645 * original inode blocks in the dummy extents.
673 * 646 *
674 * Return 0 on success, or a negative error value on failure. 647 * Return replaced block count.
675 */ 648 */
676static int 649static int
677mext_replace_branches(handle_t *handle, struct inode *orig_inode, 650mext_replace_branches(handle_t *handle, struct inode *orig_inode,
678 struct inode *donor_inode, ext4_lblk_t from, 651 struct inode *donor_inode, ext4_lblk_t from,
679 ext4_lblk_t count) 652 ext4_lblk_t count, int *err)
680{ 653{
681 struct ext4_ext_path *orig_path = NULL; 654 struct ext4_ext_path *orig_path = NULL;
682 struct ext4_ext_path *donor_path = NULL; 655 struct ext4_ext_path *donor_path = NULL;
683 struct ext4_extent *oext, *dext; 656 struct ext4_extent *oext, *dext;
684 struct ext4_extent tmp_dext, tmp_oext; 657 struct ext4_extent tmp_dext, tmp_oext;
685 ext4_lblk_t orig_off = from, donor_off = from; 658 ext4_lblk_t orig_off = from, donor_off = from;
686 int err = 0;
687 int depth; 659 int depth;
688 int replaced_count = 0; 660 int replaced_count = 0;
689 int dext_alen; 661 int dext_alen;
690 662
691 mext_double_down_write(orig_inode, donor_inode); 663 /* Protect extent trees against block allocations via delalloc */
664 double_down_write_data_sem(orig_inode, donor_inode);
692 665
693 /* Get the original extent for the block "orig_off" */ 666 /* Get the original extent for the block "orig_off" */
694 err = get_ext_path(orig_inode, orig_off, &orig_path); 667 *err = get_ext_path(orig_inode, orig_off, &orig_path);
695 if (err) 668 if (*err)
696 goto out; 669 goto out;
697 670
698 /* Get the donor extent for the head */ 671 /* Get the donor extent for the head */
699 err = get_ext_path(donor_inode, donor_off, &donor_path); 672 *err = get_ext_path(donor_inode, donor_off, &donor_path);
700 if (err) 673 if (*err)
701 goto out; 674 goto out;
702 depth = ext_depth(orig_inode); 675 depth = ext_depth(orig_inode);
703 oext = orig_path[depth].p_ext; 676 oext = orig_path[depth].p_ext;
@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
707 dext = donor_path[depth].p_ext; 680 dext = donor_path[depth].p_ext;
708 tmp_dext = *dext; 681 tmp_dext = *dext;
709 682
710 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 683 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
711 donor_off, count); 684 donor_off, count);
712 if (err) 685 if (*err)
713 goto out; 686 goto out;
714 687
715 /* Loop for the donor extents */ 688 /* Loop for the donor extents */
@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
718 if (!dext) { 691 if (!dext) {
719 ext4_error(donor_inode->i_sb, __func__, 692 ext4_error(donor_inode->i_sb, __func__,
720 "The extent for donor must be found"); 693 "The extent for donor must be found");
721 err = -EIO; 694 *err = -EIO;
722 goto out; 695 goto out;
723 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 696 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
724 ext4_error(donor_inode->i_sb, __func__, 697 ext4_error(donor_inode->i_sb, __func__,
@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
726 "extent(%u) should be equal", 699 "extent(%u) should be equal",
727 donor_off, 700 donor_off,
728 le32_to_cpu(tmp_dext.ee_block)); 701 le32_to_cpu(tmp_dext.ee_block));
729 err = -EIO; 702 *err = -EIO;
730 goto out; 703 goto out;
731 } 704 }
732 705
733 /* Set donor extent to orig extent */ 706 /* Set donor extent to orig extent */
734 err = mext_leaf_block(handle, orig_inode, 707 *err = mext_leaf_block(handle, orig_inode,
735 orig_path, &tmp_dext, &orig_off); 708 orig_path, &tmp_dext, &orig_off);
736 if (err < 0) 709 if (*err)
737 goto out; 710 goto out;
738 711
739 /* Set orig extent to donor extent */ 712 /* Set orig extent to donor extent */
740 err = mext_leaf_block(handle, donor_inode, 713 *err = mext_leaf_block(handle, donor_inode,
741 donor_path, &tmp_oext, &donor_off); 714 donor_path, &tmp_oext, &donor_off);
742 if (err < 0) 715 if (*err)
743 goto out; 716 goto out;
744 717
745 dext_alen = ext4_ext_get_actual_len(&tmp_dext); 718 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
753 726
754 if (orig_path) 727 if (orig_path)
755 ext4_ext_drop_refs(orig_path); 728 ext4_ext_drop_refs(orig_path);
756 err = get_ext_path(orig_inode, orig_off, &orig_path); 729 *err = get_ext_path(orig_inode, orig_off, &orig_path);
757 if (err) 730 if (*err)
758 goto out; 731 goto out;
759 depth = ext_depth(orig_inode); 732 depth = ext_depth(orig_inode);
760 oext = orig_path[depth].p_ext; 733 oext = orig_path[depth].p_ext;
761 if (le32_to_cpu(oext->ee_block) +
762 ext4_ext_get_actual_len(oext) <= orig_off) {
763 err = 0;
764 goto out;
765 }
766 tmp_oext = *oext; 734 tmp_oext = *oext;
767 735
768 if (donor_path) 736 if (donor_path)
769 ext4_ext_drop_refs(donor_path); 737 ext4_ext_drop_refs(donor_path);
770 err = get_ext_path(donor_inode, donor_off, &donor_path); 738 *err = get_ext_path(donor_inode, donor_off, &donor_path);
771 if (err) 739 if (*err)
772 goto out; 740 goto out;
773 depth = ext_depth(donor_inode); 741 depth = ext_depth(donor_inode);
774 dext = donor_path[depth].p_ext; 742 dext = donor_path[depth].p_ext;
775 if (le32_to_cpu(dext->ee_block) +
776 ext4_ext_get_actual_len(dext) <= donor_off) {
777 err = 0;
778 goto out;
779 }
780 tmp_dext = *dext; 743 tmp_dext = *dext;
781 744
782 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 745 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
783 donor_off, count - replaced_count); 746 donor_off, count - replaced_count);
784 if (err) 747 if (*err)
785 goto out; 748 goto out;
786 } 749 }
787 750
@@ -795,8 +758,12 @@ out:
795 kfree(donor_path); 758 kfree(donor_path);
796 } 759 }
797 760
798 mext_double_up_write(orig_inode, donor_inode); 761 ext4_ext_invalidate_cache(orig_inode);
799 return err; 762 ext4_ext_invalidate_cache(donor_inode);
763
764 double_up_write_data_sem(orig_inode, donor_inode);
765
766 return replaced_count;
800} 767}
801 768
802/** 769/**
@@ -808,16 +775,17 @@ out:
808 * @data_offset_in_page: block index where data swapping starts 775 * @data_offset_in_page: block index where data swapping starts
809 * @block_len_in_page: the number of blocks to be swapped 776 * @block_len_in_page: the number of blocks to be swapped
810 * @uninit: orig extent is uninitialized or not 777 * @uninit: orig extent is uninitialized or not
778 * @err: pointer to save return value
811 * 779 *
812 * Save the data in original inode blocks and replace original inode extents 780 * Save the data in original inode blocks and replace original inode extents
813 * with donor inode extents by calling mext_replace_branches(). 781 * with donor inode extents by calling mext_replace_branches().
814 * Finally, write out the saved data in new original inode blocks. Return 0 782 * Finally, write out the saved data in new original inode blocks. Return
815 * on success, or a negative error value on failure. 783 * replaced block count.
816 */ 784 */
817static int 785static int
818move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 786move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
819 pgoff_t orig_page_offset, int data_offset_in_page, 787 pgoff_t orig_page_offset, int data_offset_in_page,
820 int block_len_in_page, int uninit) 788 int block_len_in_page, int uninit, int *err)
821{ 789{
822 struct inode *orig_inode = o_filp->f_dentry->d_inode; 790 struct inode *orig_inode = o_filp->f_dentry->d_inode;
823 struct address_space *mapping = orig_inode->i_mapping; 791 struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
829 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 797 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
830 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 798 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
831 unsigned int w_flags = 0; 799 unsigned int w_flags = 0;
832 unsigned int tmp_data_len, data_len; 800 unsigned int tmp_data_size, data_size, replaced_size;
833 void *fsdata; 801 void *fsdata;
834 int ret, i, jblocks; 802 int i, jblocks;
803 int err2 = 0;
804 int replaced_count = 0;
835 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 805 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
836 806
837 /* 807 /*
@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
841 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 811 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
842 handle = ext4_journal_start(orig_inode, jblocks); 812 handle = ext4_journal_start(orig_inode, jblocks);
843 if (IS_ERR(handle)) { 813 if (IS_ERR(handle)) {
844 ret = PTR_ERR(handle); 814 *err = PTR_ERR(handle);
845 return ret; 815 return 0;
846 } 816 }
847 817
848 if (segment_eq(get_fs(), KERNEL_DS)) 818 if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 * Just swap data blocks between orig and donor. 828 * Just swap data blocks between orig and donor.
859 */ 829 */
860 if (uninit) { 830 if (uninit) {
861 ret = mext_replace_branches(handle, orig_inode, 831 replaced_count = mext_replace_branches(handle, orig_inode,
862 donor_inode, orig_blk_offset, 832 donor_inode, orig_blk_offset,
863 block_len_in_page); 833 block_len_in_page, err);
864
865 /* Clear the inode cache not to refer to the old data */
866 ext4_ext_invalidate_cache(orig_inode);
867 ext4_ext_invalidate_cache(donor_inode);
868 goto out2; 834 goto out2;
869 } 835 }
870 836
871 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 837 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
872 838
873 /* Calculate data_len */ 839 /* Calculate data_size */
874 if ((orig_blk_offset + block_len_in_page - 1) == 840 if ((orig_blk_offset + block_len_in_page - 1) ==
875 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 841 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
876 /* Replace the last block */ 842 /* Replace the last block */
877 tmp_data_len = orig_inode->i_size & (blocksize - 1); 843 tmp_data_size = orig_inode->i_size & (blocksize - 1);
878 /* 844 /*
879 * If data_len equal zero, it shows data_len is multiples of 845 * If data_size equal zero, it shows data_size is multiples of
880 * blocksize. So we set appropriate value. 846 * blocksize. So we set appropriate value.
881 */ 847 */
882 if (tmp_data_len == 0) 848 if (tmp_data_size == 0)
883 tmp_data_len = blocksize; 849 tmp_data_size = blocksize;
884 850
885 data_len = tmp_data_len + 851 data_size = tmp_data_size +
886 ((block_len_in_page - 1) << orig_inode->i_blkbits); 852 ((block_len_in_page - 1) << orig_inode->i_blkbits);
887 } else { 853 } else
888 data_len = block_len_in_page << orig_inode->i_blkbits; 854 data_size = block_len_in_page << orig_inode->i_blkbits;
889 } 855
856 replaced_size = data_size;
890 857
891 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, 858 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
892 &page, &fsdata); 859 &page, &fsdata);
893 if (unlikely(ret < 0)) 860 if (unlikely(*err < 0))
894 goto out; 861 goto out;
895 862
896 if (!PageUptodate(page)) { 863 if (!PageUptodate(page)) {
@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
911 /* Release old bh and drop refs */ 878 /* Release old bh and drop refs */
912 try_to_release_page(page, 0); 879 try_to_release_page(page, 0);
913 880
914 ret = mext_replace_branches(handle, orig_inode, donor_inode, 881 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
915 orig_blk_offset, block_len_in_page); 882 orig_blk_offset, block_len_in_page,
916 if (ret < 0) 883 &err2);
917 goto out; 884 if (err2) {
918 885 if (replaced_count) {
919 /* Clear the inode cache not to refer to the old data */ 886 block_len_in_page = replaced_count;
920 ext4_ext_invalidate_cache(orig_inode); 887 replaced_size =
921 ext4_ext_invalidate_cache(donor_inode); 888 block_len_in_page << orig_inode->i_blkbits;
889 } else
890 goto out;
891 }
922 892
923 if (!page_has_buffers(page)) 893 if (!page_has_buffers(page))
924 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 894 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
928 bh = bh->b_this_page; 898 bh = bh->b_this_page;
929 899
930 for (i = 0; i < block_len_in_page; i++) { 900 for (i = 0; i < block_len_in_page; i++) {
931 ret = ext4_get_block(orig_inode, 901 *err = ext4_get_block(orig_inode,
932 (sector_t)(orig_blk_offset + i), bh, 0); 902 (sector_t)(orig_blk_offset + i), bh, 0);
933 if (ret < 0) 903 if (*err < 0)
934 goto out; 904 goto out;
935 905
936 if (bh->b_this_page != NULL) 906 if (bh->b_this_page != NULL)
937 bh = bh->b_this_page; 907 bh = bh->b_this_page;
938 } 908 }
939 909
940 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, 910 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
941 page, fsdata); 911 page, fsdata);
942 page = NULL; 912 page = NULL;
943 913
@@ -951,7 +921,10 @@ out:
951out2: 921out2:
952 ext4_journal_stop(handle); 922 ext4_journal_stop(handle);
953 923
954 return ret < 0 ? ret : 0; 924 if (err2)
925 *err = err2;
926
927 return replaced_count;
955} 928}
956 929
957/** 930/**
@@ -962,7 +935,6 @@ out2:
962 * @orig_start: logical start offset in block for orig 935 * @orig_start: logical start offset in block for orig
963 * @donor_start: logical start offset in block for donor 936 * @donor_start: logical start offset in block for donor
964 * @len: the number of blocks to be moved 937 * @len: the number of blocks to be moved
965 * @moved_len: moved block length
966 * 938 *
967 * Check the arguments of ext4_move_extents() whether the files can be 939 * Check the arguments of ext4_move_extents() whether the files can be
968 * exchanged with each other. 940 * exchanged with each other.
@@ -970,8 +942,8 @@ out2:
970 */ 942 */
971static int 943static int
972mext_check_arguments(struct inode *orig_inode, 944mext_check_arguments(struct inode *orig_inode,
973 struct inode *donor_inode, __u64 orig_start, 945 struct inode *donor_inode, __u64 orig_start,
974 __u64 donor_start, __u64 *len, __u64 moved_len) 946 __u64 donor_start, __u64 *len)
975{ 947{
976 ext4_lblk_t orig_blocks, donor_blocks; 948 ext4_lblk_t orig_blocks, donor_blocks;
977 unsigned int blkbits = orig_inode->i_blkbits; 949 unsigned int blkbits = orig_inode->i_blkbits;
@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
985 return -EINVAL; 957 return -EINVAL;
986 } 958 }
987 959
960 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
961 ext4_debug("ext4 move extent: suid or sgid is set"
962 " to donor file [ino:orig %lu, donor %lu]\n",
963 orig_inode->i_ino, donor_inode->i_ino);
964 return -EINVAL;
965 }
966
988 /* Ext4 move extent does not support swapfile */ 967 /* Ext4 move extent does not support swapfile */
989 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 968 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
990 ext4_debug("ext4 move extent: The argument files should " 969 ext4_debug("ext4 move extent: The argument files should "
@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
1025 return -EINVAL; 1004 return -EINVAL;
1026 } 1005 }
1027 1006
1028 if (moved_len) {
1029 ext4_debug("ext4 move extent: moved_len should be 0 "
1030 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1031 donor_inode->i_ino);
1032 return -EINVAL;
1033 }
1034
1035 if ((orig_start > EXT_MAX_BLOCK) || 1007 if ((orig_start > EXT_MAX_BLOCK) ||
1036 (donor_start > EXT_MAX_BLOCK) || 1008 (donor_start > EXT_MAX_BLOCK) ||
1037 (*len > EXT_MAX_BLOCK) || 1009 (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1060,7 @@ mext_check_arguments(struct inode *orig_inode,
1088 } 1060 }
1089 1061
1090 if (!*len) { 1062 if (!*len) {
1091 ext4_debug("ext4 move extent: len shoudld not be 0 " 1063 ext4_debug("ext4 move extent: len should not be 0 "
1092 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1064 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1093 donor_inode->i_ino); 1065 donor_inode->i_ino);
1094 return -EINVAL; 1066 return -EINVAL;
@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1232 return -EINVAL; 1204 return -EINVAL;
1233 } 1205 }
1234 1206
1235 /* protect orig and donor against a truncate */ 1207 /* Protect orig and donor inodes against a truncate */
1236 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1208 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1237 if (ret1 < 0) 1209 if (ret1 < 0)
1238 return ret1; 1210 return ret1;
1239 1211
1240 mext_double_down_read(orig_inode, donor_inode); 1212 /* Protect extent tree against block allocations via delalloc */
1213 double_down_write_data_sem(orig_inode, donor_inode);
1241 /* Check the filesystem environment whether move_extent can be done */ 1214 /* Check the filesystem environment whether move_extent can be done */
1242 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1215 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1243 donor_start, &len, *moved_len); 1216 donor_start, &len);
1244 mext_double_up_read(orig_inode, donor_inode);
1245 if (ret1) 1217 if (ret1)
1246 goto out; 1218 goto out;
1247 1219
@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1355 seq_start = le32_to_cpu(ext_cur->ee_block); 1327 seq_start = le32_to_cpu(ext_cur->ee_block);
1356 rest_blocks = seq_blocks; 1328 rest_blocks = seq_blocks;
1357 1329
1358 /* Discard preallocations of two inodes */ 1330 /*
1359 down_write(&EXT4_I(orig_inode)->i_data_sem); 1331 * Up semaphore to avoid following problems:
1360 ext4_discard_preallocations(orig_inode); 1332 * a. transaction deadlock among ext4_journal_start,
1361 up_write(&EXT4_I(orig_inode)->i_data_sem); 1333 * ->write_begin via pagefault, and jbd2_journal_commit
1362 1334 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1363 down_write(&EXT4_I(donor_inode)->i_data_sem); 1335 * in move_extent_per_page
1364 ext4_discard_preallocations(donor_inode); 1336 */
1365 up_write(&EXT4_I(donor_inode)->i_data_sem); 1337 double_up_write_data_sem(orig_inode, donor_inode);
1366 1338
1367 while (orig_page_offset <= seq_end_page) { 1339 while (orig_page_offset <= seq_end_page) {
1368 1340
1369 /* Swap original branches with new branches */ 1341 /* Swap original branches with new branches */
1370 ret1 = move_extent_per_page(o_filp, donor_inode, 1342 block_len_in_page = move_extent_per_page(
1343 o_filp, donor_inode,
1371 orig_page_offset, 1344 orig_page_offset,
1372 data_offset_in_page, 1345 data_offset_in_page,
1373 block_len_in_page, uninit); 1346 block_len_in_page, uninit,
1374 if (ret1 < 0) 1347 &ret1);
1375 goto out; 1348
1376 orig_page_offset++;
1377 /* Count how many blocks we have exchanged */ 1349 /* Count how many blocks we have exchanged */
1378 *moved_len += block_len_in_page; 1350 *moved_len += block_len_in_page;
1351 if (ret1 < 0)
1352 break;
1379 if (*moved_len > len) { 1353 if (*moved_len > len) {
1380 ext4_error(orig_inode->i_sb, __func__, 1354 ext4_error(orig_inode->i_sb, __func__,
1381 "We replaced blocks too much! " 1355 "We replaced blocks too much! "
1382 "sum of replaced: %llu requested: %llu", 1356 "sum of replaced: %llu requested: %llu",
1383 *moved_len, len); 1357 *moved_len, len);
1384 ret1 = -EIO; 1358 ret1 = -EIO;
1385 goto out; 1359 break;
1386 } 1360 }
1387 1361
1362 orig_page_offset++;
1388 data_offset_in_page = 0; 1363 data_offset_in_page = 0;
1389 rest_blocks -= block_len_in_page; 1364 rest_blocks -= block_len_in_page;
1390 if (rest_blocks > blocks_per_page) 1365 if (rest_blocks > blocks_per_page)
@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1393 block_len_in_page = rest_blocks; 1368 block_len_in_page = rest_blocks;
1394 } 1369 }
1395 1370
1371 double_down_write_data_sem(orig_inode, donor_inode);
1372 if (ret1 < 0)
1373 break;
1374
1396 /* Decrease buffer counter */ 1375 /* Decrease buffer counter */
1397 if (holecheck_path) 1376 if (holecheck_path)
1398 ext4_ext_drop_refs(holecheck_path); 1377 ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1414 1393
1415 } 1394 }
1416out: 1395out:
1396 if (*moved_len) {
1397 ext4_discard_preallocations(orig_inode);
1398 ext4_discard_preallocations(donor_inode);
1399 }
1400
1417 if (orig_path) { 1401 if (orig_path) {
1418 ext4_ext_drop_refs(orig_path); 1402 ext4_ext_drop_refs(orig_path);
1419 kfree(orig_path); 1403 kfree(orig_path);
@@ -1422,7 +1406,7 @@ out:
1422 ext4_ext_drop_refs(holecheck_path); 1406 ext4_ext_drop_refs(holecheck_path);
1423 kfree(holecheck_path); 1407 kfree(holecheck_path);
1424 } 1408 }
1425 1409 double_up_write_data_sem(orig_inode, donor_inode);
1426 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1410 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1427 1411
1428 if (ret1) 1412 if (ret1)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6d2c1b897fc..17a17e10dd6 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1292,9 +1292,6 @@ errout:
1292 * add_dirent_to_buf will attempt search the directory block for 1292 * add_dirent_to_buf will attempt search the directory block for
1293 * space. It will return -ENOSPC if no space is available, and -EIO 1293 * space. It will return -ENOSPC if no space is available, and -EIO
1294 * and -EEXIST if directory entry already exists. 1294 * and -EEXIST if directory entry already exists.
1295 *
1296 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1297 * all other cases bh is released.
1298 */ 1295 */
1299static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1296static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1300 struct inode *inode, struct ext4_dir_entry_2 *de, 1297 struct inode *inode, struct ext4_dir_entry_2 *de,
@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1315 top = bh->b_data + blocksize - reclen; 1312 top = bh->b_data + blocksize - reclen;
1316 while ((char *) de <= top) { 1313 while ((char *) de <= top) {
1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1314 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1318 bh, offset)) { 1315 bh, offset))
1319 brelse(bh);
1320 return -EIO; 1316 return -EIO;
1321 } 1317 if (ext4_match(namelen, name, de))
1322 if (ext4_match(namelen, name, de)) {
1323 brelse(bh);
1324 return -EEXIST; 1318 return -EEXIST;
1325 }
1326 nlen = EXT4_DIR_REC_LEN(de->name_len); 1319 nlen = EXT4_DIR_REC_LEN(de->name_len);
1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); 1320 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1328 if ((de->inode? rlen - nlen: rlen) >= reclen) 1321 if ((de->inode? rlen - nlen: rlen) >= reclen)
@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1337 err = ext4_journal_get_write_access(handle, bh); 1330 err = ext4_journal_get_write_access(handle, bh);
1338 if (err) { 1331 if (err) {
1339 ext4_std_error(dir->i_sb, err); 1332 ext4_std_error(dir->i_sb, err);
1340 brelse(bh);
1341 return err; 1333 return err;
1342 } 1334 }
1343 1335
@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1377 err = ext4_handle_dirty_metadata(handle, dir, bh); 1369 err = ext4_handle_dirty_metadata(handle, dir, bh);
1378 if (err) 1370 if (err)
1379 ext4_std_error(dir->i_sb, err); 1371 ext4_std_error(dir->i_sb, err);
1380 brelse(bh);
1381 return 0; 1372 return 0;
1382} 1373}
1383 1374
@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1471 if (!(de)) 1462 if (!(de))
1472 return retval; 1463 return retval;
1473 1464
1474 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1465 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1466 brelse(bh);
1467 return retval;
1475} 1468}
1476 1469
1477/* 1470/*
@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1514 if(!bh) 1507 if(!bh)
1515 return retval; 1508 return retval;
1516 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1509 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1517 if (retval != -ENOSPC) 1510 if (retval != -ENOSPC) {
1511 brelse(bh);
1518 return retval; 1512 return retval;
1513 }
1519 1514
1520 if (blocks == 1 && !dx_fallback && 1515 if (blocks == 1 && !dx_fallback &&
1521 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 1516 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1528 de = (struct ext4_dir_entry_2 *) bh->b_data; 1523 de = (struct ext4_dir_entry_2 *) bh->b_data;
1529 de->inode = 0; 1524 de->inode = 0;
1530 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1525 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1531 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1526 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1527 brelse(bh);
1528 return retval;
1532} 1529}
1533 1530
1534/* 1531/*
@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1561 goto journal_error; 1558 goto journal_error;
1562 1559
1563 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1560 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1564 if (err != -ENOSPC) { 1561 if (err != -ENOSPC)
1565 bh = NULL;
1566 goto cleanup; 1562 goto cleanup;
1567 }
1568 1563
1569 /* Block full, should compress but for now just split */ 1564 /* Block full, should compress but for now just split */
1570 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 1565 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1657 if (!de) 1652 if (!de)
1658 goto cleanup; 1653 goto cleanup;
1659 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 1654 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1660 bh = NULL;
1661 goto cleanup; 1655 goto cleanup;
1662 1656
1663journal_error: 1657journal_error:
@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1775retry: 1769retry:
1776 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1777 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1778 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1779 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1780 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1781 1775
@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1809retry: 1803retry:
1810 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1804 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1811 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1805 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1812 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1806 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1813 if (IS_ERR(handle)) 1807 if (IS_ERR(handle))
1814 return PTR_ERR(handle); 1808 return PTR_ERR(handle);
1815 1809
@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1846retry: 1840retry:
1847 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1841 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1848 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1842 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1849 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1843 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1850 if (IS_ERR(handle)) 1844 if (IS_ERR(handle))
1851 return PTR_ERR(handle); 1845 return PTR_ERR(handle);
1852 1846
@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
2259retry: 2253retry:
2260 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2254 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2261 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2255 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2262 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 2256 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2263 if (IS_ERR(handle)) 2257 if (IS_ERR(handle))
2264 return PTR_ERR(handle); 2258 return PTR_ERR(handle);
2265 2259
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3cfc343c41b..3b2c5541d8a 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
247 goto exit_bh; 247 goto exit_bh;
248 248
249 if (IS_ERR(gdb = bclean(handle, sb, block))) { 249 if (IS_ERR(gdb = bclean(handle, sb, block))) {
250 err = PTR_ERR(bh); 250 err = PTR_ERR(gdb);
251 goto exit_bh; 251 goto exit_bh;
252 } 252 }
253 ext4_handle_dirty_metadata(handle, NULL, gdb); 253 ext4_handle_dirty_metadata(handle, NULL, gdb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4ca92aab51..735c20d5fd5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
603 if (sb->s_dirt) 603 if (sb->s_dirt)
604 ext4_commit_super(sb, 1); 604 ext4_commit_super(sb, 1);
605 605
606 ext4_release_system_zone(sb);
607 ext4_mb_release(sb);
608 ext4_ext_release(sb);
609 ext4_xattr_put_super(sb);
610 if (sbi->s_journal) { 606 if (sbi->s_journal) {
611 err = jbd2_journal_destroy(sbi->s_journal); 607 err = jbd2_journal_destroy(sbi->s_journal);
612 sbi->s_journal = NULL; 608 sbi->s_journal = NULL;
@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
614 ext4_abort(sb, __func__, 610 ext4_abort(sb, __func__,
615 "Couldn't clean up the journal"); 611 "Couldn't clean up the journal");
616 } 612 }
613
614 ext4_release_system_zone(sb);
615 ext4_mb_release(sb);
616 ext4_ext_release(sb);
617 ext4_xattr_put_super(sb);
618
617 if (!(sb->s_flags & MS_RDONLY)) { 619 if (!(sb->s_flags & MS_RDONLY)) {
618 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 620 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
619 es->s_state = cpu_to_le16(sbi->s_mount_state); 621 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -700,10 +702,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
700 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
701 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
702 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
703 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
704 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
708#ifdef CONFIG_QUOTA
709 ei->i_reserved_quota = 0;
710#endif
705 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 711 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
706 ei->cur_aio_dio = NULL; 712 ei->cur_aio_dio = NULL;
713 ei->i_sync_tid = 0;
714 ei->i_datasync_tid = 0;
707 715
708 return &ei->vfs_inode; 716 return &ei->vfs_inode;
709} 717}
@@ -765,9 +773,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
765#if defined(CONFIG_QUOTA) 773#if defined(CONFIG_QUOTA)
766 struct ext4_sb_info *sbi = EXT4_SB(sb); 774 struct ext4_sb_info *sbi = EXT4_SB(sb);
767 775
768 if (sbi->s_jquota_fmt) 776 if (sbi->s_jquota_fmt) {
769 seq_printf(seq, ",jqfmt=%s", 777 char *fmtname = "";
770 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 778
779 switch (sbi->s_jquota_fmt) {
780 case QFMT_VFS_OLD:
781 fmtname = "vfsold";
782 break;
783 case QFMT_VFS_V0:
784 fmtname = "vfsv0";
785 break;
786 case QFMT_VFS_V1:
787 fmtname = "vfsv1";
788 break;
789 }
790 seq_printf(seq, ",jqfmt=%s", fmtname);
791 }
771 792
772 if (sbi->s_qf_names[USRQUOTA]) 793 if (sbi->s_qf_names[USRQUOTA])
773 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 794 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -899,6 +920,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
899 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 920 if (test_opt(sb, NO_AUTO_DA_ALLOC))
900 seq_puts(seq, ",noauto_da_alloc"); 921 seq_puts(seq, ",noauto_da_alloc");
901 922
923 if (test_opt(sb, DISCARD))
924 seq_puts(seq, ",discard");
925
926 if (test_opt(sb, NOLOAD))
927 seq_puts(seq, ",norecovery");
928
902 ext4_show_quota_options(seq, sb); 929 ext4_show_quota_options(seq, sb);
903 930
904 return 0; 931 return 0;
@@ -991,7 +1018,9 @@ static const struct dquot_operations ext4_quota_operations = {
991 .reserve_space = dquot_reserve_space, 1018 .reserve_space = dquot_reserve_space,
992 .claim_space = dquot_claim_space, 1019 .claim_space = dquot_claim_space,
993 .release_rsv = dquot_release_reserved_space, 1020 .release_rsv = dquot_release_reserved_space,
1021#ifdef CONFIG_QUOTA
994 .get_reserved_space = ext4_get_reserved_space, 1022 .get_reserved_space = ext4_get_reserved_space,
1023#endif
995 .alloc_inode = dquot_alloc_inode, 1024 .alloc_inode = dquot_alloc_inode,
996 .free_space = dquot_free_space, 1025 .free_space = dquot_free_space,
997 .free_inode = dquot_free_inode, 1026 .free_inode = dquot_free_inode,
@@ -1074,12 +1103,13 @@ enum {
1074 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1103 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1075 Opt_data_err_abort, Opt_data_err_ignore, 1104 Opt_data_err_abort, Opt_data_err_ignore,
1076 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1105 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1077 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1106 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1078 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1107 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1079 Opt_usrquota, Opt_grpquota, Opt_i_version, 1108 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1080 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1109 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1081 Opt_block_validity, Opt_noblock_validity, 1110 Opt_block_validity, Opt_noblock_validity,
1082 Opt_inode_readahead_blks, Opt_journal_ioprio 1111 Opt_inode_readahead_blks, Opt_journal_ioprio,
1112 Opt_discard, Opt_nodiscard,
1083}; 1113};
1084 1114
1085static const match_table_t tokens = { 1115static const match_table_t tokens = {
@@ -1104,6 +1134,7 @@ static const match_table_t tokens = {
1104 {Opt_acl, "acl"}, 1134 {Opt_acl, "acl"},
1105 {Opt_noacl, "noacl"}, 1135 {Opt_noacl, "noacl"},
1106 {Opt_noload, "noload"}, 1136 {Opt_noload, "noload"},
1137 {Opt_noload, "norecovery"},
1107 {Opt_nobh, "nobh"}, 1138 {Opt_nobh, "nobh"},
1108 {Opt_bh, "bh"}, 1139 {Opt_bh, "bh"},
1109 {Opt_commit, "commit=%u"}, 1140 {Opt_commit, "commit=%u"},
@@ -1125,6 +1156,7 @@ static const match_table_t tokens = {
1125 {Opt_grpjquota, "grpjquota=%s"}, 1156 {Opt_grpjquota, "grpjquota=%s"},
1126 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1157 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1127 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1158 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1159 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1128 {Opt_grpquota, "grpquota"}, 1160 {Opt_grpquota, "grpquota"},
1129 {Opt_noquota, "noquota"}, 1161 {Opt_noquota, "noquota"},
1130 {Opt_quota, "quota"}, 1162 {Opt_quota, "quota"},
@@ -1144,6 +1176,8 @@ static const match_table_t tokens = {
1144 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1176 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1145 {Opt_auto_da_alloc, "auto_da_alloc"}, 1177 {Opt_auto_da_alloc, "auto_da_alloc"},
1146 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1178 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1179 {Opt_discard, "discard"},
1180 {Opt_nodiscard, "nodiscard"},
1147 {Opt_err, NULL}, 1181 {Opt_err, NULL},
1148}; 1182};
1149 1183
@@ -1425,6 +1459,9 @@ clear_qf_name:
1425 goto set_qf_format; 1459 goto set_qf_format;
1426 case Opt_jqfmt_vfsv0: 1460 case Opt_jqfmt_vfsv0:
1427 qfmt = QFMT_VFS_V0; 1461 qfmt = QFMT_VFS_V0;
1462 goto set_qf_format;
1463 case Opt_jqfmt_vfsv1:
1464 qfmt = QFMT_VFS_V1;
1428set_qf_format: 1465set_qf_format:
1429 if (sb_any_quota_loaded(sb) && 1466 if (sb_any_quota_loaded(sb) &&
1430 sbi->s_jquota_fmt != qfmt) { 1467 sbi->s_jquota_fmt != qfmt) {
@@ -1467,6 +1504,7 @@ set_qf_format:
1467 case Opt_offgrpjquota: 1504 case Opt_offgrpjquota:
1468 case Opt_jqfmt_vfsold: 1505 case Opt_jqfmt_vfsold:
1469 case Opt_jqfmt_vfsv0: 1506 case Opt_jqfmt_vfsv0:
1507 case Opt_jqfmt_vfsv1:
1470 ext4_msg(sb, KERN_ERR, 1508 ext4_msg(sb, KERN_ERR,
1471 "journaled quota options not supported"); 1509 "journaled quota options not supported");
1472 break; 1510 break;
@@ -1565,6 +1603,12 @@ set_qf_format:
1565 else 1603 else
1566 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1604 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1567 break; 1605 break;
1606 case Opt_discard:
1607 set_opt(sbi->s_mount_opt, DISCARD);
1608 break;
1609 case Opt_nodiscard:
1610 clear_opt(sbi->s_mount_opt, DISCARD);
1611 break;
1568 default: 1612 default:
1569 ext4_msg(sb, KERN_ERR, 1613 ext4_msg(sb, KERN_ERR,
1570 "Unrecognized mount option \"%s\" " 1614 "Unrecognized mount option \"%s\" "
@@ -1673,14 +1717,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
1673 size_t size; 1717 size_t size;
1674 int i; 1718 int i;
1675 1719
1676 if (!sbi->s_es->s_log_groups_per_flex) { 1720 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1721 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1722
1723 if (groups_per_flex < 2) {
1677 sbi->s_log_groups_per_flex = 0; 1724 sbi->s_log_groups_per_flex = 0;
1678 return 1; 1725 return 1;
1679 } 1726 }
1680 1727
1681 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1682 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1683
1684 /* We allocate both existing and potentially added groups */ 1728 /* We allocate both existing and potentially added groups */
1685 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1729 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1686 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1730 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@@ -2099,11 +2143,8 @@ static int parse_strtoul(const char *buf,
2099{ 2143{
2100 char *endp; 2144 char *endp;
2101 2145
2102 while (*buf && isspace(*buf)) 2146 *value = simple_strtoul(skip_spaces(buf), &endp, 0);
2103 buf++; 2147 endp = skip_spaces(endp);
2104 *value = simple_strtoul(buf, &endp, 0);
2105 while (*endp && isspace(*endp))
2106 endp++;
2107 if (*endp || *value > max) 2148 if (*endp || *value > max)
2108 return -EINVAL; 2149 return -EINVAL;
2109 2150
@@ -2134,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2134 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2135 2176
2136 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2137 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2138 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2139 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2140} 2181}
2141 2182
2142static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -2721,26 +2762,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2721 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2762 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2722 if (ext4_load_journal(sb, es, journal_devnum)) 2763 if (ext4_load_journal(sb, es, journal_devnum))
2723 goto failed_mount3; 2764 goto failed_mount3;
2724 if (!(sb->s_flags & MS_RDONLY) &&
2725 EXT4_SB(sb)->s_journal->j_failed_commit) {
2726 ext4_msg(sb, KERN_CRIT, "error: "
2727 "ext4_fill_super: Journal transaction "
2728 "%u is corrupt",
2729 EXT4_SB(sb)->s_journal->j_failed_commit);
2730 if (test_opt(sb, ERRORS_RO)) {
2731 ext4_msg(sb, KERN_CRIT,
2732 "Mounting filesystem read-only");
2733 sb->s_flags |= MS_RDONLY;
2734 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2735 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2736 }
2737 if (test_opt(sb, ERRORS_PANIC)) {
2738 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2739 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2740 ext4_commit_super(sb, 1);
2741 goto failed_mount4;
2742 }
2743 }
2744 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2765 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2745 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2766 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2746 ext4_msg(sb, KERN_ERR, "required journal recovery " 2767 ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -3668,13 +3689,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3668 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3689 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3669 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3690 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3670 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3691 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3671 ext4_free_blocks_count_set(es, buf->f_bfree);
3672 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3692 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3673 if (buf->f_bfree < ext4_r_blocks_count(es)) 3693 if (buf->f_bfree < ext4_r_blocks_count(es))
3674 buf->f_bavail = 0; 3694 buf->f_bavail = 0;
3675 buf->f_files = le32_to_cpu(es->s_inodes_count); 3695 buf->f_files = le32_to_cpu(es->s_inodes_count);
3676 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3696 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3677 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3678 buf->f_namelen = EXT4_NAME_LEN; 3697 buf->f_namelen = EXT4_NAME_LEN;
3679 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3698 fsid = le64_to_cpup((void *)es->s_uuid) ^
3680 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3699 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -3966,6 +3985,60 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3966 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3985 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3967} 3986}
3968 3987
3988#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
3989static struct file_system_type ext2_fs_type = {
3990 .owner = THIS_MODULE,
3991 .name = "ext2",
3992 .get_sb = ext4_get_sb,
3993 .kill_sb = kill_block_super,
3994 .fs_flags = FS_REQUIRES_DEV,
3995};
3996
3997static inline void register_as_ext2(void)
3998{
3999 int err = register_filesystem(&ext2_fs_type);
4000 if (err)
4001 printk(KERN_WARNING
4002 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
4003}
4004
4005static inline void unregister_as_ext2(void)
4006{
4007 unregister_filesystem(&ext2_fs_type);
4008}
4009MODULE_ALIAS("ext2");
4010#else
4011static inline void register_as_ext2(void) { }
4012static inline void unregister_as_ext2(void) { }
4013#endif
4014
4015#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4016static struct file_system_type ext3_fs_type = {
4017 .owner = THIS_MODULE,
4018 .name = "ext3",
4019 .get_sb = ext4_get_sb,
4020 .kill_sb = kill_block_super,
4021 .fs_flags = FS_REQUIRES_DEV,
4022};
4023
4024static inline void register_as_ext3(void)
4025{
4026 int err = register_filesystem(&ext3_fs_type);
4027 if (err)
4028 printk(KERN_WARNING
4029 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
4030}
4031
4032static inline void unregister_as_ext3(void)
4033{
4034 unregister_filesystem(&ext3_fs_type);
4035}
4036MODULE_ALIAS("ext3");
4037#else
4038static inline void register_as_ext3(void) { }
4039static inline void unregister_as_ext3(void) { }
4040#endif
4041
3969static struct file_system_type ext4_fs_type = { 4042static struct file_system_type ext4_fs_type = {
3970 .owner = THIS_MODULE, 4043 .owner = THIS_MODULE,
3971 .name = "ext4", 4044 .name = "ext4",
@@ -3995,11 +4068,15 @@ static int __init init_ext4_fs(void)
3995 err = init_inodecache(); 4068 err = init_inodecache();
3996 if (err) 4069 if (err)
3997 goto out1; 4070 goto out1;
4071 register_as_ext2();
4072 register_as_ext3();
3998 err = register_filesystem(&ext4_fs_type); 4073 err = register_filesystem(&ext4_fs_type);
3999 if (err) 4074 if (err)
4000 goto out; 4075 goto out;
4001 return 0; 4076 return 0;
4002out: 4077out:
4078 unregister_as_ext2();
4079 unregister_as_ext3();
4003 destroy_inodecache(); 4080 destroy_inodecache();
4004out1: 4081out1:
4005 exit_ext4_xattr(); 4082 exit_ext4_xattr();
@@ -4015,6 +4092,8 @@ out4:
4015 4092
4016static void __exit exit_ext4_fs(void) 4093static void __exit exit_ext4_fs(void)
4017{ 4094{
4095 unregister_as_ext2();
4096 unregister_as_ext3();
4018 unregister_filesystem(&ext4_fs_type); 4097 unregister_filesystem(&ext4_fs_type);
4019 destroy_inodecache(); 4098 destroy_inodecache();
4020 exit_ext4_xattr(); 4099 exit_ext4_xattr();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fed5b01d7a8..f3a2f7ed45a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer, 95static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 size_t buffer_size); 96 size_t buffer_size);
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
140ssize_t 140ssize_t
141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) 141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142{ 142{
143 return ext4_xattr_list(dentry->d_inode, buffer, size); 143 return ext4_xattr_list(dentry, buffer, size);
144} 144}
145 145
146static int 146static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
325} 325}
326 326
327static int 327static int
328ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, 328ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
329 char *buffer, size_t buffer_size) 329 char *buffer, size_t buffer_size)
330{ 330{
331 size_t rest = buffer_size; 331 size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
335 ext4_xattr_handler(entry->e_name_index); 335 ext4_xattr_handler(entry->e_name_index);
336 336
337 if (handler) { 337 if (handler) {
338 size_t size = handler->list(inode, buffer, rest, 338 size_t size = handler->list(dentry, buffer, rest,
339 entry->e_name, 339 entry->e_name,
340 entry->e_name_len); 340 entry->e_name_len,
341 handler->flags);
341 if (buffer) { 342 if (buffer) {
342 if (size > rest) 343 if (size > rest)
343 return -ERANGE; 344 return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
350} 351}
351 352
352static int 353static int
353ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 354ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
354{ 355{
356 struct inode *inode = dentry->d_inode;
355 struct buffer_head *bh = NULL; 357 struct buffer_head *bh = NULL;
356 int error; 358 int error;
357 359
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
376 goto cleanup; 378 goto cleanup;
377 } 379 }
378 ext4_xattr_cache_insert(bh); 380 ext4_xattr_cache_insert(bh);
379 error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 381 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 382
381cleanup: 383cleanup:
382 brelse(bh); 384 brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
385} 387}
386 388
387static int 389static int
388ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 390ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389{ 391{
392 struct inode *inode = dentry->d_inode;
390 struct ext4_xattr_ibody_header *header; 393 struct ext4_xattr_ibody_header *header;
391 struct ext4_inode *raw_inode; 394 struct ext4_inode *raw_inode;
392 struct ext4_iloc iloc; 395 struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
404 error = ext4_xattr_check_names(IFIRST(header), end); 407 error = ext4_xattr_check_names(IFIRST(header), end);
405 if (error) 408 if (error)
406 goto cleanup; 409 goto cleanup;
407 error = ext4_xattr_list_entries(inode, IFIRST(header), 410 error = ext4_xattr_list_entries(dentry, IFIRST(header),
408 buffer, buffer_size); 411 buffer, buffer_size);
409 412
410cleanup: 413cleanup:
@@ -423,12 +426,12 @@ cleanup:
423 * used / required on success. 426 * used / required on success.
424 */ 427 */
425static int 428static int
426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 429ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427{ 430{
428 int i_error, b_error; 431 int i_error, b_error;
429 432
430 down_read(&EXT4_I(inode)->xattr_sem); 433 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
431 i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); 434 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
432 if (i_error < 0) { 435 if (i_error < 0) {
433 b_error = 0; 436 b_error = 0;
434 } else { 437 } else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
436 buffer += i_error; 439 buffer += i_error;
437 buffer_size -= i_error; 440 buffer_size -= i_error;
438 } 441 }
439 b_error = ext4_xattr_block_list(inode, buffer, buffer_size); 442 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 if (b_error < 0) 443 if (b_error < 0)
441 i_error = 0; 444 i_error = 0;
442 } 445 }
443 up_read(&EXT4_I(inode)->xattr_sem); 446 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
444 return i_error + b_error; 447 return i_error + b_error;
445} 448}
446 449
@@ -482,9 +485,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
482 ea_bdebug(bh, "refcount now=0; freeing"); 485 ea_bdebug(bh, "refcount now=0; freeing");
483 if (ce) 486 if (ce)
484 mb_cache_entry_free(ce); 487 mb_cache_entry_free(ce);
485 ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
486 get_bh(bh); 488 get_bh(bh);
487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 489 ext4_free_blocks(handle, inode, bh, 0, 1,
490 EXT4_FREE_BLOCKS_METADATA |
491 EXT4_FREE_BLOCKS_FORGET);
488 } else { 492 } else {
489 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 493 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
490 error = ext4_handle_dirty_metadata(handle, inode, bh); 494 error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +836,8 @@ inserted:
832 new_bh = sb_getblk(sb, block); 836 new_bh = sb_getblk(sb, block);
833 if (!new_bh) { 837 if (!new_bh) {
834getblk_failed: 838getblk_failed:
835 ext4_free_blocks(handle, inode, block, 1, 1); 839 ext4_free_blocks(handle, inode, 0, block, 1,
840 EXT4_FREE_BLOCKS_METADATA);
836 error = -EIO; 841 error = -EIO;
837 goto cleanup; 842 goto cleanup;
838 } 843 }
@@ -988,6 +993,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
988 if (error) 993 if (error)
989 goto cleanup; 994 goto cleanup;
990 995
996 error = ext4_journal_get_write_access(handle, is.iloc.bh);
997 if (error)
998 goto cleanup;
999
991 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { 1000 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
992 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 1001 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
993 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 1002 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -1013,9 +1022,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1013 if (flags & XATTR_CREATE) 1022 if (flags & XATTR_CREATE)
1014 goto cleanup; 1023 goto cleanup;
1015 } 1024 }
1016 error = ext4_journal_get_write_access(handle, is.iloc.bh);
1017 if (error)
1018 goto cleanup;
1019 if (!value) { 1025 if (!value) {
1020 if (!is.s.not_found) 1026 if (!is.s.not_found)
1021 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1027 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
@@ -1326,6 +1332,8 @@ retry:
1326 goto cleanup; 1332 goto cleanup;
1327 kfree(b_entry_name); 1333 kfree(b_entry_name);
1328 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1329 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1330 kfree(is); 1338 kfree(is);
1331 kfree(bs); 1339 kfree(bs);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6ca..983c253999a 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext4_xattr_security_get(struct inode *inode, const char *name, 31ext4_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, 36 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext4_xattr_security_set(struct inode *inode, const char *name, 41ext4_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, 46 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a3..15b50edc658 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext4_xattr_trusted_get(struct inode *inode, const char *name, 35ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
36 void *buffer, size_t size) 36 size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, 40 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext4_xattr_trusted_set(struct inode *inode, const char *name, 45ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, 50 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
51 value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42a..c4ce05746ce 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext4_xattr_user_get(struct inode *inode, const char *name, 34ext4_xattr_user_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); 41 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext4_xattr_user_set(struct inode *inode, const char *name, 46ext4_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, 53 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext4_xattr_user_handler = { 57struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7db0979c6b7..e6efdfa0f6d 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -44,7 +44,8 @@ struct fat_mount_options {
44 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 44 nocase:1, /* Does this need case conversion? 0=need case conversion*/
45 usefree:1, /* Use free_clusters for FAT32 */ 45 usefree:1, /* Use free_clusters for FAT32 */
46 tz_utc:1, /* Filesystem timestamps are in UTC */ 46 tz_utc:1, /* Filesystem timestamps are in UTC */
47 rodir:1; /* allow ATTR_RO for directory */ 47 rodir:1, /* allow ATTR_RO for directory */
48 discard:1; /* Issue discard requests on deletions */
48}; 49};
49 50
50#define FAT_HASH_BITS 8 51#define FAT_HASH_BITS 8
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a81037721a6..81184d3b75a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -566,16 +566,21 @@ int fat_free_clusters(struct inode *inode, int cluster)
566 goto error; 566 goto error;
567 } 567 }
568 568
569 /* 569 if (sbi->options.discard) {
570 * Issue discard for the sectors we no longer care about, 570 /*
571 * batching contiguous clusters into one request 571 * Issue discard for the sectors we no longer
572 */ 572 * care about, batching contiguous clusters
573 if (cluster != fatent.entry + 1) { 573 * into one request
574 int nr_clus = fatent.entry - first_cl + 1; 574 */
575 575 if (cluster != fatent.entry + 1) {
576 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), 576 int nr_clus = fatent.entry - first_cl + 1;
577 nr_clus * sbi->sec_per_clus); 577
578 first_cl = cluster; 578 sb_issue_discard(sb,
579 fat_clus_to_blknr(sbi, first_cl),
580 nr_clus * sbi->sec_per_clus);
581
582 first_cl = cluster;
583 }
579 } 584 }
580 585
581 ops->ent_put(&fatent, FAT_ENT_FREE); 586 ops->ent_put(&fatent, FAT_ENT_FREE);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 76b7961ab66..14da530b05c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -858,6 +858,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
858 seq_puts(m, ",errors=panic"); 858 seq_puts(m, ",errors=panic");
859 else 859 else
860 seq_puts(m, ",errors=remount-ro"); 860 seq_puts(m, ",errors=remount-ro");
861 if (opts->discard)
862 seq_puts(m, ",discard");
861 863
862 return 0; 864 return 0;
863} 865}
@@ -871,7 +873,7 @@ enum {
871 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 873 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
872 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 874 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
873 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 875 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
874 Opt_err_panic, Opt_err_ro, Opt_err, 876 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
875}; 877};
876 878
877static const match_table_t fat_tokens = { 879static const match_table_t fat_tokens = {
@@ -899,6 +901,7 @@ static const match_table_t fat_tokens = {
899 {Opt_err_cont, "errors=continue"}, 901 {Opt_err_cont, "errors=continue"},
900 {Opt_err_panic, "errors=panic"}, 902 {Opt_err_panic, "errors=panic"},
901 {Opt_err_ro, "errors=remount-ro"}, 903 {Opt_err_ro, "errors=remount-ro"},
904 {Opt_discard, "discard"},
902 {Opt_obsolate, "conv=binary"}, 905 {Opt_obsolate, "conv=binary"},
903 {Opt_obsolate, "conv=text"}, 906 {Opt_obsolate, "conv=text"},
904 {Opt_obsolate, "conv=auto"}, 907 {Opt_obsolate, "conv=auto"},
@@ -1136,6 +1139,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1136 case Opt_rodir: 1139 case Opt_rodir:
1137 opts->rodir = 1; 1140 opts->rodir = 1;
1138 break; 1141 break;
1142 case Opt_discard:
1143 opts->discard = 1;
1144 break;
1139 1145
1140 /* obsolete mount options */ 1146 /* obsolete mount options */
1141 case Opt_obsolate: 1147 case Opt_obsolate:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 0f55f5cb732..d3da05f2646 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/time.h>
12#include "fat.h" 13#include "fat.h"
13 14
14/* 15/*
@@ -157,10 +158,6 @@ extern struct timezone sys_tz;
157#define SECS_PER_MIN 60 158#define SECS_PER_MIN 60
158#define SECS_PER_HOUR (60 * 60) 159#define SECS_PER_HOUR (60 * 60)
159#define SECS_PER_DAY (SECS_PER_HOUR * 24) 160#define SECS_PER_DAY (SECS_PER_HOUR * 24)
160#define UNIX_SECS_1980 315532800L
161#if BITS_PER_LONG == 64
162#define UNIX_SECS_2108 4354819200L
163#endif
164/* days between 1.1.70 and 1.1.80 (2 leap days) */ 161/* days between 1.1.70 and 1.1.80 (2 leap days) */
165#define DAYS_DELTA (365 * 10 + 2) 162#define DAYS_DELTA (365 * 10 + 2)
166/* 120 (2100 - 1980) isn't leap year */ 163/* 120 (2100 - 1980) isn't leap year */
@@ -213,58 +210,35 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
213void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, 210void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
214 __le16 *time, __le16 *date, u8 *time_cs) 211 __le16 *time, __le16 *date, u8 *time_cs)
215{ 212{
216 time_t second = ts->tv_sec; 213 struct tm tm;
217 time_t day, leap_day, month, year; 214 time_to_tm(ts->tv_sec, sbi->options.tz_utc ? 0 :
215 -sys_tz.tz_minuteswest * 60, &tm);
218 216
219 if (!sbi->options.tz_utc) 217 /* FAT can only support year between 1980 to 2107 */
220 second -= sys_tz.tz_minuteswest * SECS_PER_MIN; 218 if (tm.tm_year < 1980 - 1900) {
221
222 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
223 if (second < UNIX_SECS_1980) {
224 *time = 0; 219 *time = 0;
225 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); 220 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
226 if (time_cs) 221 if (time_cs)
227 *time_cs = 0; 222 *time_cs = 0;
228 return; 223 return;
229 } 224 }
230#if BITS_PER_LONG == 64 225 if (tm.tm_year > 2107 - 1900) {
231 if (second >= UNIX_SECS_2108) {
232 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); 226 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
233 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); 227 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
234 if (time_cs) 228 if (time_cs)
235 *time_cs = 199; 229 *time_cs = 199;
236 return; 230 return;
237 } 231 }
238#endif
239 232
240 day = second / SECS_PER_DAY - DAYS_DELTA; 233 /* from 1900 -> from 1980 */
241 year = day / 365; 234 tm.tm_year -= 80;
242 leap_day = (year + 3) / 4; 235 /* 0~11 -> 1~12 */
243 if (year > YEAR_2100) /* 2100 isn't leap year */ 236 tm.tm_mon++;
244 leap_day--; 237 /* 0~59 -> 0~29(2sec counts) */
245 if (year * 365 + leap_day > day) 238 tm.tm_sec >>= 1;
246 year--;
247 leap_day = (year + 3) / 4;
248 if (year > YEAR_2100) /* 2100 isn't leap year */
249 leap_day--;
250 day -= year * 365 + leap_day;
251
252 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
253 month = 2;
254 } else {
255 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
256 day--;
257 for (month = 1; month < 12; month++) {
258 if (days_in_year[month + 1] > day)
259 break;
260 }
261 }
262 day -= days_in_year[month];
263 239
264 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 240 *time = cpu_to_le16(tm.tm_hour << 11 | tm.tm_min << 5 | tm.tm_sec);
265 | ((second / SECS_PER_MIN) % 60) << 5 241 *date = cpu_to_le16(tm.tm_year << 9 | tm.tm_mon << 5 | tm.tm_mday);
266 | (second % SECS_PER_MIN) >> 1);
267 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
268 if (time_cs) 242 if (time_cs)
269 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; 243 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
270} 244}
@@ -285,4 +259,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
285 } 259 }
286 return err; 260 return err;
287} 261}
288
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a6..97e01dc0d95 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620/* 620/*
621 * fasync_helper() is used by almost all character device drivers 621 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 622 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 623 * do nothing and return 0.
624 *
625 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list".
627 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
624 */ 630 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 632{
627 struct fasync_struct *fa, **fp; 633 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 634 int result = 0;
630 635
631 if (on) { 636 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 637 write_lock_irq(&fasync_lock);
633 if (!new) 638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 639 if (fa->fa_file != filp)
640 continue;
641 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa);
643 filp->f_flags &= ~FASYNC;
644 result = 1;
645 break;
635 } 646 }
647 write_unlock_irq(&fasync_lock);
648 spin_unlock(&filp->f_lock);
649 return result;
650}
651
652/*
653 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one.
655 *
656 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list".
658 */
659static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660{
661 struct fasync_struct *new, *fa, **fp;
662 int result = 0;
663
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new)
666 return -ENOMEM;
636 667
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 668 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 669 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 671 if (fa->fa_file != filp)
645 if(on) { 672 continue;
646 fa->fa_fd = fd; 673 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 674 kmem_cache_free(fasync_cache, new);
648 } else { 675 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 676 }
656 677
657 if (on) { 678 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 679 new->fa_file = filp;
659 new->fa_file = filp; 680 new->fa_fd = fd;
660 new->fa_fd = fd; 681 new->fa_next = *fapp;
661 new->fa_next = *fapp; 682 *fapp = new;
662 *fapp = new; 683 result = 1;
663 result = 1; 684 filp->f_flags |= FASYNC;
664 } 685
665out: 686out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 687 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 688 spin_unlock(&filp->f_lock);
672 return result; 689 return result;
673} 690}
674 691
692/*
693 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry.
697 */
698int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699{
700 if (!on)
701 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp);
703}
704
675EXPORT_SYMBOL(fasync_helper); 705EXPORT_SYMBOL(fasync_helper);
676 706
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 707void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/file_table.c b/fs/file_table.c
index 8eb44042e00..69652c5bd5f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,7 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/ima.h>
17#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
18#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
19#include <linux/mount.h> 18#include <linux/mount.h>
@@ -22,9 +21,12 @@
22#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
23#include <linux/sysctl.h> 22#include <linux/sysctl.h>
24#include <linux/percpu_counter.h> 23#include <linux/percpu_counter.h>
24#include <linux/ima.h>
25 25
26#include <asm/atomic.h> 26#include <asm/atomic.h>
27 27
28#include "internal.h"
29
28/* sysctl tunables... */ 30/* sysctl tunables... */
29struct files_stat_struct files_stat = { 31struct files_stat_struct files_stat = {
30 .max_files = NR_FILE 32 .max_files = NR_FILE
@@ -148,8 +150,6 @@ fail:
148 return NULL; 150 return NULL;
149} 151}
150 152
151EXPORT_SYMBOL(get_empty_filp);
152
153/** 153/**
154 * alloc_file - allocate and initialize a 'struct file' 154 * alloc_file - allocate and initialize a 'struct file'
155 * @mnt: the vfsmount on which the file will reside 155 * @mnt: the vfsmount on which the file will reside
@@ -165,8 +165,8 @@ EXPORT_SYMBOL(get_empty_filp);
165 * If all the callers of init_file() are eliminated, its 165 * If all the callers of init_file() are eliminated, its
166 * code should be moved into this function. 166 * code should be moved into this function.
167 */ 167 */
168struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 168struct file *alloc_file(struct path *path, fmode_t mode,
169 fmode_t mode, const struct file_operations *fop) 169 const struct file_operations *fop)
170{ 170{
171 struct file *file; 171 struct file *file;
172 172
@@ -174,35 +174,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
174 if (!file) 174 if (!file)
175 return NULL; 175 return NULL;
176 176
177 init_file(file, mnt, dentry, mode, fop); 177 file->f_path = *path;
178 return file; 178 file->f_mapping = path->dentry->d_inode->i_mapping;
179}
180EXPORT_SYMBOL(alloc_file);
181
182/**
183 * init_file - initialize a 'struct file'
184 * @file: the already allocated 'struct file' to initialized
185 * @mnt: the vfsmount on which the file resides
186 * @dentry: the dentry representing this file
187 * @mode: the mode the file is opened with
188 * @fop: the 'struct file_operations' for this file
189 *
190 * Use this instead of setting the members directly. Doing so
191 * avoids making mistakes like forgetting the mntget() or
192 * forgetting to take a write on the mnt.
193 *
194 * Note: This is a crappy interface. It is here to make
195 * merging with the existing users of get_empty_filp()
196 * who have complex failure logic easier. All users
197 * of this should be moving to alloc_file().
198 */
199int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
200 fmode_t mode, const struct file_operations *fop)
201{
202 int error = 0;
203 file->f_path.dentry = dentry;
204 file->f_path.mnt = mntget(mnt);
205 file->f_mapping = dentry->d_inode->i_mapping;
206 file->f_mode = mode; 179 file->f_mode = mode;
207 file->f_op = fop; 180 file->f_op = fop;
208 181
@@ -212,14 +185,14 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
212 * visible. We do this for consistency, and so 185 * visible. We do this for consistency, and so
213 * that we can do debugging checks at __fput() 186 * that we can do debugging checks at __fput()
214 */ 187 */
215 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { 188 if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
216 file_take_write(file); 189 file_take_write(file);
217 error = mnt_clone_write(mnt); 190 WARN_ON(mnt_clone_write(path->mnt));
218 WARN_ON(error);
219 } 191 }
220 return error; 192 ima_counts_get(file);
193 return file;
221} 194}
222EXPORT_SYMBOL(init_file); 195EXPORT_SYMBOL(alloc_file);
223 196
224void fput(struct file *file) 197void fput(struct file *file)
225{ 198{
@@ -280,7 +253,6 @@ void __fput(struct file *file)
280 if (file->f_op && file->f_op->release) 253 if (file->f_op && file->f_op->release)
281 file->f_op->release(inode, file); 254 file->f_op->release(inode, file);
282 security_file_free(file); 255 security_file_free(file);
283 ima_file_free(file);
284 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 256 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
285 cdev_put(inode->i_cdev); 257 cdev_put(inode->i_cdev);
286 fops_put(file->f_op); 258 fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9d5360c4c2a..1a7c42c64ff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
242/** 242/**
243 * bdi_start_writeback - start writeback 243 * bdi_start_writeback - start writeback
244 * @bdi: the backing device to write from 244 * @bdi: the backing device to write from
245 * @sb: write inodes from this super_block
245 * @nr_pages: the number of pages to write 246 * @nr_pages: the number of pages to write
246 * 247 *
247 * Description: 248 * Description:
@@ -614,7 +615,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
614 struct writeback_control *wbc) 615 struct writeback_control *wbc)
615{ 616{
616 struct super_block *sb = wbc->sb, *pin_sb = NULL; 617 struct super_block *sb = wbc->sb, *pin_sb = NULL;
617 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
618 const unsigned long start = jiffies; /* livelock avoidance */ 618 const unsigned long start = jiffies; /* livelock avoidance */
619 619
620 spin_lock(&inode_lock); 620 spin_lock(&inode_lock);
@@ -635,36 +635,11 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
635 continue; 635 continue;
636 } 636 }
637 637
638 if (!bdi_cap_writeback_dirty(wb->bdi)) {
639 redirty_tail(inode);
640 if (is_blkdev_sb) {
641 /*
642 * Dirty memory-backed blockdev: the ramdisk
643 * driver does this. Skip just this inode
644 */
645 continue;
646 }
647 /*
648 * Dirty memory-backed inode against a filesystem other
649 * than the kernel-internal bdev filesystem. Skip the
650 * entire superblock.
651 */
652 break;
653 }
654
655 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 638 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
656 requeue_io(inode); 639 requeue_io(inode);
657 continue; 640 continue;
658 } 641 }
659 642
660 if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
661 wbc->encountered_congestion = 1;
662 if (!is_blkdev_sb)
663 break; /* Skip a congested fs */
664 requeue_io(inode);
665 continue; /* Skip a congested blockdev */
666 }
667
668 /* 643 /*
669 * Was this inode dirtied after sync_sb_inodes was called? 644 * Was this inode dirtied after sync_sb_inodes was called?
670 * This keeps sync from extra jobs and livelock. 645 * This keeps sync from extra jobs and livelock.
@@ -756,6 +731,7 @@ static long wb_writeback(struct bdi_writeback *wb,
756 .sync_mode = args->sync_mode, 731 .sync_mode = args->sync_mode,
757 .older_than_this = NULL, 732 .older_than_this = NULL,
758 .for_kupdate = args->for_kupdate, 733 .for_kupdate = args->for_kupdate,
734 .for_background = args->for_background,
759 .range_cyclic = args->range_cyclic, 735 .range_cyclic = args->range_cyclic,
760 }; 736 };
761 unsigned long oldest_jif; 737 unsigned long oldest_jif;
@@ -787,7 +763,6 @@ static long wb_writeback(struct bdi_writeback *wb,
787 break; 763 break;
788 764
789 wbc.more_io = 0; 765 wbc.more_io = 0;
790 wbc.encountered_congestion = 0;
791 wbc.nr_to_write = MAX_WRITEBACK_PAGES; 766 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
792 wbc.pages_skipped = 0; 767 wbc.pages_skipped = 0;
793 writeback_inodes_wb(wb, &wbc); 768 writeback_inodes_wb(wb, &wbc);
@@ -1213,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
1213EXPORT_SYMBOL(writeback_inodes_sb); 1188EXPORT_SYMBOL(writeback_inodes_sb);
1214 1189
1215/** 1190/**
1191 * writeback_inodes_sb_if_idle - start writeback if none underway
1192 * @sb: the superblock
1193 *
1194 * Invoke writeback_inodes_sb if no writeback is currently underway.
1195 * Returns 1 if writeback was started, 0 if not.
1196 */
1197int writeback_inodes_sb_if_idle(struct super_block *sb)
1198{
1199 if (!writeback_in_progress(sb->s_bdi)) {
1200 writeback_inodes_sb(sb);
1201 return 1;
1202 } else
1203 return 0;
1204}
1205EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1206
1207/**
1216 * sync_inodes_sb - sync sb inode pages 1208 * sync_inodes_sb - sync sb inode pages
1217 * @sb: the superblock 1209 * @sb: the superblock
1218 * 1210 *
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index e590242fa41..3221a0c7944 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(fscache_object_destroy);
91 */ 91 */
92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) 92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
93{ 93{
94 struct fscache_object *pobj, *obj, *minobj = NULL; 94 struct fscache_object *pobj, *obj = NULL, *minobj = NULL;
95 struct rb_node *p; 95 struct rb_node *p;
96 unsigned long pos; 96 unsigned long pos;
97 97
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbe..55458031e50 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
1/* 1/*
2 * fs/generic_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de> 2 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 * 3 *
6 * This file is released under the GPL. 4 * This file is released under the GPL.
5 *
6 * Generic ACL support for in-memory filesystems.
7 */ 7 */
8 8
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/generic_acl.h> 11#include <linux/generic_acl.h>
12#include <linux/posix_acl.h>
13#include <linux/posix_acl_xattr.h>
12 14
13/** 15
14 * generic_acl_list - Generic xattr_handler->list() operation 16static size_t
15 * @ops: Filesystem specific getacl and setacl callbacks 17generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
16 */ 18 const char *name, size_t name_len, int type)
17size_t
18generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
19 int type, char *list, size_t list_size)
20{ 19{
21 struct posix_acl *acl; 20 struct posix_acl *acl;
22 const char *name; 21 const char *xname;
23 size_t size; 22 size_t size;
24 23
25 acl = ops->getacl(inode, type); 24 acl = get_cached_acl(dentry->d_inode, type);
26 if (!acl) 25 if (!acl)
27 return 0; 26 return 0;
28 posix_acl_release(acl); 27 posix_acl_release(acl);
29 28
30 switch(type) { 29 switch (type) {
31 case ACL_TYPE_ACCESS: 30 case ACL_TYPE_ACCESS:
32 name = POSIX_ACL_XATTR_ACCESS; 31 xname = POSIX_ACL_XATTR_ACCESS;
33 break; 32 break;
34 33 case ACL_TYPE_DEFAULT:
35 case ACL_TYPE_DEFAULT: 34 xname = POSIX_ACL_XATTR_DEFAULT;
36 name = POSIX_ACL_XATTR_DEFAULT; 35 break;
37 break; 36 default:
38 37 return 0;
39 default:
40 return 0;
41 } 38 }
42 size = strlen(name) + 1; 39 size = strlen(xname) + 1;
43 if (list && size <= list_size) 40 if (list && size <= list_size)
44 memcpy(list, name, size); 41 memcpy(list, xname, size);
45 return size; 42 return size;
46} 43}
47 44
48/** 45static int
49 * generic_acl_get - Generic xattr_handler->get() operation 46generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
50 * @ops: Filesystem specific getacl and setacl callbacks 47 size_t size, int type)
51 */
52int
53generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
54 int type, void *buffer, size_t size)
55{ 48{
56 struct posix_acl *acl; 49 struct posix_acl *acl;
57 int error; 50 int error;
58 51
59 acl = ops->getacl(inode, type); 52 if (strcmp(name, "") != 0)
53 return -EINVAL;
54
55 acl = get_cached_acl(dentry->d_inode, type);
60 if (!acl) 56 if (!acl)
61 return -ENODATA; 57 return -ENODATA;
62 error = posix_acl_to_xattr(acl, buffer, size); 58 error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
65 return error; 61 return error;
66} 62}
67 63
68/** 64static int
69 * generic_acl_set - Generic xattr_handler->set() operation 65generic_acl_set(struct dentry *dentry, const char *name, const void *value,
70 * @ops: Filesystem specific getacl and setacl callbacks 66 size_t size, int flags, int type)
71 */
72int
73generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
74 int type, const void *value, size_t size)
75{ 67{
68 struct inode *inode = dentry->d_inode;
76 struct posix_acl *acl = NULL; 69 struct posix_acl *acl = NULL;
77 int error; 70 int error;
78 71
72 if (strcmp(name, "") != 0)
73 return -EINVAL;
79 if (S_ISLNK(inode->i_mode)) 74 if (S_ISLNK(inode->i_mode))
80 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
81 if (!is_owner_or_cap(inode)) 76 if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
91 error = posix_acl_valid(acl); 86 error = posix_acl_valid(acl);
92 if (error) 87 if (error)
93 goto failed; 88 goto failed;
94 switch(type) { 89 switch (type) {
95 case ACL_TYPE_ACCESS: 90 case ACL_TYPE_ACCESS:
96 mode = inode->i_mode; 91 mode = inode->i_mode;
97 error = posix_acl_equiv_mode(acl, &mode); 92 error = posix_acl_equiv_mode(acl, &mode);
98 if (error < 0) 93 if (error < 0)
99 goto failed; 94 goto failed;
100 inode->i_mode = mode; 95 inode->i_mode = mode;
101 if (error == 0) { 96 if (error == 0) {
102 posix_acl_release(acl); 97 posix_acl_release(acl);
103 acl = NULL; 98 acl = NULL;
104 } 99 }
105 break; 100 break;
106 101 case ACL_TYPE_DEFAULT:
107 case ACL_TYPE_DEFAULT: 102 if (!S_ISDIR(inode->i_mode)) {
108 if (!S_ISDIR(inode->i_mode)) { 103 error = -EINVAL;
109 error = -EINVAL; 104 goto failed;
110 goto failed; 105 }
111 } 106 break;
112 break;
113 } 107 }
114 } 108 }
115 ops->setacl(inode, type, acl); 109 set_cached_acl(inode, type, acl);
116 error = 0; 110 error = 0;
117failed: 111failed:
118 posix_acl_release(acl); 112 posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
121 115
122/** 116/**
123 * generic_acl_init - Take care of acl inheritance at @inode create time 117 * generic_acl_init - Take care of acl inheritance at @inode create time
124 * @ops: Filesystem specific getacl and setacl callbacks
125 * 118 *
126 * Files created inside a directory with a default ACL inherit the 119 * Files created inside a directory with a default ACL inherit the
127 * directory's default ACL. 120 * directory's default ACL.
128 */ 121 */
129int 122int
130generic_acl_init(struct inode *inode, struct inode *dir, 123generic_acl_init(struct inode *inode, struct inode *dir)
131 struct generic_acl_operations *ops)
132{ 124{
133 struct posix_acl *acl = NULL; 125 struct posix_acl *acl = NULL;
134 mode_t mode = inode->i_mode; 126 mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
136 128
137 inode->i_mode = mode & ~current_umask(); 129 inode->i_mode = mode & ~current_umask();
138 if (!S_ISLNK(inode->i_mode)) 130 if (!S_ISLNK(inode->i_mode))
139 acl = ops->getacl(dir, ACL_TYPE_DEFAULT); 131 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
140 if (acl) { 132 if (acl) {
141 struct posix_acl *clone; 133 struct posix_acl *clone;
142 134
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
145 error = -ENOMEM; 137 error = -ENOMEM;
146 if (!clone) 138 if (!clone)
147 goto cleanup; 139 goto cleanup;
148 ops->setacl(inode, ACL_TYPE_DEFAULT, clone); 140 set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
149 posix_acl_release(clone); 141 posix_acl_release(clone);
150 } 142 }
151 clone = posix_acl_clone(acl, GFP_KERNEL); 143 clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
156 if (error >= 0) { 148 if (error >= 0) {
157 inode->i_mode = mode; 149 inode->i_mode = mode;
158 if (error > 0) 150 if (error > 0)
159 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 151 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
160 } 152 }
161 posix_acl_release(clone); 153 posix_acl_release(clone);
162 } 154 }
@@ -169,20 +161,19 @@ cleanup:
169 161
170/** 162/**
171 * generic_acl_chmod - change the access acl of @inode upon chmod() 163 * generic_acl_chmod - change the access acl of @inode upon chmod()
172 * @ops: FIlesystem specific getacl and setacl callbacks
173 * 164 *
174 * A chmod also changes the permissions of the owner, group/mask, and 165 * A chmod also changes the permissions of the owner, group/mask, and
175 * other ACL entries. 166 * other ACL entries.
176 */ 167 */
177int 168int
178generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) 169generic_acl_chmod(struct inode *inode)
179{ 170{
180 struct posix_acl *acl, *clone; 171 struct posix_acl *acl, *clone;
181 int error = 0; 172 int error = 0;
182 173
183 if (S_ISLNK(inode->i_mode)) 174 if (S_ISLNK(inode->i_mode))
184 return -EOPNOTSUPP; 175 return -EOPNOTSUPP;
185 acl = ops->getacl(inode, ACL_TYPE_ACCESS); 176 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
186 if (acl) { 177 if (acl) {
187 clone = posix_acl_clone(acl, GFP_KERNEL); 178 clone = posix_acl_clone(acl, GFP_KERNEL);
188 posix_acl_release(acl); 179 posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
190 return -ENOMEM; 181 return -ENOMEM;
191 error = posix_acl_chmod_masq(clone, inode->i_mode); 182 error = posix_acl_chmod_masq(clone, inode->i_mode);
192 if (!error) 183 if (!error)
193 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 184 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
194 posix_acl_release(clone); 185 posix_acl_release(clone);
195 } 186 }
196 return error; 187 return error;
197} 188}
189
190int
191generic_check_acl(struct inode *inode, int mask)
192{
193 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
194
195 if (acl) {
196 int error = posix_acl_permission(inode, acl, mask);
197 posix_acl_release(acl);
198 return error;
199 }
200 return -EAGAIN;
201}
202
203struct xattr_handler generic_acl_access_handler = {
204 .prefix = POSIX_ACL_XATTR_ACCESS,
205 .flags = ACL_TYPE_ACCESS,
206 .list = generic_acl_list,
207 .get = generic_acl_get,
208 .set = generic_acl_set,
209};
210
211struct xattr_handler generic_acl_default_handler = {
212 .prefix = POSIX_ACL_XATTR_DEFAULT,
213 .flags = ACL_TYPE_DEFAULT,
214 .list = generic_acl_list,
215 .get = generic_acl_get,
216 .set = generic_acl_set,
217};
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 5971359d209..4dcddf83326 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -8,6 +8,8 @@ config GFS2_FS
8 select FS_POSIX_ACL 8 select FS_POSIX_ACL
9 select CRC32 9 select CRC32
10 select SLOW_WORK 10 select SLOW_WORK
11 select QUOTA
12 select QUOTACTL
11 help 13 help
12 A cluster filesystem. 14 A cluster filesystem.
13 15
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3fc4e3ac7d8..87ee309d4c2 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
15#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h> 17#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
@@ -26,108 +27,44 @@
26#include "trans.h" 27#include "trans.h"
27#include "util.h" 28#include "util.h"
28 29
29#define ACL_ACCESS 1 30static const char *gfs2_acl_name(int type)
30#define ACL_DEFAULT 0
31
32int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
33 struct gfs2_ea_request *er, int *remove, mode_t *mode)
34{ 31{
35 struct posix_acl *acl; 32 switch (type) {
36 int error; 33 case ACL_TYPE_ACCESS:
37 34 return GFS2_POSIX_ACL_ACCESS;
38 error = gfs2_acl_validate_remove(ip, access); 35 case ACL_TYPE_DEFAULT:
39 if (error) 36 return GFS2_POSIX_ACL_DEFAULT;
40 return error;
41
42 if (!er->er_data)
43 return -EINVAL;
44
45 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
46 if (IS_ERR(acl))
47 return PTR_ERR(acl);
48 if (!acl) {
49 *remove = 1;
50 return 0;
51 }
52
53 error = posix_acl_valid(acl);
54 if (error)
55 goto out;
56
57 if (access) {
58 error = posix_acl_equiv_mode(acl, mode);
59 if (!error)
60 *remove = 1;
61 else if (error > 0)
62 error = 0;
63 } 37 }
64 38 return NULL;
65out:
66 posix_acl_release(acl);
67 return error;
68}
69
70int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
71{
72 if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
73 return -EOPNOTSUPP;
74 if (!is_owner_or_cap(&ip->i_inode))
75 return -EPERM;
76 if (S_ISLNK(ip->i_inode.i_mode))
77 return -EOPNOTSUPP;
78 if (!access && !S_ISDIR(ip->i_inode.i_mode))
79 return -EACCES;
80
81 return 0;
82} 39}
83 40
84static int acl_get(struct gfs2_inode *ip, const char *name, 41static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
85 struct posix_acl **acl, struct gfs2_ea_location *el,
86 char **datap, unsigned int *lenp)
87{ 42{
43 struct posix_acl *acl;
44 const char *name;
88 char *data; 45 char *data;
89 unsigned int len; 46 int len;
90 int error;
91
92 el->el_bh = NULL;
93 47
94 if (!ip->i_eattr) 48 if (!ip->i_eattr)
95 return 0; 49 return NULL;
96
97 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el);
98 if (error)
99 return error;
100 if (!el->el_ea)
101 return 0;
102 if (!GFS2_EA_DATA_LEN(el->el_ea))
103 goto out;
104 50
105 len = GFS2_EA_DATA_LEN(el->el_ea); 51 acl = get_cached_acl(&ip->i_inode, type);
106 data = kmalloc(len, GFP_NOFS); 52 if (acl != ACL_NOT_CACHED)
107 error = -ENOMEM; 53 return acl;
108 if (!data)
109 goto out;
110 54
111 error = gfs2_ea_get_copy(ip, el, data, len); 55 name = gfs2_acl_name(type);
112 if (error < 0) 56 if (name == NULL)
113 goto out_kfree; 57 return ERR_PTR(-EINVAL);
114 error = 0;
115 58
116 if (acl) { 59 len = gfs2_xattr_acl_get(ip, name, &data);
117 *acl = posix_acl_from_xattr(data, len); 60 if (len < 0)
118 if (IS_ERR(*acl)) 61 return ERR_PTR(len);
119 error = PTR_ERR(*acl); 62 if (len == 0)
120 } 63 return NULL;
121 64
122out_kfree: 65 acl = posix_acl_from_xattr(data, len);
123 if (error || !datap) { 66 kfree(data);
124 kfree(data); 67 return acl;
125 } else {
126 *datap = data;
127 *lenp = len;
128 }
129out:
130 return error;
131} 68}
132 69
133/** 70/**
@@ -140,14 +77,12 @@ out:
140 77
141int gfs2_check_acl(struct inode *inode, int mask) 78int gfs2_check_acl(struct inode *inode, int mask)
142{ 79{
143 struct gfs2_ea_location el; 80 struct posix_acl *acl;
144 struct posix_acl *acl = NULL;
145 int error; 81 int error;
146 82
147 error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); 83 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
148 brelse(el.el_bh); 84 if (IS_ERR(acl))
149 if (error) 85 return PTR_ERR(acl);
150 return error;
151 86
152 if (acl) { 87 if (acl) {
153 error = posix_acl_permission(inode, acl, mask); 88 error = posix_acl_permission(inode, acl, mask);
@@ -158,57 +93,75 @@ int gfs2_check_acl(struct inode *inode, int mask)
158 return -EAGAIN; 93 return -EAGAIN;
159} 94}
160 95
161static int munge_mode(struct gfs2_inode *ip, mode_t mode) 96static int gfs2_set_mode(struct inode *inode, mode_t mode)
162{ 97{
163 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 98 int error = 0;
164 struct buffer_head *dibh;
165 int error;
166 99
167 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 100 if (mode != inode->i_mode) {
168 if (error) 101 struct iattr iattr;
169 return error;
170 102
171 error = gfs2_meta_inode_buffer(ip, &dibh); 103 iattr.ia_valid = ATTR_MODE;
172 if (!error) { 104 iattr.ia_mode = mode;
173 gfs2_assert_withdraw(sdp, 105
174 (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); 106 error = gfs2_setattr_simple(GFS2_I(inode), &iattr);
175 ip->i_inode.i_mode = mode;
176 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
177 gfs2_dinode_out(ip, dibh->b_data);
178 brelse(dibh);
179 } 107 }
180 108
181 gfs2_trans_end(sdp); 109 return error;
110}
111
112static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
113{
114 int error;
115 int len;
116 char *data;
117 const char *name = gfs2_acl_name(type);
182 118
183 return 0; 119 BUG_ON(name == NULL);
120 len = posix_acl_to_xattr(acl, NULL, 0);
121 if (len == 0)
122 return 0;
123 data = kmalloc(len, GFP_NOFS);
124 if (data == NULL)
125 return -ENOMEM;
126 error = posix_acl_to_xattr(acl, data, len);
127 if (error < 0)
128 goto out;
129 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
130 if (!error)
131 set_cached_acl(inode, type, acl);
132out:
133 kfree(data);
134 return error;
184} 135}
185 136
186int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) 137int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
187{ 138{
188 struct gfs2_ea_location el;
189 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 139 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
190 struct posix_acl *acl = NULL, *clone; 140 struct posix_acl *acl, *clone;
191 mode_t mode = ip->i_inode.i_mode; 141 mode_t mode = inode->i_mode;
192 char *data = NULL; 142 int error = 0;
193 unsigned int len;
194 int error;
195 143
196 if (!sdp->sd_args.ar_posix_acl) 144 if (!sdp->sd_args.ar_posix_acl)
197 return 0; 145 return 0;
198 if (S_ISLNK(ip->i_inode.i_mode)) 146 if (S_ISLNK(inode->i_mode))
199 return 0; 147 return 0;
200 148
201 error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); 149 acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT);
202 brelse(el.el_bh); 150 if (IS_ERR(acl))
203 if (error) 151 return PTR_ERR(acl);
204 return error;
205 if (!acl) { 152 if (!acl) {
206 mode &= ~current_umask(); 153 mode &= ~current_umask();
207 if (mode != ip->i_inode.i_mode) 154 if (mode != inode->i_mode)
208 error = munge_mode(ip, mode); 155 error = gfs2_set_mode(inode, mode);
209 return error; 156 return error;
210 } 157 }
211 158
159 if (S_ISDIR(inode->i_mode)) {
160 error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl);
161 if (error)
162 goto out;
163 }
164
212 clone = posix_acl_clone(acl, GFP_NOFS); 165 clone = posix_acl_clone(acl, GFP_NOFS);
213 error = -ENOMEM; 166 error = -ENOMEM;
214 if (!clone) 167 if (!clone)
@@ -216,43 +169,32 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
216 posix_acl_release(acl); 169 posix_acl_release(acl);
217 acl = clone; 170 acl = clone;
218 171
219 if (S_ISDIR(ip->i_inode.i_mode)) {
220 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
221 GFS2_POSIX_ACL_DEFAULT, data, len, 0);
222 if (error)
223 goto out;
224 }
225
226 error = posix_acl_create_masq(acl, &mode); 172 error = posix_acl_create_masq(acl, &mode);
227 if (error < 0) 173 if (error < 0)
228 goto out; 174 goto out;
229 if (error == 0) 175 if (error == 0)
230 goto munge; 176 goto munge;
231 177
232 posix_acl_to_xattr(acl, data, len); 178 error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl);
233 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
234 GFS2_POSIX_ACL_ACCESS, data, len, 0);
235 if (error) 179 if (error)
236 goto out; 180 goto out;
237munge: 181munge:
238 error = munge_mode(ip, mode); 182 error = gfs2_set_mode(inode, mode);
239out: 183out:
240 posix_acl_release(acl); 184 posix_acl_release(acl);
241 kfree(data);
242 return error; 185 return error;
243} 186}
244 187
245int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) 188int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
246{ 189{
247 struct posix_acl *acl = NULL, *clone; 190 struct posix_acl *acl, *clone;
248 struct gfs2_ea_location el;
249 char *data; 191 char *data;
250 unsigned int len; 192 unsigned int len;
251 int error; 193 int error;
252 194
253 error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); 195 acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS);
254 if (error) 196 if (IS_ERR(acl))
255 goto out_brelse; 197 return PTR_ERR(acl);
256 if (!acl) 198 if (!acl)
257 return gfs2_setattr_simple(ip, attr); 199 return gfs2_setattr_simple(ip, attr);
258 200
@@ -265,15 +207,138 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
265 207
266 error = posix_acl_chmod_masq(acl, attr->ia_mode); 208 error = posix_acl_chmod_masq(acl, attr->ia_mode);
267 if (!error) { 209 if (!error) {
210 len = posix_acl_to_xattr(acl, NULL, 0);
211 data = kmalloc(len, GFP_NOFS);
212 error = -ENOMEM;
213 if (data == NULL)
214 goto out;
268 posix_acl_to_xattr(acl, data, len); 215 posix_acl_to_xattr(acl, data, len);
269 error = gfs2_ea_acl_chmod(ip, &el, attr, data); 216 error = gfs2_xattr_acl_chmod(ip, attr, data);
217 kfree(data);
218 set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
270 } 219 }
271 220
272out: 221out:
273 posix_acl_release(acl); 222 posix_acl_release(acl);
274 kfree(data);
275out_brelse:
276 brelse(el.el_bh);
277 return error; 223 return error;
278} 224}
279 225
226static int gfs2_acl_type(const char *name)
227{
228 if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0)
229 return ACL_TYPE_ACCESS;
230 if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0)
231 return ACL_TYPE_DEFAULT;
232 return -EINVAL;
233}
234
235static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
236 void *buffer, size_t size, int xtype)
237{
238 struct inode *inode = dentry->d_inode;
239 struct posix_acl *acl;
240 int type;
241 int error;
242
243 type = gfs2_acl_type(name);
244 if (type < 0)
245 return type;
246
247 acl = gfs2_acl_get(GFS2_I(inode), type);
248 if (IS_ERR(acl))
249 return PTR_ERR(acl);
250 if (acl == NULL)
251 return -ENODATA;
252
253 error = posix_acl_to_xattr(acl, buffer, size);
254 posix_acl_release(acl);
255
256 return error;
257}
258
259static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
260 const void *value, size_t size, int flags,
261 int xtype)
262{
263 struct inode *inode = dentry->d_inode;
264 struct gfs2_sbd *sdp = GFS2_SB(inode);
265 struct posix_acl *acl = NULL;
266 int error = 0, type;
267
268 if (!sdp->sd_args.ar_posix_acl)
269 return -EOPNOTSUPP;
270
271 type = gfs2_acl_type(name);
272 if (type < 0)
273 return type;
274 if (flags & XATTR_CREATE)
275 return -EINVAL;
276 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
277 return value ? -EACCES : 0;
278 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
279 return -EPERM;
280 if (S_ISLNK(inode->i_mode))
281 return -EOPNOTSUPP;
282
283 if (!value)
284 goto set_acl;
285
286 acl = posix_acl_from_xattr(value, size);
287 if (!acl) {
288 /*
289 * acl_set_file(3) may request that we set default ACLs with
290 * zero length -- defend (gracefully) against that here.
291 */
292 goto out;
293 }
294 if (IS_ERR(acl)) {
295 error = PTR_ERR(acl);
296 goto out;
297 }
298
299 error = posix_acl_valid(acl);
300 if (error)
301 goto out_release;
302
303 error = -EINVAL;
304 if (acl->a_count > GFS2_ACL_MAX_ENTRIES)
305 goto out_release;
306
307 if (type == ACL_TYPE_ACCESS) {
308 mode_t mode = inode->i_mode;
309 error = posix_acl_equiv_mode(acl, &mode);
310
311 if (error <= 0) {
312 posix_acl_release(acl);
313 acl = NULL;
314
315 if (error < 0)
316 return error;
317 }
318
319 error = gfs2_set_mode(inode, mode);
320 if (error)
321 goto out_release;
322 }
323
324set_acl:
325 error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
326 if (!error) {
327 if (acl)
328 set_cached_acl(inode, type, acl);
329 else
330 forget_cached_acl(inode, type);
331 }
332out_release:
333 posix_acl_release(acl);
334out:
335 return error;
336}
337
338struct xattr_handler gfs2_xattr_system_handler = {
339 .prefix = XATTR_SYSTEM_PREFIX,
340 .flags = GFS2_EATYPE_SYS,
341 .get = gfs2_xattr_system_get,
342 .set = gfs2_xattr_system_set,
343};
344
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 6751930bfb6..9306a2e6620 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -13,26 +13,12 @@
13#include "incore.h" 13#include "incore.h"
14 14
15#define GFS2_POSIX_ACL_ACCESS "posix_acl_access" 15#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
16#define GFS2_POSIX_ACL_ACCESS_LEN 16
17#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
18#define GFS2_POSIX_ACL_DEFAULT_LEN 17 17#define GFS2_ACL_MAX_ENTRIES 25
19 18
20#define GFS2_ACL_IS_ACCESS(name, len) \ 19extern int gfs2_check_acl(struct inode *inode, int mask);
21 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
22 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
23 22extern struct xattr_handler gfs2_xattr_system_handler;
24#define GFS2_ACL_IS_DEFAULT(name, len) \
25 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
26 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
27
28struct gfs2_ea_request;
29
30int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
31 struct gfs2_ea_request *er,
32 int *remove, mode_t *mode);
33int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
34int gfs2_check_acl(struct inode *inode, int mask);
35int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
36int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
37 23
38#endif /* __ACL_DOT_H__ */ 24#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 694b5d48f03..7b8da941526 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -269,7 +269,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
269 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 269 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
270 unsigned offset = i_size & (PAGE_CACHE_SIZE-1); 270 unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
271 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); 271 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
272 struct backing_dev_info *bdi = mapping->backing_dev_info;
273 int i; 272 int i;
274 int ret; 273 int ret;
275 274
@@ -313,11 +312,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
313 312
314 if (ret || (--(wbc->nr_to_write) <= 0)) 313 if (ret || (--(wbc->nr_to_write) <= 0))
315 ret = 1; 314 ret = 1;
316 if (wbc->nonblocking && bdi_write_congested(bdi)) {
317 wbc->encountered_congestion = 1;
318 ret = 1;
319 }
320
321 } 315 }
322 gfs2_trans_end(sdp); 316 gfs2_trans_end(sdp);
323 return ret; 317 return ret;
@@ -338,7 +332,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
338static int gfs2_write_cache_jdata(struct address_space *mapping, 332static int gfs2_write_cache_jdata(struct address_space *mapping,
339 struct writeback_control *wbc) 333 struct writeback_control *wbc)
340{ 334{
341 struct backing_dev_info *bdi = mapping->backing_dev_info;
342 int ret = 0; 335 int ret = 0;
343 int done = 0; 336 int done = 0;
344 struct pagevec pvec; 337 struct pagevec pvec;
@@ -348,11 +341,6 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
348 int scanned = 0; 341 int scanned = 0;
349 int range_whole = 0; 342 int range_whole = 0;
350 343
351 if (wbc->nonblocking && bdi_write_congested(bdi)) {
352 wbc->encountered_congestion = 1;
353 return 0;
354 }
355
356 pagevec_init(&pvec, 0); 344 pagevec_init(&pvec, 0);
357 if (wbc->range_cyclic) { 345 if (wbc->range_cyclic) {
358 index = mapping->writeback_index; /* Start from prev offset */ 346 index = mapping->writeback_index; /* Start from prev offset */
@@ -819,8 +807,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
819 mark_inode_dirty(inode); 807 mark_inode_dirty(inode);
820 } 808 }
821 809
822 if (inode == sdp->sd_rindex) 810 if (inode == sdp->sd_rindex) {
823 adjust_fs_space(inode); 811 adjust_fs_space(inode);
812 ip->i_gh.gh_flags |= GL_NOCACHE;
813 }
824 814
825 brelse(dibh); 815 brelse(dibh);
826 gfs2_trans_end(sdp); 816 gfs2_trans_end(sdp);
@@ -889,8 +879,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
889 mark_inode_dirty(inode); 879 mark_inode_dirty(inode);
890 } 880 }
891 881
892 if (inode == sdp->sd_rindex) 882 if (inode == sdp->sd_rindex) {
893 adjust_fs_space(inode); 883 adjust_fs_space(inode);
884 ip->i_gh.gh_flags |= GL_NOCACHE;
885 }
894 886
895 brelse(dibh); 887 brelse(dibh);
896 gfs2_trans_end(sdp); 888 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 297d7e5ceba..25fddc100f1 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -525,38 +525,6 @@ consist_inode:
525 return ERR_PTR(-EIO); 525 return ERR_PTR(-EIO);
526} 526}
527 527
528
529/**
530 * dirent_first - Return the first dirent
531 * @dip: the directory
532 * @bh: The buffer
533 * @dent: Pointer to list of dirents
534 *
535 * return first dirent whether bh points to leaf or stuffed dinode
536 *
537 * Returns: IS_LEAF, IS_DINODE, or -errno
538 */
539
540static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
541 struct gfs2_dirent **dent)
542{
543 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
544
545 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
546 if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh))
547 return -EIO;
548 *dent = (struct gfs2_dirent *)(bh->b_data +
549 sizeof(struct gfs2_leaf));
550 return IS_LEAF;
551 } else {
552 if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI))
553 return -EIO;
554 *dent = (struct gfs2_dirent *)(bh->b_data +
555 sizeof(struct gfs2_dinode));
556 return IS_DINODE;
557 }
558}
559
560static int dirent_check_reclen(struct gfs2_inode *dip, 528static int dirent_check_reclen(struct gfs2_inode *dip,
561 const struct gfs2_dirent *d, const void *end_p) 529 const struct gfs2_dirent *d, const void *end_p)
562{ 530{
@@ -1006,7 +974,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1006 divider = (start + half_len) << (32 - dip->i_depth); 974 divider = (start + half_len) << (32 - dip->i_depth);
1007 975
1008 /* Copy the entries */ 976 /* Copy the entries */
1009 dirent_first(dip, obh, &dent); 977 dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));
1010 978
1011 do { 979 do {
1012 next = dent; 980 next = dent;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa323..a6abbae8a27 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
569 return ret; 569 return ret;
570} 570}
571 571
572/**
573 * gfs2_file_aio_write - Perform a write to a file
574 * @iocb: The io context
575 * @iov: The data to write
576 * @nr_segs: Number of @iov segments
577 * @pos: The file position
578 *
579 * We have to do a lock/unlock here to refresh the inode size for
580 * O_APPEND writes, otherwise we can land up writing at the wrong
581 * offset. There is still a race, but provided the app is using its
582 * own file locking, this will make O_APPEND work as expected.
583 *
584 */
585
586static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
587 unsigned long nr_segs, loff_t pos)
588{
589 struct file *file = iocb->ki_filp;
590
591 if (file->f_flags & O_APPEND) {
592 struct dentry *dentry = file->f_dentry;
593 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
594 struct gfs2_holder gh;
595 int ret;
596
597 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 if (ret)
599 return ret;
600 gfs2_glock_dq_uninit(&gh);
601 }
602
603 return generic_file_aio_write(iocb, iov, nr_segs, pos);
604}
605
572#ifdef CONFIG_GFS2_FS_LOCKING_DLM 606#ifdef CONFIG_GFS2_FS_LOCKING_DLM
573 607
574/** 608/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
711 .read = do_sync_read, 745 .read = do_sync_read,
712 .aio_read = generic_file_aio_read, 746 .aio_read = generic_file_aio_read,
713 .write = do_sync_write, 747 .write = do_sync_write,
714 .aio_write = generic_file_aio_write, 748 .aio_write = gfs2_file_aio_write,
715 .unlocked_ioctl = gfs2_ioctl, 749 .unlocked_ioctl = gfs2_ioctl,
716 .mmap = gfs2_mmap, 750 .mmap = gfs2_mmap,
717 .open = gfs2_open, 751 .open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
741 .read = do_sync_read, 775 .read = do_sync_read,
742 .aio_read = generic_file_aio_read, 776 .aio_read = generic_file_aio_read,
743 .write = do_sync_write, 777 .write = do_sync_write,
744 .aio_write = generic_file_aio_write, 778 .aio_write = gfs2_file_aio_write,
745 .unlocked_ioctl = gfs2_ioctl, 779 .unlocked_ioctl = gfs2_ioctl,
746 .mmap = gfs2_mmap, 780 .mmap = gfs2_mmap,
747 .open = gfs2_open, 781 .open = gfs2_open,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8b674b1f3a5..f455a03a09e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -241,15 +241,14 @@ int gfs2_glock_put(struct gfs2_glock *gl)
241 int rv = 0; 241 int rv = 0;
242 242
243 write_lock(gl_lock_addr(gl->gl_hash)); 243 write_lock(gl_lock_addr(gl->gl_hash));
244 if (atomic_dec_and_test(&gl->gl_ref)) { 244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
245 hlist_del(&gl->gl_list); 245 hlist_del(&gl->gl_list);
246 write_unlock(gl_lock_addr(gl->gl_hash));
247 spin_lock(&lru_lock);
248 if (!list_empty(&gl->gl_lru)) { 246 if (!list_empty(&gl->gl_lru)) {
249 list_del_init(&gl->gl_lru); 247 list_del_init(&gl->gl_lru);
250 atomic_dec(&lru_count); 248 atomic_dec(&lru_count);
251 } 249 }
252 spin_unlock(&lru_lock); 250 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
253 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
254 glock_free(gl); 253 glock_free(gl);
255 rv = 1; 254 rv = 1;
@@ -513,7 +512,6 @@ retry:
513 GLOCK_BUG_ON(gl, 1); 512 GLOCK_BUG_ON(gl, 1);
514 } 513 }
515 spin_unlock(&gl->gl_spin); 514 spin_unlock(&gl->gl_spin);
516 gfs2_glock_put(gl);
517 return; 515 return;
518 } 516 }
519 517
@@ -524,8 +522,6 @@ retry:
524 if (glops->go_xmote_bh) { 522 if (glops->go_xmote_bh) {
525 spin_unlock(&gl->gl_spin); 523 spin_unlock(&gl->gl_spin);
526 rv = glops->go_xmote_bh(gl, gh); 524 rv = glops->go_xmote_bh(gl, gh);
527 if (rv == -EAGAIN)
528 return;
529 spin_lock(&gl->gl_spin); 525 spin_lock(&gl->gl_spin);
530 if (rv) { 526 if (rv) {
531 do_error(gl, rv); 527 do_error(gl, rv);
@@ -540,7 +536,6 @@ out:
540 clear_bit(GLF_LOCK, &gl->gl_flags); 536 clear_bit(GLF_LOCK, &gl->gl_flags);
541out_locked: 537out_locked:
542 spin_unlock(&gl->gl_spin); 538 spin_unlock(&gl->gl_spin);
543 gfs2_glock_put(gl);
544} 539}
545 540
546static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, 541static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
@@ -600,7 +595,6 @@ __acquires(&gl->gl_spin)
600 595
601 if (!(ret & LM_OUT_ASYNC)) { 596 if (!(ret & LM_OUT_ASYNC)) {
602 finish_xmote(gl, ret); 597 finish_xmote(gl, ret);
603 gfs2_glock_hold(gl);
604 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 598 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
605 gfs2_glock_put(gl); 599 gfs2_glock_put(gl);
606 } else { 600 } else {
@@ -672,12 +666,17 @@ out:
672 return; 666 return;
673 667
674out_sched: 668out_sched:
669 clear_bit(GLF_LOCK, &gl->gl_flags);
670 smp_mb__after_clear_bit();
675 gfs2_glock_hold(gl); 671 gfs2_glock_hold(gl);
676 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 672 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
677 gfs2_glock_put_nolock(gl); 673 gfs2_glock_put_nolock(gl);
674 return;
675
678out_unlock: 676out_unlock:
679 clear_bit(GLF_LOCK, &gl->gl_flags); 677 clear_bit(GLF_LOCK, &gl->gl_flags);
680 goto out; 678 smp_mb__after_clear_bit();
679 return;
681} 680}
682 681
683static void delete_work_func(struct work_struct *work) 682static void delete_work_func(struct work_struct *work)
@@ -707,9 +706,12 @@ static void glock_work_func(struct work_struct *work)
707{ 706{
708 unsigned long delay = 0; 707 unsigned long delay = 0;
709 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
709 int drop_ref = 0;
710 710
711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
712 finish_xmote(gl, gl->gl_reply); 712 finish_xmote(gl, gl->gl_reply);
713 drop_ref = 1;
714 }
713 down_read(&gfs2_umount_flush_sem); 715 down_read(&gfs2_umount_flush_sem);
714 spin_lock(&gl->gl_spin); 716 spin_lock(&gl->gl_spin);
715 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 717 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -727,6 +729,8 @@ static void glock_work_func(struct work_struct *work)
727 if (!delay || 729 if (!delay ||
728 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 730 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
729 gfs2_glock_put(gl); 731 gfs2_glock_put(gl);
732 if (drop_ref)
733 gfs2_glock_put(gl);
730} 734}
731 735
732/** 736/**
@@ -1361,10 +1365,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1361 list_del_init(&gl->gl_lru); 1365 list_del_init(&gl->gl_lru);
1362 atomic_dec(&lru_count); 1366 atomic_dec(&lru_count);
1363 1367
1364 /* Check if glock is about to be freed */
1365 if (atomic_read(&gl->gl_ref) == 0)
1366 continue;
1367
1368 /* Test for being demotable */ 1368 /* Test for being demotable */
1369 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1369 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1370 gfs2_glock_hold(gl); 1370 gfs2_glock_hold(gl);
@@ -1375,10 +1375,11 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1375 handle_callback(gl, LM_ST_UNLOCKED, 0); 1375 handle_callback(gl, LM_ST_UNLOCKED, 0);
1376 nr--; 1376 nr--;
1377 } 1377 }
1378 clear_bit(GLF_LOCK, &gl->gl_flags);
1379 smp_mb__after_clear_bit();
1378 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1380 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1379 gfs2_glock_put_nolock(gl); 1381 gfs2_glock_put_nolock(gl);
1380 spin_unlock(&gl->gl_spin); 1382 spin_unlock(&gl->gl_spin);
1381 clear_bit(GLF_LOCK, &gl->gl_flags);
1382 spin_lock(&lru_lock); 1383 spin_lock(&lru_lock);
1383 continue; 1384 continue;
1384 } 1385 }
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index c609894ec0d..13f0bd22813 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -180,15 +180,6 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
180 return gl->gl_state == LM_ST_SHARED; 180 return gl->gl_state == LM_ST_SHARED;
181} 181}
182 182
183static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
184{
185 int ret;
186 spin_lock(&gl->gl_spin);
187 ret = test_bit(GLF_DEMOTE, &gl->gl_flags);
188 spin_unlock(&gl->gl_spin);
189 return ret;
190}
191
192int gfs2_glock_get(struct gfs2_sbd *sdp, 183int gfs2_glock_get(struct gfs2_sbd *sdp,
193 u64 number, const struct gfs2_glock_operations *glops, 184 u64 number, const struct gfs2_glock_operations *glops,
194 int create, struct gfs2_glock **glp); 185 int create, struct gfs2_glock **glp);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 6985eef06c3..78554acc060 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -13,6 +13,7 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/bio.h> 15#include <linux/bio.h>
16#include <linux/posix_acl.h>
16 17
17#include "gfs2.h" 18#include "gfs2.h"
18#include "incore.h" 19#include "incore.h"
@@ -184,8 +185,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
184 if (flags & DIO_METADATA) { 185 if (flags & DIO_METADATA) {
185 struct address_space *mapping = gl->gl_aspace->i_mapping; 186 struct address_space *mapping = gl->gl_aspace->i_mapping;
186 truncate_inode_pages(mapping, 0); 187 truncate_inode_pages(mapping, 0);
187 if (ip) 188 if (ip) {
188 set_bit(GIF_INVALID, &ip->i_flags); 189 set_bit(GIF_INVALID, &ip->i_flags);
190 forget_all_cached_acls(&ip->i_inode);
191 }
189 } 192 }
190 193
191 if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) 194 if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6edb423f90b..4792200978c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -429,7 +429,11 @@ struct gfs2_args {
429 unsigned int ar_meta:1; /* mount metafs */ 429 unsigned int ar_meta:1; /* mount metafs */
430 unsigned int ar_discard:1; /* discard requests */ 430 unsigned int ar_discard:1; /* discard requests */
431 unsigned int ar_errors:2; /* errors=withdraw | panic */ 431 unsigned int ar_errors:2; /* errors=withdraw | panic */
432 unsigned int ar_nobarrier:1; /* do not send barriers */
432 int ar_commit; /* Commit interval */ 433 int ar_commit; /* Commit interval */
434 int ar_statfs_quantum; /* The fast statfs interval */
435 int ar_quota_quantum; /* The quota interval */
436 int ar_statfs_percent; /* The % change to force sync */
433}; 437};
434 438
435struct gfs2_tune { 439struct gfs2_tune {
@@ -558,6 +562,7 @@ struct gfs2_sbd {
558 spinlock_t sd_statfs_spin; 562 spinlock_t sd_statfs_spin;
559 struct gfs2_statfs_change_host sd_statfs_master; 563 struct gfs2_statfs_change_host sd_statfs_master;
560 struct gfs2_statfs_change_host sd_statfs_local; 564 struct gfs2_statfs_change_host sd_statfs_local;
565 int sd_statfs_force_sync;
561 566
562 /* Resource group stuff */ 567 /* Resource group stuff */
563 568
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index fb15d3b1f40..6e220f4eee7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
126 * segment inside gfs2_inode_lookup code needs to get moved around. 126 * segment inside gfs2_inode_lookup code needs to get moved around.
127 * 127 *
128 * Clean up I_LOCK and I_NEW as well. 128 * Clears I_NEW as well.
129 **/ 129 **/
130 130
131void gfs2_set_iop(struct inode *inode) 131void gfs2_set_iop(struct inode *inode)
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
801 return err; 801 return err;
802 } 802 }
803 803
804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); 804 err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
805 GFS2_EATYPE_SECURITY);
805 kfree(value); 806 kfree(value);
806 kfree(name); 807 kfree(name);
807 808
@@ -871,7 +872,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
871 if (error) 872 if (error)
872 goto fail_gunlock2; 873 goto fail_gunlock2;
873 874
874 error = gfs2_acl_create(dip, GFS2_I(inode)); 875 error = gfs2_acl_create(dip, inode);
875 if (error) 876 if (error)
876 goto fail_gunlock2; 877 goto fail_gunlock2;
877 878
@@ -947,9 +948,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
947 948
948 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 949 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
949 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 950 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
950 str->di_header.__pad0 = 0;
951 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 951 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
952 str->di_header.__pad1 = 0;
953 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 952 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
954 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 953 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
955 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 954 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 13c6237c5f6..4511b08fc45 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
596 memset(lh, 0, sizeof(struct gfs2_log_header)); 596 memset(lh, 0, sizeof(struct gfs2_log_header));
597 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 597 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
598 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 598 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
599 lh->lh_header.__pad0 = cpu_to_be64(0);
599 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 600 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
601 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
600 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); 602 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
601 lh->lh_flags = cpu_to_be32(flags); 603 lh->lh_flags = cpu_to_be32(flags);
602 lh->lh_tail = cpu_to_be32(tail); 604 lh->lh_tail = cpu_to_be32(tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9969ff062c5..de97632ba32 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
132static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 132static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
133{ 133{
134 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 134 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
135 struct gfs2_meta_header *mh;
135 struct gfs2_trans *tr; 136 struct gfs2_trans *tr;
136 137
137 lock_buffer(bd->bd_bh); 138 lock_buffer(bd->bd_bh);
@@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
148 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 149 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
149 gfs2_meta_check(sdp, bd->bd_bh); 150 gfs2_meta_check(sdp, bd->bd_bh);
150 gfs2_pin(sdp, bd->bd_bh); 151 gfs2_pin(sdp, bd->bd_bh);
152 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
153 mh->__pad0 = cpu_to_be64(0);
154 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
151 sdp->sd_log_num_buf++; 155 sdp->sd_log_num_buf++;
152 list_add(&le->le_list, &sdp->sd_log_le_buf); 156 list_add(&le->le_list, &sdp->sd_log_le_buf);
153 tr->tr_num_buf_new++; 157 tr->tr_num_buf_new++;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5e..6f68a5f18eb 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
121 if (aspace) { 121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops; 123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL; 124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace); 125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags); 126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace); 127 insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 52fb6c04898..edfee24f363 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -18,6 +18,7 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/slow-work.h> 20#include <linux/slow-work.h>
21#include <linux/quotaops.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -62,13 +63,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
62 gt->gt_quota_warn_period = 10; 63 gt->gt_quota_warn_period = 10;
63 gt->gt_quota_scale_num = 1; 64 gt->gt_quota_scale_num = 1;
64 gt->gt_quota_scale_den = 1; 65 gt->gt_quota_scale_den = 1;
65 gt->gt_quota_quantum = 60;
66 gt->gt_new_files_jdata = 0; 66 gt->gt_new_files_jdata = 0;
67 gt->gt_max_readahead = 1 << 18; 67 gt->gt_max_readahead = 1 << 18;
68 gt->gt_stall_secs = 600; 68 gt->gt_stall_secs = 600;
69 gt->gt_complain_secs = 10; 69 gt->gt_complain_secs = 10;
70 gt->gt_statfs_quantum = 30;
71 gt->gt_statfs_slow = 0;
72} 70}
73 71
74static struct gfs2_sbd *init_sbd(struct super_block *sb) 72static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -1114,7 +1112,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
1114 * Returns: errno 1112 * Returns: errno
1115 */ 1113 */
1116 1114
1117static int fill_super(struct super_block *sb, void *data, int silent) 1115static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
1118{ 1116{
1119 struct gfs2_sbd *sdp; 1117 struct gfs2_sbd *sdp;
1120 struct gfs2_holder mount_gh; 1118 struct gfs2_holder mount_gh;
@@ -1125,17 +1123,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1125 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); 1123 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
1126 return -ENOMEM; 1124 return -ENOMEM;
1127 } 1125 }
1128 1126 sdp->sd_args = *args;
1129 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1130 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1131 sdp->sd_args.ar_commit = 60;
1132 sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT;
1133
1134 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1135 if (error) {
1136 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1137 goto fail;
1138 }
1139 1127
1140 if (sdp->sd_args.ar_spectator) { 1128 if (sdp->sd_args.ar_spectator) {
1141 sb->s_flags |= MS_RDONLY; 1129 sb->s_flags |= MS_RDONLY;
@@ -1143,11 +1131,15 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1143 } 1131 }
1144 if (sdp->sd_args.ar_posix_acl) 1132 if (sdp->sd_args.ar_posix_acl)
1145 sb->s_flags |= MS_POSIXACL; 1133 sb->s_flags |= MS_POSIXACL;
1134 if (sdp->sd_args.ar_nobarrier)
1135 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1146 1136
1147 sb->s_magic = GFS2_MAGIC; 1137 sb->s_magic = GFS2_MAGIC;
1148 sb->s_op = &gfs2_super_ops; 1138 sb->s_op = &gfs2_super_ops;
1149 sb->s_export_op = &gfs2_export_ops; 1139 sb->s_export_op = &gfs2_export_ops;
1150 sb->s_xattr = gfs2_xattr_handlers; 1140 sb->s_xattr = gfs2_xattr_handlers;
1141 sb->s_qcop = &gfs2_quotactl_ops;
1142 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
1151 sb->s_time_gran = 1; 1143 sb->s_time_gran = 1;
1152 sb->s_maxbytes = MAX_LFS_FILESIZE; 1144 sb->s_maxbytes = MAX_LFS_FILESIZE;
1153 1145
@@ -1160,6 +1152,15 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1160 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 1152 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
1161 1153
1162 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; 1154 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit;
1155 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
1156 if (sdp->sd_args.ar_statfs_quantum) {
1157 sdp->sd_tune.gt_statfs_slow = 0;
1158 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1159 }
1160 else {
1161 sdp->sd_tune.gt_statfs_slow = 1;
1162 sdp->sd_tune.gt_statfs_quantum = 30;
1163 }
1163 1164
1164 error = init_names(sdp, silent); 1165 error = init_names(sdp, silent);
1165 if (error) 1166 if (error)
@@ -1243,18 +1244,127 @@ fail:
1243 return error; 1244 return error;
1244} 1245}
1245 1246
1246static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1247static int set_gfs2_super(struct super_block *s, void *data)
1247 const char *dev_name, void *data, struct vfsmount *mnt)
1248{ 1248{
1249 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); 1249 s->s_bdev = data;
1250 s->s_dev = s->s_bdev->bd_dev;
1251
1252 /*
1253 * We set the bdi here to the queue backing, file systems can
1254 * overwrite this in ->fill_super()
1255 */
1256 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
1257 return 0;
1250} 1258}
1251 1259
1252static int test_meta_super(struct super_block *s, void *ptr) 1260static int test_gfs2_super(struct super_block *s, void *ptr)
1253{ 1261{
1254 struct block_device *bdev = ptr; 1262 struct block_device *bdev = ptr;
1255 return (bdev == s->s_bdev); 1263 return (bdev == s->s_bdev);
1256} 1264}
1257 1265
1266/**
1267 * gfs2_get_sb - Get the GFS2 superblock
1268 * @fs_type: The GFS2 filesystem type
1269 * @flags: Mount flags
1270 * @dev_name: The name of the device
1271 * @data: The mount arguments
1272 * @mnt: The vfsmnt for this mount
1273 *
1274 * Q. Why not use get_sb_bdev() ?
1275 * A. We need to select one of two root directories to mount, independent
1276 * of whether this is the initial, or subsequent, mount of this sb
1277 *
1278 * Returns: 0 or -ve on error
1279 */
1280
1281static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1282 const char *dev_name, void *data, struct vfsmount *mnt)
1283{
1284 struct block_device *bdev;
1285 struct super_block *s;
1286 fmode_t mode = FMODE_READ;
1287 int error;
1288 struct gfs2_args args;
1289 struct gfs2_sbd *sdp;
1290
1291 if (!(flags & MS_RDONLY))
1292 mode |= FMODE_WRITE;
1293
1294 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1295 if (IS_ERR(bdev))
1296 return PTR_ERR(bdev);
1297
1298 /*
1299 * once the super is inserted into the list by sget, s_umount
1300 * will protect the lockfs code from trying to start a snapshot
1301 * while we are mounting
1302 */
1303 mutex_lock(&bdev->bd_fsfreeze_mutex);
1304 if (bdev->bd_fsfreeze_count > 0) {
1305 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1306 error = -EBUSY;
1307 goto error_bdev;
1308 }
1309 s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev);
1310 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1311 error = PTR_ERR(s);
1312 if (IS_ERR(s))
1313 goto error_bdev;
1314
1315 memset(&args, 0, sizeof(args));
1316 args.ar_quota = GFS2_QUOTA_DEFAULT;
1317 args.ar_data = GFS2_DATA_DEFAULT;
1318 args.ar_commit = 60;
1319 args.ar_statfs_quantum = 30;
1320 args.ar_quota_quantum = 60;
1321 args.ar_errors = GFS2_ERRORS_DEFAULT;
1322
1323 error = gfs2_mount_args(&args, data);
1324 if (error) {
1325 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1326 if (s->s_root)
1327 goto error_super;
1328 deactivate_locked_super(s);
1329 return error;
1330 }
1331
1332 if (s->s_root) {
1333 error = -EBUSY;
1334 if ((flags ^ s->s_flags) & MS_RDONLY)
1335 goto error_super;
1336 close_bdev_exclusive(bdev, mode);
1337 } else {
1338 char b[BDEVNAME_SIZE];
1339
1340 s->s_flags = flags;
1341 s->s_mode = mode;
1342 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1343 sb_set_blocksize(s, block_size(bdev));
1344 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
1345 if (error) {
1346 deactivate_locked_super(s);
1347 return error;
1348 }
1349 s->s_flags |= MS_ACTIVE;
1350 bdev->bd_super = s;
1351 }
1352
1353 sdp = s->s_fs_info;
1354 mnt->mnt_sb = s;
1355 if (args.ar_meta)
1356 mnt->mnt_root = dget(sdp->sd_master_dir);
1357 else
1358 mnt->mnt_root = dget(sdp->sd_root_dir);
1359 return 0;
1360
1361error_super:
1362 deactivate_locked_super(s);
1363error_bdev:
1364 close_bdev_exclusive(bdev, mode);
1365 return error;
1366}
1367
1258static int set_meta_super(struct super_block *s, void *ptr) 1368static int set_meta_super(struct super_block *s, void *ptr)
1259{ 1369{
1260 return -EINVAL; 1370 return -EINVAL;
@@ -1274,13 +1384,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
1274 dev_name, error); 1384 dev_name, error);
1275 return error; 1385 return error;
1276 } 1386 }
1277 s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, 1387 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
1278 path.dentry->d_inode->i_sb->s_bdev); 1388 path.dentry->d_inode->i_sb->s_bdev);
1279 path_put(&path); 1389 path_put(&path);
1280 if (IS_ERR(s)) { 1390 if (IS_ERR(s)) {
1281 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1391 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
1282 return PTR_ERR(s); 1392 return PTR_ERR(s);
1283 } 1393 }
1394 if ((flags ^ s->s_flags) & MS_RDONLY) {
1395 deactivate_locked_super(s);
1396 return -EBUSY;
1397 }
1284 sdp = s->s_fs_info; 1398 sdp = s->s_fs_info;
1285 mnt->mnt_sb = s; 1399 mnt->mnt_sb = s;
1286 mnt->mnt_root = dget(sdp->sd_master_dir); 1400 mnt->mnt_root = dget(sdp->sd_master_dir);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10de..78f73ca1ef3 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
748 struct gfs2_rgrpd *nrgd; 748 struct gfs2_rgrpd *nrgd;
749 unsigned int num_gh; 749 unsigned int num_gh;
750 int dir_rename = 0; 750 int dir_rename = 0;
751 int alloc_required; 751 int alloc_required = 0;
752 unsigned int x; 752 unsigned int x;
753 int error; 753 int error;
754 754
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
867 goto out_gunlock; 867 goto out_gunlock;
868 } 868 }
869 869
870 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 870 if (nip == NULL)
871 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
872 error = alloc_required;
871 if (error < 0) 873 if (error < 0)
872 goto out_gunlock; 874 goto out_gunlock;
873 error = 0; 875 error = 0;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 2e9b9326bfc..e3bf6eab875 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -15,7 +15,7 @@
15 * fuzziness in the current usage value of IDs that are being used on different 15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on 16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controlable. 17 * multiple nodes to overrun their quota, but that overrun is controlable.
18 * Since quota tags are part of transactions, there is no need to a quota check 18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that. 19 * program to be run on node crashes or anything like that.
20 * 20 *
21 * There are couple of knobs that let the administrator manage the quota 21 * There are couple of knobs that let the administrator manage the quota
@@ -47,6 +47,8 @@
47#include <linux/gfs2_ondisk.h> 47#include <linux/gfs2_ondisk.h>
48#include <linux/kthread.h> 48#include <linux/kthread.h>
49#include <linux/freezer.h> 49#include <linux/freezer.h>
50#include <linux/quota.h>
51#include <linux/dqblk_xfs.h>
50 52
51#include "gfs2.h" 53#include "gfs2.h"
52#include "incore.h" 54#include "incore.h"
@@ -65,13 +67,6 @@
65#define QUOTA_USER 1 67#define QUOTA_USER 1
66#define QUOTA_GROUP 0 68#define QUOTA_GROUP 0
67 69
68struct gfs2_quota_host {
69 u64 qu_limit;
70 u64 qu_warn;
71 s64 qu_value;
72 u32 qu_ll_next;
73};
74
75struct gfs2_quota_change_host { 70struct gfs2_quota_change_host {
76 u64 qc_change; 71 u64 qc_change;
77 u32 qc_flags; /* GFS2_QCF_... */ 72 u32 qc_flags; /* GFS2_QCF_... */
@@ -164,7 +159,7 @@ fail:
164 return error; 159 return error;
165} 160}
166 161
167static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, 162static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
168 struct gfs2_quota_data **qdp) 163 struct gfs2_quota_data **qdp)
169{ 164{
170 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 165 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
@@ -202,7 +197,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
202 197
203 spin_unlock(&qd_lru_lock); 198 spin_unlock(&qd_lru_lock);
204 199
205 if (qd || !create) { 200 if (qd) {
206 if (new_qd) { 201 if (new_qd) {
207 gfs2_glock_put(new_qd->qd_gl); 202 gfs2_glock_put(new_qd->qd_gl);
208 kmem_cache_free(gfs2_quotad_cachep, new_qd); 203 kmem_cache_free(gfs2_quotad_cachep, new_qd);
@@ -461,12 +456,12 @@ static void qd_unlock(struct gfs2_quota_data *qd)
461 qd_put(qd); 456 qd_put(qd);
462} 457}
463 458
464static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, 459static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id,
465 struct gfs2_quota_data **qdp) 460 struct gfs2_quota_data **qdp)
466{ 461{
467 int error; 462 int error;
468 463
469 error = qd_get(sdp, user, id, create, qdp); 464 error = qd_get(sdp, user, id, qdp);
470 if (error) 465 if (error)
471 return error; 466 return error;
472 467
@@ -508,20 +503,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
508 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 503 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
509 return 0; 504 return 0;
510 505
511 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); 506 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
512 if (error) 507 if (error)
513 goto out; 508 goto out;
514 al->al_qd_num++; 509 al->al_qd_num++;
515 qd++; 510 qd++;
516 511
517 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); 512 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
518 if (error) 513 if (error)
519 goto out; 514 goto out;
520 al->al_qd_num++; 515 al->al_qd_num++;
521 qd++; 516 qd++;
522 517
523 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 518 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
524 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); 519 error = qdsb_get(sdp, QUOTA_USER, uid, qd);
525 if (error) 520 if (error)
526 goto out; 521 goto out;
527 al->al_qd_num++; 522 al->al_qd_num++;
@@ -529,7 +524,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
529 } 524 }
530 525
531 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { 526 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
532 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); 527 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
533 if (error) 528 if (error)
534 goto out; 529 goto out;
535 al->al_qd_num++; 530 al->al_qd_num++;
@@ -617,48 +612,36 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
617 mutex_unlock(&sdp->sd_quota_mutex); 612 mutex_unlock(&sdp->sd_quota_mutex);
618} 613}
619 614
620static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
621{
622 const struct gfs2_quota *str = buf;
623
624 qu->qu_limit = be64_to_cpu(str->qu_limit);
625 qu->qu_warn = be64_to_cpu(str->qu_warn);
626 qu->qu_value = be64_to_cpu(str->qu_value);
627 qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
628}
629
630static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
631{
632 struct gfs2_quota *str = buf;
633
634 str->qu_limit = cpu_to_be64(qu->qu_limit);
635 str->qu_warn = cpu_to_be64(qu->qu_warn);
636 str->qu_value = cpu_to_be64(qu->qu_value);
637 str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
638 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
639}
640
641/** 615/**
642 * gfs2_adjust_quota 616 * gfs2_adjust_quota - adjust record of current block usage
617 * @ip: The quota inode
618 * @loc: Offset of the entry in the quota file
619 * @change: The amount of usage change to record
620 * @qd: The quota data
621 * @fdq: The updated limits to record
643 * 622 *
644 * This function was mostly borrowed from gfs2_block_truncate_page which was 623 * This function was mostly borrowed from gfs2_block_truncate_page which was
645 * in turn mostly borrowed from ext3 624 * in turn mostly borrowed from ext3
625 *
626 * Returns: 0 or -ve on error
646 */ 627 */
628
647static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, 629static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
648 s64 change, struct gfs2_quota_data *qd) 630 s64 change, struct gfs2_quota_data *qd,
631 struct fs_disk_quota *fdq)
649{ 632{
650 struct inode *inode = &ip->i_inode; 633 struct inode *inode = &ip->i_inode;
651 struct address_space *mapping = inode->i_mapping; 634 struct address_space *mapping = inode->i_mapping;
652 unsigned long index = loc >> PAGE_CACHE_SHIFT; 635 unsigned long index = loc >> PAGE_CACHE_SHIFT;
653 unsigned offset = loc & (PAGE_CACHE_SIZE - 1); 636 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
654 unsigned blocksize, iblock, pos; 637 unsigned blocksize, iblock, pos;
655 struct buffer_head *bh; 638 struct buffer_head *bh, *dibh;
656 struct page *page; 639 struct page *page;
657 void *kaddr; 640 void *kaddr;
658 char *ptr; 641 struct gfs2_quota *qp;
659 struct gfs2_quota_host qp;
660 s64 value; 642 s64 value;
661 int err = -EIO; 643 int err = -EIO;
644 u64 size;
662 645
663 if (gfs2_is_stuffed(ip)) 646 if (gfs2_is_stuffed(ip))
664 gfs2_unstuff_dinode(ip, NULL); 647 gfs2_unstuff_dinode(ip, NULL);
@@ -700,18 +683,38 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
700 gfs2_trans_add_bh(ip->i_gl, bh, 0); 683 gfs2_trans_add_bh(ip->i_gl, bh, 0);
701 684
702 kaddr = kmap_atomic(page, KM_USER0); 685 kaddr = kmap_atomic(page, KM_USER0);
703 ptr = kaddr + offset; 686 qp = kaddr + offset;
704 gfs2_quota_in(&qp, ptr); 687 value = (s64)be64_to_cpu(qp->qu_value) + change;
705 qp.qu_value += change; 688 qp->qu_value = cpu_to_be64(value);
706 value = qp.qu_value; 689 qd->qd_qb.qb_value = qp->qu_value;
707 gfs2_quota_out(&qp, ptr); 690 if (fdq) {
691 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
692 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
693 qd->qd_qb.qb_warn = qp->qu_warn;
694 }
695 if (fdq->d_fieldmask & FS_DQ_BHARD) {
696 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
697 qd->qd_qb.qb_limit = qp->qu_limit;
698 }
699 }
708 flush_dcache_page(page); 700 flush_dcache_page(page);
709 kunmap_atomic(kaddr, KM_USER0); 701 kunmap_atomic(kaddr, KM_USER0);
710 err = 0; 702
711 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); 703 err = gfs2_meta_inode_buffer(ip, &dibh);
712 qd->qd_qb.qb_value = cpu_to_be64(value); 704 if (err)
713 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); 705 goto unlock;
714 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); 706
707 size = loc + sizeof(struct gfs2_quota);
708 if (size > inode->i_size) {
709 ip->i_disksize = size;
710 i_size_write(inode, size);
711 }
712 inode->i_mtime = inode->i_atime = CURRENT_TIME;
713 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
714 gfs2_dinode_out(ip, dibh->b_data);
715 brelse(dibh);
716 mark_inode_dirty(inode);
717
715unlock: 718unlock:
716 unlock_page(page); 719 unlock_page(page);
717 page_cache_release(page); 720 page_cache_release(page);
@@ -739,9 +742,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
739 return -ENOMEM; 742 return -ENOMEM;
740 743
741 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); 744 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
745 mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
742 for (qx = 0; qx < num_qd; qx++) { 746 for (qx = 0; qx < num_qd; qx++) {
743 error = gfs2_glock_nq_init(qda[qx]->qd_gl, 747 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
744 LM_ST_EXCLUSIVE,
745 GL_NOCACHE, &ghs[qx]); 748 GL_NOCACHE, &ghs[qx]);
746 if (error) 749 if (error)
747 goto out; 750 goto out;
@@ -795,9 +798,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
795 for (x = 0; x < num_qd; x++) { 798 for (x = 0; x < num_qd; x++) {
796 qd = qda[x]; 799 qd = qda[x];
797 offset = qd2offset(qd); 800 offset = qd2offset(qd);
798 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, 801 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
799 (struct gfs2_quota_data *)
800 qd);
801 if (error) 802 if (error)
802 goto out_end_trans; 803 goto out_end_trans;
803 804
@@ -817,21 +818,44 @@ out_gunlock:
817out: 818out:
818 while (qx--) 819 while (qx--)
819 gfs2_glock_dq_uninit(&ghs[qx]); 820 gfs2_glock_dq_uninit(&ghs[qx]);
821 mutex_unlock(&ip->i_inode.i_mutex);
820 kfree(ghs); 822 kfree(ghs);
821 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); 823 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
822 return error; 824 return error;
823} 825}
824 826
827static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
828{
829 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
830 struct gfs2_quota q;
831 struct gfs2_quota_lvb *qlvb;
832 loff_t pos;
833 int error;
834
835 memset(&q, 0, sizeof(struct gfs2_quota));
836 pos = qd2offset(qd);
837 error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q));
838 if (error < 0)
839 return error;
840
841 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
842 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
843 qlvb->__pad = 0;
844 qlvb->qb_limit = q.qu_limit;
845 qlvb->qb_warn = q.qu_warn;
846 qlvb->qb_value = q.qu_value;
847 qd->qd_qb = *qlvb;
848
849 return 0;
850}
851
825static int do_glock(struct gfs2_quota_data *qd, int force_refresh, 852static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
826 struct gfs2_holder *q_gh) 853 struct gfs2_holder *q_gh)
827{ 854{
828 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 855 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
829 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); 856 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
830 struct gfs2_holder i_gh; 857 struct gfs2_holder i_gh;
831 struct gfs2_quota_host q;
832 char buf[sizeof(struct gfs2_quota)];
833 int error; 858 int error;
834 struct gfs2_quota_lvb *qlvb;
835 859
836restart: 860restart:
837 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); 861 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
@@ -841,11 +865,9 @@ restart:
841 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 865 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
842 866
843 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 867 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
844 loff_t pos;
845 gfs2_glock_dq_uninit(q_gh); 868 gfs2_glock_dq_uninit(q_gh);
846 error = gfs2_glock_nq_init(qd->qd_gl, 869 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE,
847 LM_ST_EXCLUSIVE, GL_NOCACHE, 870 GL_NOCACHE, q_gh);
848 q_gh);
849 if (error) 871 if (error)
850 return error; 872 return error;
851 873
@@ -853,29 +875,14 @@ restart:
853 if (error) 875 if (error)
854 goto fail; 876 goto fail;
855 877
856 memset(buf, 0, sizeof(struct gfs2_quota)); 878 error = update_qd(sdp, qd);
857 pos = qd2offset(qd); 879 if (error)
858 error = gfs2_internal_read(ip, NULL, buf, &pos,
859 sizeof(struct gfs2_quota));
860 if (error < 0)
861 goto fail_gunlock; 880 goto fail_gunlock;
862 881
863 gfs2_glock_dq_uninit(&i_gh); 882 gfs2_glock_dq_uninit(&i_gh);
864 883 gfs2_glock_dq_uninit(q_gh);
865 gfs2_quota_in(&q, buf); 884 force_refresh = 0;
866 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 885 goto restart;
867 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
868 qlvb->__pad = 0;
869 qlvb->qb_limit = cpu_to_be64(q.qu_limit);
870 qlvb->qb_warn = cpu_to_be64(q.qu_warn);
871 qlvb->qb_value = cpu_to_be64(q.qu_value);
872 qd->qd_qb = *qlvb;
873
874 if (gfs2_glock_is_blocking(qd->qd_gl)) {
875 gfs2_glock_dq_uninit(q_gh);
876 force_refresh = 0;
877 goto restart;
878 }
879 } 886 }
880 887
881 return 0; 888 return 0;
@@ -995,7 +1002,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
995{ 1002{
996 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 1003 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
997 1004
998 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", 1005 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
999 sdp->sd_fsname, type, 1006 sdp->sd_fsname, type,
1000 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1007 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
1001 qd->qd_id); 1008 qd->qd_id);
@@ -1032,6 +1039,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1032 1039
1033 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1040 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1034 print_message(qd, "exceeded"); 1041 print_message(qd, "exceeded");
1042 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
1043 USRQUOTA : GRPQUOTA, qd->qd_id,
1044 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1045
1035 error = -EDQUOT; 1046 error = -EDQUOT;
1036 break; 1047 break;
1037 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1048 } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
@@ -1039,6 +1050,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1039 time_after_eq(jiffies, qd->qd_last_warn + 1050 time_after_eq(jiffies, qd->qd_last_warn +
1040 gfs2_tune_get(sdp, 1051 gfs2_tune_get(sdp,
1041 gt_quota_warn_period) * HZ)) { 1052 gt_quota_warn_period) * HZ)) {
1053 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
1054 USRQUOTA : GRPQUOTA, qd->qd_id,
1055 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1042 error = print_message(qd, "warning"); 1056 error = print_message(qd, "warning");
1043 qd->qd_last_warn = jiffies; 1057 qd->qd_last_warn = jiffies;
1044 } 1058 }
@@ -1069,8 +1083,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1069 } 1083 }
1070} 1084}
1071 1085
1072int gfs2_quota_sync(struct gfs2_sbd *sdp) 1086int gfs2_quota_sync(struct super_block *sb, int type)
1073{ 1087{
1088 struct gfs2_sbd *sdp = sb->s_fs_info;
1074 struct gfs2_quota_data **qda; 1089 struct gfs2_quota_data **qda;
1075 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); 1090 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
1076 unsigned int num_qd; 1091 unsigned int num_qd;
@@ -1118,7 +1133,7 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1118 struct gfs2_holder q_gh; 1133 struct gfs2_holder q_gh;
1119 int error; 1134 int error;
1120 1135
1121 error = qd_get(sdp, user, id, CREATE, &qd); 1136 error = qd_get(sdp, user, id, &qd);
1122 if (error) 1137 if (error)
1123 return error; 1138 return error;
1124 1139
@@ -1127,7 +1142,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1127 gfs2_glock_dq_uninit(&q_gh); 1142 gfs2_glock_dq_uninit(&q_gh);
1128 1143
1129 qd_put(qd); 1144 qd_put(qd);
1130
1131 return error; 1145 return error;
1132} 1146}
1133 1147
@@ -1298,12 +1312,12 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
1298} 1312}
1299 1313
1300static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, 1314static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
1301 int (*fxn)(struct gfs2_sbd *sdp), 1315 int (*fxn)(struct super_block *sb, int type),
1302 unsigned long t, unsigned long *timeo, 1316 unsigned long t, unsigned long *timeo,
1303 unsigned int *new_timeo) 1317 unsigned int *new_timeo)
1304{ 1318{
1305 if (t >= *timeo) { 1319 if (t >= *timeo) {
1306 int error = fxn(sdp); 1320 int error = fxn(sdp->sd_vfs, 0);
1307 quotad_error(sdp, msg, error); 1321 quotad_error(sdp, msg, error);
1308 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; 1322 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
1309 } else { 1323 } else {
@@ -1330,6 +1344,14 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
1330 } 1344 }
1331} 1345}
1332 1346
1347void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
1348 if (!sdp->sd_statfs_force_sync) {
1349 sdp->sd_statfs_force_sync = 1;
1350 wake_up(&sdp->sd_quota_wait);
1351 }
1352}
1353
1354
1333/** 1355/**
1334 * gfs2_quotad - Write cached quota changes into the quota file 1356 * gfs2_quotad - Write cached quota changes into the quota file
1335 * @sdp: Pointer to GFS2 superblock 1357 * @sdp: Pointer to GFS2 superblock
@@ -1349,8 +1371,15 @@ int gfs2_quotad(void *data)
1349 while (!kthread_should_stop()) { 1371 while (!kthread_should_stop()) {
1350 1372
1351 /* Update the master statfs file */ 1373 /* Update the master statfs file */
1352 quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, 1374 if (sdp->sd_statfs_force_sync) {
1353 &statfs_timeo, &tune->gt_statfs_quantum); 1375 int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
1376 quotad_error(sdp, "statfs", error);
1377 statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
1378 }
1379 else
1380 quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
1381 &statfs_timeo,
1382 &tune->gt_statfs_quantum);
1354 1383
1355 /* Update quota file */ 1384 /* Update quota file */
1356 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1385 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
@@ -1367,7 +1396,7 @@ int gfs2_quotad(void *data)
1367 spin_lock(&sdp->sd_trunc_lock); 1396 spin_lock(&sdp->sd_trunc_lock);
1368 empty = list_empty(&sdp->sd_trunc_list); 1397 empty = list_empty(&sdp->sd_trunc_list);
1369 spin_unlock(&sdp->sd_trunc_lock); 1398 spin_unlock(&sdp->sd_trunc_lock);
1370 if (empty) 1399 if (empty && !sdp->sd_statfs_force_sync)
1371 t -= schedule_timeout(t); 1400 t -= schedule_timeout(t);
1372 else 1401 else
1373 t = 0; 1402 t = 0;
@@ -1377,3 +1406,181 @@ int gfs2_quotad(void *data)
1377 return 0; 1406 return 0;
1378} 1407}
1379 1408
1409static int gfs2_quota_get_xstate(struct super_block *sb,
1410 struct fs_quota_stat *fqs)
1411{
1412 struct gfs2_sbd *sdp = sb->s_fs_info;
1413
1414 memset(fqs, 0, sizeof(struct fs_quota_stat));
1415 fqs->qs_version = FS_QSTAT_VERSION;
1416 if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON)
1417 fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
1418 else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT)
1419 fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
1420 if (sdp->sd_quota_inode) {
1421 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
1422 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
1423 }
1424 fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
1425 fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
1426 fqs->qs_incoredqs = atomic_read(&qd_lru_count);
1427 return 0;
1428}
1429
1430static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id,
1431 struct fs_disk_quota *fdq)
1432{
1433 struct gfs2_sbd *sdp = sb->s_fs_info;
1434 struct gfs2_quota_lvb *qlvb;
1435 struct gfs2_quota_data *qd;
1436 struct gfs2_holder q_gh;
1437 int error;
1438
1439 memset(fdq, 0, sizeof(struct fs_disk_quota));
1440
1441 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1442 return -ESRCH; /* Crazy XFS error code */
1443
1444 if (type == USRQUOTA)
1445 type = QUOTA_USER;
1446 else if (type == GRPQUOTA)
1447 type = QUOTA_GROUP;
1448 else
1449 return -EINVAL;
1450
1451 error = qd_get(sdp, type, id, &qd);
1452 if (error)
1453 return error;
1454 error = do_glock(qd, FORCE, &q_gh);
1455 if (error)
1456 goto out;
1457
1458 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
1459 fdq->d_version = FS_DQUOT_VERSION;
1460 fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA;
1461 fdq->d_id = id;
1462 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
1463 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
1464 fdq->d_bcount = be64_to_cpu(qlvb->qb_value);
1465
1466 gfs2_glock_dq_uninit(&q_gh);
1467out:
1468 qd_put(qd);
1469 return error;
1470}
1471
1472/* GFS2 only supports a subset of the XFS fields */
1473#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
1474
1475static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id,
1476 struct fs_disk_quota *fdq)
1477{
1478 struct gfs2_sbd *sdp = sb->s_fs_info;
1479 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
1480 struct gfs2_quota_data *qd;
1481 struct gfs2_holder q_gh, i_gh;
1482 unsigned int data_blocks, ind_blocks;
1483 unsigned int blocks = 0;
1484 int alloc_required;
1485 struct gfs2_alloc *al;
1486 loff_t offset;
1487 int error;
1488
1489 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1490 return -ESRCH; /* Crazy XFS error code */
1491
1492 switch(type) {
1493 case USRQUOTA:
1494 type = QUOTA_USER;
1495 if (fdq->d_flags != XFS_USER_QUOTA)
1496 return -EINVAL;
1497 break;
1498 case GRPQUOTA:
1499 type = QUOTA_GROUP;
1500 if (fdq->d_flags != XFS_GROUP_QUOTA)
1501 return -EINVAL;
1502 break;
1503 default:
1504 return -EINVAL;
1505 }
1506
1507 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1508 return -EINVAL;
1509 if (fdq->d_id != id)
1510 return -EINVAL;
1511
1512 error = qd_get(sdp, type, id, &qd);
1513 if (error)
1514 return error;
1515
1516 mutex_lock(&ip->i_inode.i_mutex);
1517 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
1518 if (error)
1519 goto out_put;
1520 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1521 if (error)
1522 goto out_q;
1523
1524 /* Check for existing entry, if none then alloc new blocks */
1525 error = update_qd(sdp, qd);
1526 if (error)
1527 goto out_i;
1528
1529 /* If nothing has changed, this is a no-op */
1530 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1531 (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn)))
1532 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1533 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1534 (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit)))
1535 fdq->d_fieldmask ^= FS_DQ_BHARD;
1536 if (fdq->d_fieldmask == 0)
1537 goto out_i;
1538
1539 offset = qd2offset(qd);
1540 error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota),
1541 &alloc_required);
1542 if (error)
1543 goto out_i;
1544 if (alloc_required) {
1545 al = gfs2_alloc_get(ip);
1546 if (al == NULL)
1547 goto out_i;
1548 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1549 &data_blocks, &ind_blocks);
1550 blocks = al->al_requested = 1 + data_blocks + ind_blocks;
1551 error = gfs2_inplace_reserve(ip);
1552 if (error)
1553 goto out_alloc;
1554 }
1555
1556 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
1557 if (error)
1558 goto out_release;
1559
1560 /* Apply changes */
1561 error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
1562
1563 gfs2_trans_end(sdp);
1564out_release:
1565 if (alloc_required) {
1566 gfs2_inplace_release(ip);
1567out_alloc:
1568 gfs2_alloc_put(ip);
1569 }
1570out_i:
1571 gfs2_glock_dq_uninit(&i_gh);
1572out_q:
1573 gfs2_glock_dq_uninit(&q_gh);
1574out_put:
1575 mutex_unlock(&ip->i_inode.i_mutex);
1576 qd_put(qd);
1577 return error;
1578}
1579
1580const struct quotactl_ops gfs2_quotactl_ops = {
1581 .quota_sync = gfs2_quota_sync,
1582 .get_xstate = gfs2_quota_get_xstate,
1583 .get_xquota = gfs2_xquota_get,
1584 .set_xquota = gfs2_xquota_set,
1585};
1586
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 0fa5fa63d0e..e271fa07ad0 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -25,13 +25,15 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
26 u32 uid, u32 gid); 26 u32 uid, u32 gid);
27 27
28extern int gfs2_quota_sync(struct gfs2_sbd *sdp); 28extern int gfs2_quota_sync(struct super_block *sb, int type);
29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
30 30
31extern int gfs2_quota_init(struct gfs2_sbd *sdp); 31extern int gfs2_quota_init(struct gfs2_sbd *sdp);
32extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 32extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
33extern int gfs2_quotad(void *data); 33extern int gfs2_quotad(void *data);
34 34
35extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
36
35static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) 37static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
36{ 38{
37 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 39 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -50,5 +52,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
50} 52}
51 53
52extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); 54extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
55extern const struct quotactl_ops gfs2_quotactl_ops;
53 56
54#endif /* __QUOTA_DOT_H__ */ 57#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 09fa3196557..4b9bece3d43 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
410 memset(lh, 0, sizeof(struct gfs2_log_header)); 410 memset(lh, 0, sizeof(struct gfs2_log_header));
411 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 411 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
412 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 412 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
413 lh->lh_header.__pad0 = cpu_to_be64(0);
413 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 414 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
415 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
414 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); 416 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
415 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); 417 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
416 lh->lh_blkno = cpu_to_be32(lblock); 418 lh->lh_blkno = cpu_to_be32(lblock);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8f1cfb02a6c..0608f490c29 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1710,11 +1710,16 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1710{ 1710{
1711 struct gfs2_rgrpd *rgd; 1711 struct gfs2_rgrpd *rgd;
1712 struct gfs2_holder ri_gh, rgd_gh; 1712 struct gfs2_holder ri_gh, rgd_gh;
1713 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1714 int ri_locked = 0;
1713 int error; 1715 int error;
1714 1716
1715 error = gfs2_rindex_hold(sdp, &ri_gh); 1717 if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
1716 if (error) 1718 error = gfs2_rindex_hold(sdp, &ri_gh);
1717 goto fail; 1719 if (error)
1720 goto fail;
1721 ri_locked = 1;
1722 }
1718 1723
1719 error = -EINVAL; 1724 error = -EINVAL;
1720 rgd = gfs2_blk2rgrpd(sdp, no_addr); 1725 rgd = gfs2_blk2rgrpd(sdp, no_addr);
@@ -1730,7 +1735,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1730 1735
1731 gfs2_glock_dq_uninit(&rgd_gh); 1736 gfs2_glock_dq_uninit(&rgd_gh);
1732fail_rindex: 1737fail_rindex:
1733 gfs2_glock_dq_uninit(&ri_gh); 1738 if (ri_locked)
1739 gfs2_glock_dq_uninit(&ri_gh);
1734fail: 1740fail:
1735 return error; 1741 return error;
1736} 1742}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0ec3ec672de..c282ad41f3d 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -70,6 +70,11 @@ enum {
70 Opt_commit, 70 Opt_commit,
71 Opt_err_withdraw, 71 Opt_err_withdraw,
72 Opt_err_panic, 72 Opt_err_panic,
73 Opt_statfs_quantum,
74 Opt_statfs_percent,
75 Opt_quota_quantum,
76 Opt_barrier,
77 Opt_nobarrier,
73 Opt_error, 78 Opt_error,
74}; 79};
75 80
@@ -101,18 +106,23 @@ static const match_table_t tokens = {
101 {Opt_commit, "commit=%d"}, 106 {Opt_commit, "commit=%d"},
102 {Opt_err_withdraw, "errors=withdraw"}, 107 {Opt_err_withdraw, "errors=withdraw"},
103 {Opt_err_panic, "errors=panic"}, 108 {Opt_err_panic, "errors=panic"},
109 {Opt_statfs_quantum, "statfs_quantum=%d"},
110 {Opt_statfs_percent, "statfs_percent=%d"},
111 {Opt_quota_quantum, "quota_quantum=%d"},
112 {Opt_barrier, "barrier"},
113 {Opt_nobarrier, "nobarrier"},
104 {Opt_error, NULL} 114 {Opt_error, NULL}
105}; 115};
106 116
107/** 117/**
108 * gfs2_mount_args - Parse mount options 118 * gfs2_mount_args - Parse mount options
109 * @sdp: 119 * @args: The structure into which the parsed options will be written
110 * @data: 120 * @options: The options to parse
111 * 121 *
112 * Return: errno 122 * Return: errno
113 */ 123 */
114 124
115int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) 125int gfs2_mount_args(struct gfs2_args *args, char *options)
116{ 126{
117 char *o; 127 char *o;
118 int token; 128 int token;
@@ -157,7 +167,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
157 break; 167 break;
158 case Opt_debug: 168 case Opt_debug:
159 if (args->ar_errors == GFS2_ERRORS_PANIC) { 169 if (args->ar_errors == GFS2_ERRORS_PANIC) {
160 fs_info(sdp, "-o debug and -o errors=panic " 170 printk(KERN_WARNING "GFS2: -o debug and -o errors=panic "
161 "are mutually exclusive.\n"); 171 "are mutually exclusive.\n");
162 return -EINVAL; 172 return -EINVAL;
163 } 173 }
@@ -210,7 +220,29 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
210 case Opt_commit: 220 case Opt_commit:
211 rv = match_int(&tmp[0], &args->ar_commit); 221 rv = match_int(&tmp[0], &args->ar_commit);
212 if (rv || args->ar_commit <= 0) { 222 if (rv || args->ar_commit <= 0) {
213 fs_info(sdp, "commit mount option requires a positive numeric argument\n"); 223 printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n");
224 return rv ? rv : -EINVAL;
225 }
226 break;
227 case Opt_statfs_quantum:
228 rv = match_int(&tmp[0], &args->ar_statfs_quantum);
229 if (rv || args->ar_statfs_quantum < 0) {
230 printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n");
231 return rv ? rv : -EINVAL;
232 }
233 break;
234 case Opt_quota_quantum:
235 rv = match_int(&tmp[0], &args->ar_quota_quantum);
236 if (rv || args->ar_quota_quantum <= 0) {
237 printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n");
238 return rv ? rv : -EINVAL;
239 }
240 break;
241 case Opt_statfs_percent:
242 rv = match_int(&tmp[0], &args->ar_statfs_percent);
243 if (rv || args->ar_statfs_percent < 0 ||
244 args->ar_statfs_percent > 100) {
245 printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n");
214 return rv ? rv : -EINVAL; 246 return rv ? rv : -EINVAL;
215 } 247 }
216 break; 248 break;
@@ -219,15 +251,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
219 break; 251 break;
220 case Opt_err_panic: 252 case Opt_err_panic:
221 if (args->ar_debug) { 253 if (args->ar_debug) {
222 fs_info(sdp, "-o debug and -o errors=panic " 254 printk(KERN_WARNING "GFS2: -o debug and -o errors=panic "
223 "are mutually exclusive.\n"); 255 "are mutually exclusive.\n");
224 return -EINVAL; 256 return -EINVAL;
225 } 257 }
226 args->ar_errors = GFS2_ERRORS_PANIC; 258 args->ar_errors = GFS2_ERRORS_PANIC;
227 break; 259 break;
260 case Opt_barrier:
261 args->ar_nobarrier = 0;
262 break;
263 case Opt_nobarrier:
264 args->ar_nobarrier = 1;
265 break;
228 case Opt_error: 266 case Opt_error:
229 default: 267 default:
230 fs_info(sdp, "invalid mount option: %s\n", o); 268 printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
231 return -EINVAL; 269 return -EINVAL;
232 } 270 }
233 } 271 }
@@ -442,7 +480,10 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
442{ 480{
443 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); 481 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
444 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 482 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
483 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
445 struct buffer_head *l_bh; 484 struct buffer_head *l_bh;
485 s64 x, y;
486 int need_sync = 0;
446 int error; 487 int error;
447 488
448 error = gfs2_meta_inode_buffer(l_ip, &l_bh); 489 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
@@ -456,9 +497,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
456 l_sc->sc_free += free; 497 l_sc->sc_free += free;
457 l_sc->sc_dinodes += dinodes; 498 l_sc->sc_dinodes += dinodes;
458 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); 499 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
500 if (sdp->sd_args.ar_statfs_percent) {
501 x = 100 * l_sc->sc_free;
502 y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
503 if (x >= y || x <= -y)
504 need_sync = 1;
505 }
459 spin_unlock(&sdp->sd_statfs_spin); 506 spin_unlock(&sdp->sd_statfs_spin);
460 507
461 brelse(l_bh); 508 brelse(l_bh);
509 if (need_sync)
510 gfs2_wake_up_statfs(sdp);
462} 511}
463 512
464void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, 513void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
@@ -484,8 +533,9 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
484 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 533 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
485} 534}
486 535
487int gfs2_statfs_sync(struct gfs2_sbd *sdp) 536int gfs2_statfs_sync(struct super_block *sb, int type)
488{ 537{
538 struct gfs2_sbd *sdp = sb->s_fs_info;
489 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 539 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
490 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); 540 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
491 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 541 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
@@ -521,6 +571,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
521 goto out_bh2; 571 goto out_bh2;
522 572
523 update_statfs(sdp, m_bh, l_bh); 573 update_statfs(sdp, m_bh, l_bh);
574 sdp->sd_statfs_force_sync = 0;
524 575
525 gfs2_trans_end(sdp); 576 gfs2_trans_end(sdp);
526 577
@@ -712,8 +763,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
712 int error; 763 int error;
713 764
714 flush_workqueue(gfs2_delete_workqueue); 765 flush_workqueue(gfs2_delete_workqueue);
715 gfs2_quota_sync(sdp); 766 gfs2_quota_sync(sdp->sd_vfs, 0);
716 gfs2_statfs_sync(sdp); 767 gfs2_statfs_sync(sdp->sd_vfs, 0);
717 768
718 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, 769 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
719 &t_gh); 770 &t_gh);
@@ -1061,8 +1112,13 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1061 1112
1062 spin_lock(&gt->gt_spin); 1113 spin_lock(&gt->gt_spin);
1063 args.ar_commit = gt->gt_log_flush_secs; 1114 args.ar_commit = gt->gt_log_flush_secs;
1115 args.ar_quota_quantum = gt->gt_quota_quantum;
1116 if (gt->gt_statfs_slow)
1117 args.ar_statfs_quantum = 0;
1118 else
1119 args.ar_statfs_quantum = gt->gt_statfs_quantum;
1064 spin_unlock(&gt->gt_spin); 1120 spin_unlock(&gt->gt_spin);
1065 error = gfs2_mount_args(sdp, &args, data); 1121 error = gfs2_mount_args(&args, data);
1066 if (error) 1122 if (error)
1067 return error; 1123 return error;
1068 1124
@@ -1097,8 +1153,21 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1097 sb->s_flags |= MS_POSIXACL; 1153 sb->s_flags |= MS_POSIXACL;
1098 else 1154 else
1099 sb->s_flags &= ~MS_POSIXACL; 1155 sb->s_flags &= ~MS_POSIXACL;
1156 if (sdp->sd_args.ar_nobarrier)
1157 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1158 else
1159 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1100 spin_lock(&gt->gt_spin); 1160 spin_lock(&gt->gt_spin);
1101 gt->gt_log_flush_secs = args.ar_commit; 1161 gt->gt_log_flush_secs = args.ar_commit;
1162 gt->gt_quota_quantum = args.ar_quota_quantum;
1163 if (args.ar_statfs_quantum) {
1164 gt->gt_statfs_slow = 0;
1165 gt->gt_statfs_quantum = args.ar_statfs_quantum;
1166 }
1167 else {
1168 gt->gt_statfs_slow = 1;
1169 gt->gt_statfs_quantum = 30;
1170 }
1102 spin_unlock(&gt->gt_spin); 1171 spin_unlock(&gt->gt_spin);
1103 1172
1104 gfs2_online_uevent(sdp); 1173 gfs2_online_uevent(sdp);
@@ -1179,7 +1248,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1179{ 1248{
1180 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; 1249 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
1181 struct gfs2_args *args = &sdp->sd_args; 1250 struct gfs2_args *args = &sdp->sd_args;
1182 int lfsecs; 1251 int val;
1183 1252
1184 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) 1253 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
1185 seq_printf(s, ",meta"); 1254 seq_printf(s, ",meta");
@@ -1240,9 +1309,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1240 } 1309 }
1241 if (args->ar_discard) 1310 if (args->ar_discard)
1242 seq_printf(s, ",discard"); 1311 seq_printf(s, ",discard");
1243 lfsecs = sdp->sd_tune.gt_log_flush_secs; 1312 val = sdp->sd_tune.gt_log_flush_secs;
1244 if (lfsecs != 60) 1313 if (val != 60)
1245 seq_printf(s, ",commit=%d", lfsecs); 1314 seq_printf(s, ",commit=%d", val);
1315 val = sdp->sd_tune.gt_statfs_quantum;
1316 if (val != 30)
1317 seq_printf(s, ",statfs_quantum=%d", val);
1318 val = sdp->sd_tune.gt_quota_quantum;
1319 if (val != 60)
1320 seq_printf(s, ",quota_quantum=%d", val);
1321 if (args->ar_statfs_percent)
1322 seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
1246 if (args->ar_errors != GFS2_ERRORS_DEFAULT) { 1323 if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1247 const char *state; 1324 const char *state;
1248 1325
@@ -1259,6 +1336,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1259 } 1336 }
1260 seq_printf(s, ",errors=%s", state); 1337 seq_printf(s, ",errors=%s", state);
1261 } 1338 }
1339 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1340 seq_printf(s, ",nobarrier");
1341
1262 return 0; 1342 return 0;
1263} 1343}
1264 1344
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 235db368288..3df60f2d84e 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -27,7 +27,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
27 27
28extern void gfs2_jindex_free(struct gfs2_sbd *sdp); 28extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); 30extern int gfs2_mount_args(struct gfs2_args *args, char *data);
31 31
32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); 32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); 33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
@@ -44,7 +44,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
44 const void *buf); 44 const void *buf);
45extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, 45extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); 49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); 50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 446329728d5..0dc34621f6a 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -85,11 +85,7 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
85 buf[0] = '\0'; 85 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 86 if (!gfs2_uuid_valid(uuid))
87 return 0; 87 return 0;
88 return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-" 88 return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
89 "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
90 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
91 uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
92 uuid[12], uuid[13], uuid[14], uuid[15]);
93} 89}
94 90
95static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 91static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -158,7 +154,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
158 if (simple_strtol(buf, NULL, 0) != 1) 154 if (simple_strtol(buf, NULL, 0) != 1)
159 return -EINVAL; 155 return -EINVAL;
160 156
161 gfs2_statfs_sync(sdp); 157 gfs2_statfs_sync(sdp->sd_vfs, 0);
162 return len; 158 return len;
163} 159}
164 160
@@ -171,13 +167,14 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
171 if (simple_strtol(buf, NULL, 0) != 1) 167 if (simple_strtol(buf, NULL, 0) != 1)
172 return -EINVAL; 168 return -EINVAL;
173 169
174 gfs2_quota_sync(sdp); 170 gfs2_quota_sync(sdp->sd_vfs, 0);
175 return len; 171 return len;
176} 172}
177 173
178static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, 174static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
179 size_t len) 175 size_t len)
180{ 176{
177 int error;
181 u32 id; 178 u32 id;
182 179
183 if (!capable(CAP_SYS_ADMIN)) 180 if (!capable(CAP_SYS_ADMIN))
@@ -185,13 +182,14 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
185 182
186 id = simple_strtoul(buf, NULL, 0); 183 id = simple_strtoul(buf, NULL, 0);
187 184
188 gfs2_quota_refresh(sdp, 1, id); 185 error = gfs2_quota_refresh(sdp, 1, id);
189 return len; 186 return error ? error : len;
190} 187}
191 188
192static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, 189static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
193 size_t len) 190 size_t len)
194{ 191{
192 int error;
195 u32 id; 193 u32 id;
196 194
197 if (!capable(CAP_SYS_ADMIN)) 195 if (!capable(CAP_SYS_ADMIN))
@@ -199,8 +197,8 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
199 197
200 id = simple_strtoul(buf, NULL, 0); 198 id = simple_strtoul(buf, NULL, 0);
201 199
202 gfs2_quota_refresh(sdp, 0, id); 200 error = gfs2_quota_refresh(sdp, 0, id);
203 return len; 201 return error ? error : len;
204} 202}
205 203
206static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 204static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
@@ -573,14 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
573 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 571 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
574 if (!sdp->sd_args.ar_spectator) 572 if (!sdp->sd_args.ar_spectator)
575 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); 573 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
576 if (gfs2_uuid_valid(uuid)) { 574 if (gfs2_uuid_valid(uuid))
577 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" 575 add_uevent_var(env, "UUID=%pUB", uuid);
578 "%02X%02X-%02X%02X%02X%02X%02X%02X",
579 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
580 uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
581 uuid[10], uuid[11], uuid[12], uuid[13],
582 uuid[14], uuid[15]);
583 }
584 return 0; 576 return 0;
585} 577}
586 578
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8a0f8ef6ee2..c2ebdf2c01d 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -186,8 +186,8 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
186 return 0; 186 return 0;
187} 187}
188 188
189int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, 189static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
190 struct gfs2_ea_location *el) 190 struct gfs2_ea_location *el)
191{ 191{
192 struct ea_find ef; 192 struct ea_find ef;
193 int error; 193 int error;
@@ -516,8 +516,8 @@ out:
516 return error; 516 return error;
517} 517}
518 518
519int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, 519static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
520 char *data, size_t size) 520 char *data, size_t size)
521{ 521{
522 int ret; 522 int ret;
523 size_t len = GFS2_EA_DATA_LEN(el->el_ea); 523 size_t len = GFS2_EA_DATA_LEN(el->el_ea);
@@ -534,21 +534,50 @@ int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
534 return len; 534 return len;
535} 535}
536 536
537int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
538{
539 struct gfs2_ea_location el;
540 int error;
541 int len;
542 char *data;
543
544 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el);
545 if (error)
546 return error;
547 if (!el.el_ea)
548 goto out;
549 if (!GFS2_EA_DATA_LEN(el.el_ea))
550 goto out;
551
552 len = GFS2_EA_DATA_LEN(el.el_ea);
553 data = kmalloc(len, GFP_NOFS);
554 error = -ENOMEM;
555 if (data == NULL)
556 goto out;
557
558 error = gfs2_ea_get_copy(ip, &el, data, len);
559 if (error == 0)
560 error = len;
561 *ppdata = data;
562out:
563 brelse(el.el_bh);
564 return error;
565}
566
537/** 567/**
538 * gfs2_xattr_get - Get a GFS2 extended attribute 568 * gfs2_xattr_get - Get a GFS2 extended attribute
539 * @inode: The inode 569 * @inode: The inode
540 * @type: The type of extended attribute
541 * @name: The name of the extended attribute 570 * @name: The name of the extended attribute
542 * @buffer: The buffer to write the result into 571 * @buffer: The buffer to write the result into
543 * @size: The size of the buffer 572 * @size: The size of the buffer
573 * @type: The type of extended attribute
544 * 574 *
545 * Returns: actual size of data on success, -errno on error 575 * Returns: actual size of data on success, -errno on error
546 */ 576 */
547 577static int gfs2_xattr_get(struct dentry *dentry, const char *name,
548int gfs2_xattr_get(struct inode *inode, int type, const char *name, 578 void *buffer, size_t size, int type)
549 void *buffer, size_t size)
550{ 579{
551 struct gfs2_inode *ip = GFS2_I(inode); 580 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
552 struct gfs2_ea_location el; 581 struct gfs2_ea_location el;
553 int error; 582 int error;
554 583
@@ -1089,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1089 1118
1090/** 1119/**
1091 * gfs2_xattr_remove - Remove a GFS2 extended attribute 1120 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1092 * @inode: The inode 1121 * @ip: The inode
1093 * @type: The type of the extended attribute 1122 * @type: The type of the extended attribute
1094 * @name: The name of the extended attribute 1123 * @name: The name of the extended attribute
1095 * 1124 *
@@ -1100,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1100 * Returns: 0, or errno on failure 1129 * Returns: 0, or errno on failure
1101 */ 1130 */
1102 1131
1103static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) 1132static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
1104{ 1133{
1105 struct gfs2_inode *ip = GFS2_I(inode);
1106 struct gfs2_ea_location el; 1134 struct gfs2_ea_location el;
1107 int error; 1135 int error;
1108 1136
@@ -1126,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1126} 1154}
1127 1155
1128/** 1156/**
1129 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute 1157 * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1130 * @inode: The inode 1158 * @ip: The inode
1131 * @type: The type of the extended attribute
1132 * @name: The name of the extended attribute 1159 * @name: The name of the extended attribute
1133 * @value: The value of the extended attribute (NULL for remove) 1160 * @value: The value of the extended attribute (NULL for remove)
1134 * @size: The size of the @value argument 1161 * @size: The size of the @value argument
1135 * @flags: Create or Replace 1162 * @flags: Create or Replace
1163 * @type: The type of the extended attribute
1136 * 1164 *
1137 * See gfs2_xattr_remove() for details of the removal of xattrs. 1165 * See gfs2_xattr_remove() for details of the removal of xattrs.
1138 * 1166 *
1139 * Returns: 0 or errno on failure 1167 * Returns: 0 or errno on failure
1140 */ 1168 */
1141 1169
1142int gfs2_xattr_set(struct inode *inode, int type, const char *name, 1170int __gfs2_xattr_set(struct inode *inode, const char *name,
1143 const void *value, size_t size, int flags) 1171 const void *value, size_t size, int flags, int type)
1144{ 1172{
1145 struct gfs2_sbd *sdp = GFS2_SB(inode);
1146 struct gfs2_inode *ip = GFS2_I(inode); 1173 struct gfs2_inode *ip = GFS2_I(inode);
1174 struct gfs2_sbd *sdp = GFS2_SB(inode);
1147 struct gfs2_ea_location el; 1175 struct gfs2_ea_location el;
1148 unsigned int namel = strlen(name); 1176 unsigned int namel = strlen(name);
1149 int error; 1177 int error;
@@ -1154,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1154 return -ERANGE; 1182 return -ERANGE;
1155 1183
1156 if (value == NULL) 1184 if (value == NULL)
1157 return gfs2_xattr_remove(inode, type, name); 1185 return gfs2_xattr_remove(ip, type, name);
1158 1186
1159 if (ea_check_size(sdp, namel, size)) 1187 if (ea_check_size(sdp, namel, size))
1160 return -ERANGE; 1188 return -ERANGE;
@@ -1194,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1194 return error; 1222 return error;
1195} 1223}
1196 1224
1225static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1226 const void *value, size_t size, int flags, int type)
1227{
1228 return __gfs2_xattr_set(dentry->d_inode, name, value,
1229 size, flags, type);
1230}
1231
1197static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1232static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1198 struct gfs2_ea_header *ea, char *data) 1233 struct gfs2_ea_header *ea, char *data)
1199{ 1234{
@@ -1259,23 +1294,29 @@ fail:
1259 return error; 1294 return error;
1260} 1295}
1261 1296
1262int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1263 struct iattr *attr, char *data)
1264{ 1298{
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1300 struct gfs2_ea_location el;
1265 struct buffer_head *dibh; 1301 struct buffer_head *dibh;
1266 int error; 1302 int error;
1267 1303
1268 if (GFS2_EA_IS_STUFFED(el->el_ea)) { 1304 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
1269 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1305 if (error)
1270 if (error) 1306 return error;
1271 return error;
1272 1307
1273 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1308 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1274 memcpy(GFS2_EA2DATA(el->el_ea), data, 1309 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1275 GFS2_EA_DATA_LEN(el->el_ea)); 1310 if (error == 0) {
1276 } else 1311 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1277 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data); 1312 memcpy(GFS2_EA2DATA(el.el_ea), data,
1313 GFS2_EA_DATA_LEN(el.el_ea));
1314 }
1315 } else {
1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1317 }
1278 1318
1319 brelse(el.el_bh);
1279 if (error) 1320 if (error)
1280 return error; 1321 return error;
1281 1322
@@ -1288,8 +1329,7 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1288 brelse(dibh); 1329 brelse(dibh);
1289 } 1330 }
1290 1331
1291 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1332 gfs2_trans_end(sdp);
1292
1293 return error; 1333 return error;
1294} 1334}
1295 1335
@@ -1495,58 +1535,18 @@ out_alloc:
1495 return error; 1535 return error;
1496} 1536}
1497 1537
1498static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1499 void *buffer, size_t size)
1500{
1501 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1502}
1503
1504static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1505 const void *value, size_t size, int flags)
1506{
1507 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1508}
1509
1510static int gfs2_xattr_system_get(struct inode *inode, const char *name,
1511 void *buffer, size_t size)
1512{
1513 return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size);
1514}
1515
1516static int gfs2_xattr_system_set(struct inode *inode, const char *name,
1517 const void *value, size_t size, int flags)
1518{
1519 return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags);
1520}
1521
1522static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1523 void *buffer, size_t size)
1524{
1525 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1526}
1527
1528static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1529 const void *value, size_t size, int flags)
1530{
1531 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1532}
1533
1534static struct xattr_handler gfs2_xattr_user_handler = { 1538static struct xattr_handler gfs2_xattr_user_handler = {
1535 .prefix = XATTR_USER_PREFIX, 1539 .prefix = XATTR_USER_PREFIX,
1536 .get = gfs2_xattr_user_get, 1540 .flags = GFS2_EATYPE_USR,
1537 .set = gfs2_xattr_user_set, 1541 .get = gfs2_xattr_get,
1542 .set = gfs2_xattr_set,
1538}; 1543};
1539 1544
1540static struct xattr_handler gfs2_xattr_security_handler = { 1545static struct xattr_handler gfs2_xattr_security_handler = {
1541 .prefix = XATTR_SECURITY_PREFIX, 1546 .prefix = XATTR_SECURITY_PREFIX,
1542 .get = gfs2_xattr_security_get, 1547 .flags = GFS2_EATYPE_SECURITY,
1543 .set = gfs2_xattr_security_set, 1548 .get = gfs2_xattr_get,
1544}; 1549 .set = gfs2_xattr_set,
1545
1546static struct xattr_handler gfs2_xattr_system_handler = {
1547 .prefix = XATTR_SYSTEM_PREFIX,
1548 .get = gfs2_xattr_system_get,
1549 .set = gfs2_xattr_system_set,
1550}; 1550};
1551 1551
1552struct xattr_handler *gfs2_xattr_handlers[] = { 1552struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index cbdfd774373..d392f8358f2 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,20 +53,15 @@ struct gfs2_ea_location {
53 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
54}; 54};
55 55
56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, 56extern int __gfs2_xattr_set(struct inode *inode, const char *name,
57 void *buffer, size_t size); 57 const void *value, size_t size,
58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, 58 int flags, int type);
59 const void *value, size_t size, int flags);
60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); 59extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
61extern int gfs2_ea_dealloc(struct gfs2_inode *ip); 60extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
62 61
63/* Exported to acl.c */ 62/* Exported to acl.c */
64 63
65extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, 64extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
66 struct gfs2_ea_location *el); 65extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
67extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
68 char *data, size_t size);
69extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
70 struct iattr *attr, char *data);
71 66
72#endif /* __EATTR_DOT_H__ */ 67#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 6d98f116ca0..424b0337f52 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -289,6 +289,10 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
289 err = hfs_brec_find(&src_fd); 289 err = hfs_brec_find(&src_fd);
290 if (err) 290 if (err)
291 goto out; 291 goto out;
292 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
293 err = -EIO;
294 goto out;
295 }
292 296
293 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, 297 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
294 src_fd.entrylength); 298 src_fd.entrylength);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 7c69b98a2e4..2b3b8611b41 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -79,6 +79,11 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
79 filp->f_pos++; 79 filp->f_pos++;
80 /* fall through */ 80 /* fall through */
81 case 1: 81 case 1:
82 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
83 err = -EIO;
84 goto out;
85 }
86
82 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 87 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
83 if (entry.type != HFS_CDR_THD) { 88 if (entry.type != HFS_CDR_THD) {
84 printk(KERN_ERR "hfs: bad catalog folder thread\n"); 89 printk(KERN_ERR "hfs: bad catalog folder thread\n");
@@ -109,6 +114,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
109 err = -EIO; 114 err = -EIO;
110 goto out; 115 goto out;
111 } 116 }
117
118 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
119 err = -EIO;
120 goto out;
121 }
122
112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 123 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
113 type = entry.type; 124 type = entry.type;
114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); 125 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index f7fcbe49da7..5ed7252b7b2 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -409,8 +409,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
409 /* try to get the root inode */ 409 /* try to get the root inode */
410 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 410 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
411 res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); 411 res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd);
412 if (!res) 412 if (!res) {
413 if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
414 res = -EIO;
415 goto bail;
416 }
413 hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); 417 hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
418 }
414 if (res) { 419 if (res) {
415 hfs_find_exit(&fd); 420 hfs_find_exit(&fd);
416 goto bail_no_root; 421 goto bail_no_root;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f2feaa06bf2..cadc4ce4865 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -14,6 +14,7 @@
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
17#include <linux/bitmap.h>
17 18
18/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
19 20
@@ -115,15 +116,13 @@ static void hpfs_put_super(struct super_block *s)
115unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 116unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
116{ 117{
117 struct quad_buffer_head qbh; 118 struct quad_buffer_head qbh;
118 unsigned *bits; 119 unsigned long *bits;
119 unsigned i, count; 120 unsigned count;
120 if (!(bits = hpfs_map_4sectors(s, secno, &qbh, 4))) return 0; 121
121 count = 0; 122 bits = hpfs_map_4sectors(s, secno, &qbh, 4);
122 for (i = 0; i < 2048 / sizeof(unsigned); i++) { 123 if (!bits)
123 unsigned b; 124 return 0;
124 if (!bits[i]) continue; 125 count = bitmap_weight(bits, 2048 * BITS_PER_BYTE);
125 for (b = bits[i]; b; b>>=1) count += b & 1;
126 }
127 hpfs_brelse4(&qbh); 126 hpfs_brelse4(&qbh);
128 return count; 127 return count;
129} 128}
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67..7239efc690d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b..a0bbd3d1b41 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/dnotify.h> 30#include <linux/dnotify.h>
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h>
34#include <linux/magic.h> 33#include <linux/magic.h>
35 34
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -922,7 +921,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
922 int error = -ENOMEM; 921 int error = -ENOMEM;
923 struct file *file; 922 struct file *file;
924 struct inode *inode; 923 struct inode *inode;
925 struct dentry *dentry, *root; 924 struct path path;
925 struct dentry *root;
926 struct qstr quick_string; 926 struct qstr quick_string;
927 927
928 *user = NULL; 928 *user = NULL;
@@ -944,10 +944,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
944 quick_string.name = name; 944 quick_string.name = name;
945 quick_string.len = strlen(quick_string.name); 945 quick_string.len = strlen(quick_string.name);
946 quick_string.hash = 0; 946 quick_string.hash = 0;
947 dentry = d_alloc(root, &quick_string); 947 path.dentry = d_alloc(root, &quick_string);
948 if (!dentry) 948 if (!path.dentry)
949 goto out_shm_unlock; 949 goto out_shm_unlock;
950 950
951 path.mnt = mntget(hugetlbfs_vfsmount);
951 error = -ENOSPC; 952 error = -ENOSPC;
952 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 953 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
953 current_fsgid(), S_IFREG | S_IRWXUGO, 0); 954 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,24 +961,22 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
960 acctflag)) 961 acctflag))
961 goto out_inode; 962 goto out_inode;
962 963
963 d_instantiate(dentry, inode); 964 d_instantiate(path.dentry, inode);
964 inode->i_size = size; 965 inode->i_size = size;
965 inode->i_nlink = 0; 966 inode->i_nlink = 0;
966 967
967 error = -ENFILE; 968 error = -ENFILE;
968 file = alloc_file(hugetlbfs_vfsmount, dentry, 969 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
969 FMODE_WRITE | FMODE_READ,
970 &hugetlbfs_file_operations); 970 &hugetlbfs_file_operations);
971 if (!file) 971 if (!file)
972 goto out_dentry; /* inode is already attached */ 972 goto out_dentry; /* inode is already attached */
973 ima_counts_get(file);
974 973
975 return file; 974 return file;
976 975
977out_inode: 976out_inode:
978 iput(inode); 977 iput(inode);
979out_dentry: 978out_dentry:
980 dput(dentry); 979 path_put(&path);
981out_shm_unlock: 980out_shm_unlock:
982 if (*user) { 981 if (*user) {
983 user_shm_unlock(size, *user); 982 user_shm_unlock(size, *user);
diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be5597..03dfeb2e392 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -18,7 +18,6 @@
18#include <linux/hash.h> 18#include <linux/hash.h>
19#include <linux/swap.h> 19#include <linux/swap.h>
20#include <linux/security.h> 20#include <linux/security.h>
21#include <linux/ima.h>
22#include <linux/pagemap.h> 21#include <linux/pagemap.h>
23#include <linux/cdev.h> 22#include <linux/cdev.h>
24#include <linux/bootmem.h> 23#include <linux/bootmem.h>
@@ -114,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
114 * Prevent speculative execution through spin_unlock(&inode_lock); 113 * Prevent speculative execution through spin_unlock(&inode_lock);
115 */ 114 */
116 smp_mb(); 115 smp_mb();
117 wake_up_bit(&inode->i_state, __I_LOCK); 116 wake_up_bit(&inode->i_state, __I_NEW);
118} 117}
119 118
120/** 119/**
@@ -157,11 +156,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
157 156
158 if (security_inode_alloc(inode)) 157 if (security_inode_alloc(inode))
159 goto out; 158 goto out;
160
161 /* allocate and initialize an i_integrity */
162 if (ima_inode_alloc(inode))
163 goto out_free_security;
164
165 spin_lock_init(&inode->i_lock); 159 spin_lock_init(&inode->i_lock);
166 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 160 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
167 161
@@ -201,9 +195,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
201#endif 195#endif
202 196
203 return 0; 197 return 0;
204
205out_free_security:
206 security_inode_free(inode);
207out: 198out:
208 return -ENOMEM; 199 return -ENOMEM;
209} 200}
@@ -235,7 +226,6 @@ static struct inode *alloc_inode(struct super_block *sb)
235void __destroy_inode(struct inode *inode) 226void __destroy_inode(struct inode *inode)
236{ 227{
237 BUG_ON(inode_has_buffers(inode)); 228 BUG_ON(inode_has_buffers(inode));
238 ima_inode_free(inode);
239 security_inode_free(inode); 229 security_inode_free(inode);
240 fsnotify_inode_delete(inode); 230 fsnotify_inode_delete(inode);
241#ifdef CONFIG_FS_POSIX_ACL 231#ifdef CONFIG_FS_POSIX_ACL
@@ -700,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
700 } 690 }
701#endif 691#endif
702 /* 692 /*
703 * This is special! We do not need the spinlock when clearing I_LOCK, 693 * This is special! We do not need the spinlock when clearing I_NEW,
704 * because we're guaranteed that nobody else tries to do anything about 694 * because we're guaranteed that nobody else tries to do anything about
705 * the state of the inode when it is locked, as we just created it (so 695 * the state of the inode when it is locked, as we just created it (so
706 * there can be no old holders that haven't tested I_LOCK). 696 * there can be no old holders that haven't tested I_NEW).
707 * However we must emit the memory barrier so that other CPUs reliably 697 * However we must emit the memory barrier so that other CPUs reliably
708 * see the clearing of I_LOCK after the other inode initialisation has 698 * see the clearing of I_NEW after the other inode initialisation has
709 * completed. 699 * completed.
710 */ 700 */
711 smp_mb(); 701 smp_mb();
712 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 702 WARN_ON(!(inode->i_state & I_NEW));
713 inode->i_state &= ~(I_LOCK|I_NEW); 703 inode->i_state &= ~I_NEW;
714 wake_up_inode(inode); 704 wake_up_inode(inode);
715} 705}
716EXPORT_SYMBOL(unlock_new_inode); 706EXPORT_SYMBOL(unlock_new_inode);
@@ -741,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
741 goto set_failed; 731 goto set_failed;
742 732
743 __inode_add_to_lists(sb, head, inode); 733 __inode_add_to_lists(sb, head, inode);
744 inode->i_state = I_LOCK|I_NEW; 734 inode->i_state = I_NEW;
745 spin_unlock(&inode_lock); 735 spin_unlock(&inode_lock);
746 736
747 /* Return the locked inode with I_NEW set, the 737 /* Return the locked inode with I_NEW set, the
@@ -788,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
788 if (!old) { 778 if (!old) {
789 inode->i_ino = ino; 779 inode->i_ino = ino;
790 __inode_add_to_lists(sb, head, inode); 780 __inode_add_to_lists(sb, head, inode);
791 inode->i_state = I_LOCK|I_NEW; 781 inode->i_state = I_NEW;
792 spin_unlock(&inode_lock); 782 spin_unlock(&inode_lock);
793 783
794 /* Return the locked inode with I_NEW set, the 784 /* Return the locked inode with I_NEW set, the
@@ -1093,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1093 ino_t ino = inode->i_ino; 1083 ino_t ino = inode->i_ino;
1094 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1084 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1095 1085
1096 inode->i_state |= I_LOCK|I_NEW; 1086 inode->i_state |= I_NEW;
1097 while (1) { 1087 while (1) {
1098 struct hlist_node *node; 1088 struct hlist_node *node;
1099 struct inode *old = NULL; 1089 struct inode *old = NULL;
@@ -1130,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1130 struct super_block *sb = inode->i_sb; 1120 struct super_block *sb = inode->i_sb;
1131 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1132 1122
1133 inode->i_state |= I_LOCK|I_NEW; 1123 inode->i_state |= I_NEW;
1134 1124
1135 while (1) { 1125 while (1) {
1136 struct hlist_node *node; 1126 struct hlist_node *node;
@@ -1520,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
1520 * until the deletion _might_ have completed. Callers are responsible 1510 * until the deletion _might_ have completed. Callers are responsible
1521 * to recheck inode state. 1511 * to recheck inode state.
1522 * 1512 *
1523 * It doesn't matter if I_LOCK is not set initially, a call to 1513 * It doesn't matter if I_NEW is not set initially, a call to
1524 * wake_up_inode() after removing from the hash list will DTRT. 1514 * wake_up_inode() after removing from the hash list will DTRT.
1525 * 1515 *
1526 * This is called with inode_lock held. 1516 * This is called with inode_lock held.
@@ -1528,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
1528static void __wait_on_freeing_inode(struct inode *inode) 1518static void __wait_on_freeing_inode(struct inode *inode)
1529{ 1519{
1530 wait_queue_head_t *wq; 1520 wait_queue_head_t *wq;
1531 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1532 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1522 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1533 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1534 spin_unlock(&inode_lock); 1524 spin_unlock(&inode_lock);
1535 schedule(); 1525 schedule();
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72..e96a1667d74 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,8 +79,16 @@ extern void chroot_fs_refs(struct path *, struct path *);
79 * file_table.c 79 * file_table.c
80 */ 80 */
81extern void mark_files_ro(struct super_block *); 81extern void mark_files_ro(struct super_block *);
82extern struct file *get_empty_filp(void);
82 83
83/* 84/*
84 * super.c 85 * super.c
85 */ 86 */
86extern int do_remount_sb(struct super_block *, int, void *, int); 87extern int do_remount_sb(struct super_block *, int, void *, int);
88
89/*
90 * open.c
91 */
92struct nameidata;
93extern struct file *nameidata_to_filp(struct nameidata *);
94extern void release_open_intent(struct nameidata *);
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index defb932eee9..0b3fa7974fa 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -36,286 +36,323 @@ static void *zisofs_zlib_workspace;
36static DEFINE_MUTEX(zisofs_zlib_lock); 36static DEFINE_MUTEX(zisofs_zlib_lock);
37 37
38/* 38/*
39 * When decompressing, we typically obtain more than one page 39 * Read data of @inode from @block_start to @block_end and uncompress
40 * per reference. We inject the additional pages into the page 40 * to one zisofs block. Store the data in the @pages array with @pcount
41 * cache as a form of readahead. 41 * entries. Start storing at offset @poffset of the first page.
42 */ 42 */
43static int zisofs_readpage(struct file *file, struct page *page) 43static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
44 loff_t block_end, int pcount,
45 struct page **pages, unsigned poffset,
46 int *errp)
44{ 47{
45 struct inode *inode = file->f_path.dentry->d_inode;
46 struct address_space *mapping = inode->i_mapping;
47 unsigned int maxpage, xpage, fpage, blockindex;
48 unsigned long offset;
49 unsigned long blockptr, blockendptr, cstart, cend, csize;
50 struct buffer_head *bh, *ptrbh[2];
51 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
52 unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
53 unsigned long bufmask = bufsize - 1;
54 int err = -EIO;
55 int i;
56 unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
57 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1]; 48 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
58 /* unsigned long zisofs_block_size = 1UL << zisofs_block_shift; */ 49 unsigned int bufsize = ISOFS_BUFFER_SIZE(inode);
59 unsigned int zisofs_block_page_shift = zisofs_block_shift-PAGE_CACHE_SHIFT; 50 unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
60 unsigned long zisofs_block_pages = 1UL << zisofs_block_page_shift; 51 unsigned int bufmask = bufsize - 1;
61 unsigned long zisofs_block_page_mask = zisofs_block_pages-1; 52 int i, block_size = block_end - block_start;
62 struct page *pages[zisofs_block_pages]; 53 z_stream stream = { .total_out = 0,
63 unsigned long index = page->index; 54 .avail_in = 0,
64 int indexblocks; 55 .avail_out = 0, };
65 56 int zerr;
66 /* We have already been given one page, this is the one 57 int needblocks = (block_size + (block_start & bufmask) + bufmask)
67 we must do. */ 58 >> bufshift;
68 xpage = index & zisofs_block_page_mask; 59 int haveblocks;
69 pages[xpage] = page; 60 blkcnt_t blocknum;
70 61 struct buffer_head *bhs[needblocks + 1];
71 /* The remaining pages need to be allocated and inserted */ 62 int curbh, curpage;
72 offset = index & ~zisofs_block_page_mask; 63
73 blockindex = offset >> zisofs_block_page_shift; 64 if (block_size > deflateBound(1UL << zisofs_block_shift)) {
74 maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 65 *errp = -EIO;
75
76 /*
77 * If this page is wholly outside i_size we just return zero;
78 * do_generic_file_read() will handle this for us
79 */
80 if (page->index >= maxpage) {
81 SetPageUptodate(page);
82 unlock_page(page);
83 return 0; 66 return 0;
84 } 67 }
85 68 /* Empty block? */
86 maxpage = min(zisofs_block_pages, maxpage-offset); 69 if (block_size == 0) {
87 70 for ( i = 0 ; i < pcount ; i++ ) {
88 for ( i = 0 ; i < maxpage ; i++, offset++ ) { 71 if (!pages[i])
89 if ( i != xpage ) { 72 continue;
90 pages[i] = grab_cache_page_nowait(mapping, offset); 73 memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
91 } 74 flush_dcache_page(pages[i]);
92 page = pages[i]; 75 SetPageUptodate(pages[i]);
93 if ( page ) {
94 ClearPageError(page);
95 kmap(page);
96 } 76 }
77 return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
97 } 78 }
98 79
99 /* This is the last page filled, plus one; used in case of abort. */ 80 /* Because zlib is not thread-safe, do all the I/O at the top. */
100 fpage = 0; 81 blocknum = block_start >> bufshift;
82 memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
83 haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
84 ll_rw_block(READ, haveblocks, bhs);
101 85
102 /* Find the pointer to this specific chunk */ 86 curbh = 0;
103 /* Note: we're not using isonum_731() here because the data is known aligned */ 87 curpage = 0;
104 /* Note: header_size is in 32-bit words (4 bytes) */ 88 /*
105 blockptr = (header_size + blockindex) << 2; 89 * First block is special since it may be fractional. We also wait for
106 blockendptr = blockptr + 4; 90 * it before grabbing the zlib mutex; odds are that the subsequent
91 * blocks are going to come in in short order so we don't hold the zlib
92 * mutex longer than necessary.
93 */
107 94
108 indexblocks = ((blockptr^blockendptr) >> bufshift) ? 2 : 1; 95 if (!bhs[0])
109 ptrbh[0] = ptrbh[1] = NULL; 96 goto b_eio;
110 97
111 if ( isofs_get_blocks(inode, blockptr >> bufshift, ptrbh, indexblocks) != indexblocks ) { 98 wait_on_buffer(bhs[0]);
112 if ( ptrbh[0] ) brelse(ptrbh[0]); 99 if (!buffer_uptodate(bhs[0])) {
113 printk(KERN_DEBUG "zisofs: Null buffer on reading block table, inode = %lu, block = %lu\n", 100 *errp = -EIO;
114 inode->i_ino, blockptr >> bufshift); 101 goto b_eio;
115 goto eio;
116 }
117 ll_rw_block(READ, indexblocks, ptrbh);
118
119 bh = ptrbh[0];
120 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
121 printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
122 inode->i_ino, blockptr >> bufshift);
123 if ( ptrbh[1] )
124 brelse(ptrbh[1]);
125 goto eio;
126 }
127 cstart = le32_to_cpu(*(__le32 *)(bh->b_data + (blockptr & bufmask)));
128
129 if ( indexblocks == 2 ) {
130 /* We just crossed a block boundary. Switch to the next block */
131 brelse(bh);
132 bh = ptrbh[1];
133 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
134 printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
135 inode->i_ino, blockendptr >> bufshift);
136 goto eio;
137 }
138 } 102 }
139 cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
140 brelse(bh);
141 103
142 if (cstart > cend) 104 stream.workspace = zisofs_zlib_workspace;
143 goto eio; 105 mutex_lock(&zisofs_zlib_lock);
144 106
145 csize = cend-cstart; 107 zerr = zlib_inflateInit(&stream);
146 108 if (zerr != Z_OK) {
147 if (csize > deflateBound(1UL << zisofs_block_shift)) 109 if (zerr == Z_MEM_ERROR)
148 goto eio; 110 *errp = -ENOMEM;
149 111 else
150 /* Now page[] contains an array of pages, any of which can be NULL, 112 *errp = -EIO;
151 and the locks on which we hold. We should now read the data and 113 printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
152 release the pages. If the pages are NULL the decompressed data 114 zerr);
153 for that particular page should be discarded. */ 115 goto z_eio;
154 116 }
155 if ( csize == 0 ) { 117
156 /* This data block is empty. */ 118 while (curpage < pcount && curbh < haveblocks &&
157 119 zerr != Z_STREAM_END) {
158 for ( fpage = 0 ; fpage < maxpage ; fpage++ ) { 120 if (!stream.avail_out) {
159 if ( (page = pages[fpage]) != NULL ) { 121 if (pages[curpage]) {
160 memset(page_address(page), 0, PAGE_CACHE_SIZE); 122 stream.next_out = page_address(pages[curpage])
161 123 + poffset;
162 flush_dcache_page(page); 124 stream.avail_out = PAGE_CACHE_SIZE - poffset;
163 SetPageUptodate(page); 125 poffset = 0;
164 kunmap(page); 126 } else {
165 unlock_page(page); 127 stream.next_out = (void *)&zisofs_sink_page;
166 if ( fpage == xpage ) 128 stream.avail_out = PAGE_CACHE_SIZE;
167 err = 0; /* The critical page */
168 else
169 page_cache_release(page);
170 } 129 }
171 } 130 }
172 } else { 131 if (!stream.avail_in) {
173 /* This data block is compressed. */ 132 wait_on_buffer(bhs[curbh]);
174 z_stream stream; 133 if (!buffer_uptodate(bhs[curbh])) {
175 int bail = 0, left_out = -1; 134 *errp = -EIO;
176 int zerr; 135 break;
177 int needblocks = (csize + (cstart & bufmask) + bufmask) >> bufshift; 136 }
178 int haveblocks; 137 stream.next_in = bhs[curbh]->b_data +
179 struct buffer_head *bhs[needblocks+1]; 138 (block_start & bufmask);
180 struct buffer_head **bhptr; 139 stream.avail_in = min_t(unsigned, bufsize -
181 140 (block_start & bufmask),
182 /* Because zlib is not thread-safe, do all the I/O at the top. */ 141 block_size);
183 142 block_size -= stream.avail_in;
184 blockptr = cstart >> bufshift; 143 block_start = 0;
185 memset(bhs, 0, (needblocks+1)*sizeof(struct buffer_head *));
186 haveblocks = isofs_get_blocks(inode, blockptr, bhs, needblocks);
187 ll_rw_block(READ, haveblocks, bhs);
188
189 bhptr = &bhs[0];
190 bh = *bhptr++;
191
192 /* First block is special since it may be fractional.
193 We also wait for it before grabbing the zlib
194 mutex; odds are that the subsequent blocks are
195 going to come in in short order so we don't hold
196 the zlib mutex longer than necessary. */
197
198 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
199 printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
200 fpage, xpage, csize);
201 goto b_eio;
202 }
203 stream.next_in = bh->b_data + (cstart & bufmask);
204 stream.avail_in = min(bufsize-(cstart & bufmask), csize);
205 csize -= stream.avail_in;
206
207 stream.workspace = zisofs_zlib_workspace;
208 mutex_lock(&zisofs_zlib_lock);
209
210 zerr = zlib_inflateInit(&stream);
211 if ( zerr != Z_OK ) {
212 if ( err && zerr == Z_MEM_ERROR )
213 err = -ENOMEM;
214 printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
215 zerr);
216 goto z_eio;
217 } 144 }
218 145
219 while ( !bail && fpage < maxpage ) { 146 while (stream.avail_out && stream.avail_in) {
220 page = pages[fpage]; 147 zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
221 if ( page ) 148 if (zerr == Z_BUF_ERROR && stream.avail_in == 0)
222 stream.next_out = page_address(page); 149 break;
223 else 150 if (zerr == Z_STREAM_END)
224 stream.next_out = (void *)&zisofs_sink_page; 151 break;
225 stream.avail_out = PAGE_CACHE_SIZE; 152 if (zerr != Z_OK) {
226 153 /* EOF, error, or trying to read beyond end of input */
227 while ( stream.avail_out ) { 154 if (zerr == Z_MEM_ERROR)
228 int ao, ai; 155 *errp = -ENOMEM;
229 if ( stream.avail_in == 0 && left_out ) { 156 else {
230 if ( !csize ) { 157 printk(KERN_DEBUG
231 printk(KERN_WARNING "zisofs: ZF read beyond end of input\n"); 158 "zisofs: zisofs_inflate returned"
232 bail = 1; 159 " %d, inode = %lu,"
233 break; 160 " page idx = %d, bh idx = %d,"
234 } else { 161 " avail_in = %d,"
235 bh = *bhptr++; 162 " avail_out = %d\n",
236 if ( !bh || 163 zerr, inode->i_ino, curpage,
237 (wait_on_buffer(bh), !buffer_uptodate(bh)) ) { 164 curbh, stream.avail_in,
238 /* Reached an EIO */ 165 stream.avail_out);
239 printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n", 166 *errp = -EIO;
240 fpage, xpage, csize);
241
242 bail = 1;
243 break;
244 }
245 stream.next_in = bh->b_data;
246 stream.avail_in = min(csize,bufsize);
247 csize -= stream.avail_in;
248 }
249 }
250 ao = stream.avail_out; ai = stream.avail_in;
251 zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
252 left_out = stream.avail_out;
253 if ( zerr == Z_BUF_ERROR && stream.avail_in == 0 )
254 continue;
255 if ( zerr != Z_OK ) {
256 /* EOF, error, or trying to read beyond end of input */
257 if ( err && zerr == Z_MEM_ERROR )
258 err = -ENOMEM;
259 if ( zerr != Z_STREAM_END )
260 printk(KERN_DEBUG "zisofs: zisofs_inflate returned %d, inode = %lu, index = %lu, fpage = %d, xpage = %d, avail_in = %d, avail_out = %d, ai = %d, ao = %d\n",
261 zerr, inode->i_ino, index,
262 fpage, xpage,
263 stream.avail_in, stream.avail_out,
264 ai, ao);
265 bail = 1;
266 break;
267 } 167 }
168 goto inflate_out;
268 } 169 }
170 }
269 171
270 if ( stream.avail_out && zerr == Z_STREAM_END ) { 172 if (!stream.avail_out) {
271 /* Fractional page written before EOF. This may 173 /* This page completed */
272 be the last page in the file. */ 174 if (pages[curpage]) {
273 memset(stream.next_out, 0, stream.avail_out); 175 flush_dcache_page(pages[curpage]);
274 stream.avail_out = 0; 176 SetPageUptodate(pages[curpage]);
275 } 177 }
178 curpage++;
179 }
180 if (!stream.avail_in)
181 curbh++;
182 }
183inflate_out:
184 zlib_inflateEnd(&stream);
276 185
277 if ( !stream.avail_out ) { 186z_eio:
278 /* This page completed */ 187 mutex_unlock(&zisofs_zlib_lock);
279 if ( page ) { 188
280 flush_dcache_page(page); 189b_eio:
281 SetPageUptodate(page); 190 for (i = 0; i < haveblocks; i++)
282 kunmap(page); 191 brelse(bhs[i]);
283 unlock_page(page); 192 return stream.total_out;
284 if ( fpage == xpage ) 193}
285 err = 0; /* The critical page */ 194
286 else 195/*
287 page_cache_release(page); 196 * Uncompress data so that pages[full_page] is fully uptodate and possibly
288 } 197 * fills in other pages if we have data for them.
289 fpage++; 198 */
290 } 199static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
200 struct page **pages)
201{
202 loff_t start_off, end_off;
203 loff_t block_start, block_end;
204 unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
205 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
206 unsigned int blockptr;
207 loff_t poffset = 0;
208 blkcnt_t cstart_block, cend_block;
209 struct buffer_head *bh;
210 unsigned int blkbits = ISOFS_BUFFER_BITS(inode);
211 unsigned int blksize = 1 << blkbits;
212 int err;
213 loff_t ret;
214
215 BUG_ON(!pages[full_page]);
216
217 /*
218 * We want to read at least 'full_page' page. Because we have to
219 * uncompress the whole compression block anyway, fill the surrounding
220 * pages with the data we have anyway...
221 */
222 start_off = page_offset(pages[full_page]);
223 end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
224
225 cstart_block = start_off >> zisofs_block_shift;
226 cend_block = (end_off + (1 << zisofs_block_shift) - 1)
227 >> zisofs_block_shift;
228
229 WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
230 ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
231
232 /* Find the pointer to this specific chunk */
233 /* Note: we're not using isonum_731() here because the data is known aligned */
234 /* Note: header_size is in 32-bit words (4 bytes) */
235 blockptr = (header_size + cstart_block) << 2;
236 bh = isofs_bread(inode, blockptr >> blkbits);
237 if (!bh)
238 return -EIO;
239 block_start = le32_to_cpu(*(__le32 *)
240 (bh->b_data + (blockptr & (blksize - 1))));
241
242 while (cstart_block < cend_block && pcount > 0) {
243 /* Load end of the compressed block in the file */
244 blockptr += 4;
245 /* Traversed to next block? */
246 if (!(blockptr & (blksize - 1))) {
247 brelse(bh);
248
249 bh = isofs_bread(inode, blockptr >> blkbits);
250 if (!bh)
251 return -EIO;
252 }
253 block_end = le32_to_cpu(*(__le32 *)
254 (bh->b_data + (blockptr & (blksize - 1))));
255 if (block_start > block_end) {
256 brelse(bh);
257 return -EIO;
258 }
259 err = 0;
260 ret = zisofs_uncompress_block(inode, block_start, block_end,
261 pcount, pages, poffset, &err);
262 poffset += ret;
263 pages += poffset >> PAGE_CACHE_SHIFT;
264 pcount -= poffset >> PAGE_CACHE_SHIFT;
265 full_page -= poffset >> PAGE_CACHE_SHIFT;
266 poffset &= ~PAGE_CACHE_MASK;
267
268 if (err) {
269 brelse(bh);
270 /*
271 * Did we finish reading the page we really wanted
272 * to read?
273 */
274 if (full_page < 0)
275 return 0;
276 return err;
291 } 277 }
292 zlib_inflateEnd(&stream);
293 278
294 z_eio: 279 block_start = block_end;
295 mutex_unlock(&zisofs_zlib_lock); 280 cstart_block++;
281 }
282
283 if (poffset && *pages) {
284 memset(page_address(*pages) + poffset, 0,
285 PAGE_CACHE_SIZE - poffset);
286 flush_dcache_page(*pages);
287 SetPageUptodate(*pages);
288 }
289 return 0;
290}
296 291
297 b_eio: 292/*
298 for ( i = 0 ; i < haveblocks ; i++ ) { 293 * When decompressing, we typically obtain more than one page
299 if ( bhs[i] ) 294 * per reference. We inject the additional pages into the page
300 brelse(bhs[i]); 295 * cache as a form of readahead.
296 */
297static int zisofs_readpage(struct file *file, struct page *page)
298{
299 struct inode *inode = file->f_path.dentry->d_inode;
300 struct address_space *mapping = inode->i_mapping;
301 int err;
302 int i, pcount, full_page;
303 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
304 unsigned int zisofs_pages_per_cblock =
305 PAGE_CACHE_SHIFT <= zisofs_block_shift ?
306 (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
307 struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
308 pgoff_t index = page->index, end_index;
309
310 end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
311 /*
312 * If this page is wholly outside i_size we just return zero;
313 * do_generic_file_read() will handle this for us
314 */
315 if (index >= end_index) {
316 SetPageUptodate(page);
317 unlock_page(page);
318 return 0;
319 }
320
321 if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
322 /* We have already been given one page, this is the one
323 we must do. */
324 full_page = index & (zisofs_pages_per_cblock - 1);
325 pcount = min_t(int, zisofs_pages_per_cblock,
326 end_index - (index & ~(zisofs_pages_per_cblock - 1)));
327 index -= full_page;
328 } else {
329 full_page = 0;
330 pcount = 1;
331 }
332 pages[full_page] = page;
333
334 for (i = 0; i < pcount; i++, index++) {
335 if (i != full_page)
336 pages[i] = grab_cache_page_nowait(mapping, index);
337 if (pages[i]) {
338 ClearPageError(pages[i]);
339 kmap(pages[i]);
301 } 340 }
302 } 341 }
303 342
304eio: 343 err = zisofs_fill_pages(inode, full_page, pcount, pages);
305 344
306 /* Release any residual pages, do not SetPageUptodate */ 345 /* Release any residual pages, do not SetPageUptodate */
307 while ( fpage < maxpage ) { 346 for (i = 0; i < pcount; i++) {
308 page = pages[fpage]; 347 if (pages[i]) {
309 if ( page ) { 348 flush_dcache_page(pages[i]);
310 flush_dcache_page(page); 349 if (i == full_page && err)
311 if ( fpage == xpage ) 350 SetPageError(pages[i]);
312 SetPageError(page); 351 kunmap(pages[i]);
313 kunmap(page); 352 unlock_page(pages[i]);
314 unlock_page(page); 353 if (i != full_page)
315 if ( fpage != xpage ) 354 page_cache_release(pages[i]);
316 page_cache_release(page);
317 } 355 }
318 fpage++;
319 } 356 }
320 357
321 /* At this point, err contains 0 or -EIO depending on the "critical" page */ 358 /* At this point, err contains 0 or -EIO depending on the "critical" page */
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba..ed752cb3847 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * The following files are helpful: 10 * The following files are helpful:
11 * 11 *
12 * Documentation/filesystems/Exporting 12 * Documentation/filesystems/nfs/Exporting
13 * fs/exportfs/expfs.c. 13 * fs/exportfs/expfs.c.
14 */ 14 */
15 15
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c2fb2dd0131..96a685c550f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -518,8 +518,7 @@ repeat:
518 if (algo == SIG('p', 'z')) { 518 if (algo == SIG('p', 'z')) {
519 int block_shift = 519 int block_shift =
520 isonum_711(&rr->u.ZF.parms[1]); 520 isonum_711(&rr->u.ZF.parms[1]);
521 if (block_shift < PAGE_CACHE_SHIFT 521 if (block_shift > 17) {
522 || block_shift > 17) {
523 printk(KERN_WARNING "isofs: " 522 printk(KERN_WARNING "isofs: "
524 "Can't handle ZF block " 523 "Can't handle ZF block "
525 "size of 2^%d\n", 524 "size of 2^%d\n",
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afad6d0..bd224eec9b0 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@ static void __init jbd_create_debugfs_entry(void)
1913{ 1913{
1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); 1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1915 if (jbd_debugfs_dir) 1915 if (jbd_debugfs_dir)
1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO, 1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
1917 jbd_debugfs_dir, 1917 jbd_debugfs_dir,
1918 &journal_enable_debug); 1918 &journal_enable_debug);
1919} 1919}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b2..88684937095 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index d4cfd6d2779..1bc74b6f26d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -286,7 +287,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
286 if (err) { 287 if (err) {
287 /* 288 /*
288 * Because AS_EIO is cleared by 289 * Because AS_EIO is cleared by
289 * wait_on_page_writeback_range(), set it again so 290 * filemap_fdatawait_range(), set it again so
290 * that user process can get -EIO from fsync(). 291 * that user process can get -EIO from fsync().
291 */ 292 */
292 set_bit(AS_EIO, 293 set_bit(AS_EIO,
@@ -636,6 +637,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
636 JBUFFER_TRACE(jh, "ph3: write metadata"); 637 JBUFFER_TRACE(jh, "ph3: write metadata");
637 flags = jbd2_journal_write_metadata_buffer(commit_transaction, 638 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
638 jh, &new_jh, blocknr); 639 jh, &new_jh, blocknr);
640 if (flags < 0) {
641 jbd2_journal_abort(journal, flags);
642 continue;
643 }
639 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 644 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
640 wbuf[bufs++] = jh2bh(new_jh); 645 wbuf[bufs++] = jh2bh(new_jh);
641 646
@@ -704,8 +709,17 @@ start_journal_io:
704 } 709 }
705 } 710 }
706 711
707 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
708 721
722 /* Done it all: now write the commit record asynchronously. */
709 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
710 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
711 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -716,13 +730,6 @@ start_journal_io:
716 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
717 } 731 }
718 732
719 /*
720 * This is the right place to wait for data buffers both for ASYNC
721 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
722 * the commit block went to disk (which happens above). If commit is
723 * SYNC, we need to wait for data buffers before we start writing
724 * commit block, which happens below in such setting.
725 */
726 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
727 if (err) { 734 if (err) {
728 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fed85388ee8..ac0d027595d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
78EXPORT_SYMBOL(jbd2_journal_ack_err); 78EXPORT_SYMBOL(jbd2_journal_ack_err);
79EXPORT_SYMBOL(jbd2_journal_clear_err); 79EXPORT_SYMBOL(jbd2_journal_clear_err);
80EXPORT_SYMBOL(jbd2_log_wait_commit); 80EXPORT_SYMBOL(jbd2_log_wait_commit);
81EXPORT_SYMBOL(jbd2_log_start_commit);
81EXPORT_SYMBOL(jbd2_journal_start_commit); 82EXPORT_SYMBOL(jbd2_journal_start_commit);
82EXPORT_SYMBOL(jbd2_journal_force_commit_nested); 83EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
83EXPORT_SYMBOL(jbd2_journal_wipe); 84EXPORT_SYMBOL(jbd2_journal_wipe);
@@ -358,6 +359,10 @@ repeat:
358 359
359 jbd_unlock_bh_state(bh_in); 360 jbd_unlock_bh_state(bh_in);
360 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 361 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
362 if (!tmp) {
363 jbd2_journal_put_journal_head(new_jh);
364 return -ENOMEM;
365 }
361 jbd_lock_bh_state(bh_in); 366 jbd_lock_bh_state(bh_in);
362 if (jh_in->b_frozen_data) { 367 if (jh_in->b_frozen_data) {
363 jbd2_free(tmp, bh_in->b_size); 368 jbd2_free(tmp, bh_in->b_size);
@@ -809,7 +814,7 @@ static journal_t * journal_init_common (void)
809 journal_t *journal; 814 journal_t *journal;
810 int err; 815 int err;
811 816
812 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
813 if (!journal) 818 if (!journal)
814 goto fail; 819 goto fail;
815 820
@@ -1248,6 +1253,13 @@ int jbd2_journal_load(journal_t *journal)
1248 if (jbd2_journal_recover(journal)) 1253 if (jbd2_journal_recover(journal))
1249 goto recovery_error; 1254 goto recovery_error;
1250 1255
1256 if (journal->j_failed_commit) {
1257 printk(KERN_ERR "JBD2: journal transaction %u on %s "
1258 "is corrupt.\n", journal->j_failed_commit,
1259 journal->j_devname);
1260 return -EIO;
1261 }
1262
1251 /* OK, we've finished with the dynamic journal bits: 1263 /* OK, we've finished with the dynamic journal bits:
1252 * reinitialise the dynamic contents of the superblock in memory 1264 * reinitialise the dynamic contents of the superblock in memory
1253 * and reset them on disk. */ 1265 * and reset them on disk. */
@@ -2103,7 +2115,8 @@ static void __init jbd2_create_debugfs_entry(void)
2103{ 2115{
2104 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); 2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
2105 if (jbd2_debugfs_dir) 2117 if (jbd2_debugfs_dir)
2106 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO, 2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
2119 S_IRUGO | S_IWUSR,
2107 jbd2_debugfs_dir, 2120 jbd2_debugfs_dir,
2108 &jbd2_journal_enable_debug); 2121 &jbd2_journal_enable_debug);
2109} 2122}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e9741..7cdc3196476 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
350 return rc; 350 return rc;
351} 351}
352 352
353static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, 353static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
354 const char *name, size_t name_len) 354 size_t list_size, const char *name, size_t name_len, int type)
355{ 355{
356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); 356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
357 357
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
360 return retlen; 360 return retlen;
361} 361}
362 362
363static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, 363static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
364 const char *name, size_t name_len) 364 size_t list_size, const char *name, size_t name_len, int type)
365{ 365{
366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); 366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
367 367
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
370 return retlen; 370 return retlen;
371} 371}
372 372
373static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) 373static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
374 void *buffer, size_t size, int type)
374{ 375{
375 struct posix_acl *acl; 376 struct posix_acl *acl;
376 int rc; 377 int rc;
377 378
378 acl = jffs2_get_acl(inode, type); 379 if (name[0] != '\0')
380 return -EINVAL;
381
382 acl = jffs2_get_acl(dentry->d_inode, type);
379 if (IS_ERR(acl)) 383 if (IS_ERR(acl))
380 return PTR_ERR(acl); 384 return PTR_ERR(acl);
381 if (!acl) 385 if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
386 return rc; 390 return rc;
387} 391}
388 392
389static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) 393static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
390{ 394 const void *value, size_t size, int flags, int type)
391 if (name[0] != '\0')
392 return -EINVAL;
393 return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
397{
398 if (name[0] != '\0')
399 return -EINVAL;
400 return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
401}
402
403static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
404{ 395{
405 struct posix_acl *acl; 396 struct posix_acl *acl;
406 int rc; 397 int rc;
407 398
408 if (!is_owner_or_cap(inode)) 399 if (name[0] != '\0')
400 return -EINVAL;
401 if (!is_owner_or_cap(dentry->d_inode))
409 return -EPERM; 402 return -EPERM;
410 403
411 if (value) { 404 if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
420 } else { 413 } else {
421 acl = NULL; 414 acl = NULL;
422 } 415 }
423 rc = jffs2_set_acl(inode, type, acl); 416 rc = jffs2_set_acl(dentry->d_inode, type, acl);
424 out: 417 out:
425 posix_acl_release(acl); 418 posix_acl_release(acl);
426 return rc; 419 return rc;
427} 420}
428 421
429static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
430 const void *buffer, size_t size, int flags)
431{
432 if (name[0] != '\0')
433 return -EINVAL;
434 return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
435}
436
437static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
438 const void *buffer, size_t size, int flags)
439{
440 if (name[0] != '\0')
441 return -EINVAL;
442 return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
443}
444
445struct xattr_handler jffs2_acl_access_xattr_handler = { 422struct xattr_handler jffs2_acl_access_xattr_handler = {
446 .prefix = POSIX_ACL_XATTR_ACCESS, 423 .prefix = POSIX_ACL_XATTR_ACCESS,
424 .flags = ACL_TYPE_DEFAULT,
447 .list = jffs2_acl_access_listxattr, 425 .list = jffs2_acl_access_listxattr,
448 .get = jffs2_acl_access_getxattr, 426 .get = jffs2_acl_getxattr,
449 .set = jffs2_acl_access_setxattr, 427 .set = jffs2_acl_setxattr,
450}; 428};
451 429
452struct xattr_handler jffs2_acl_default_xattr_handler = { 430struct xattr_handler jffs2_acl_default_xattr_handler = {
453 .prefix = POSIX_ACL_XATTR_DEFAULT, 431 .prefix = POSIX_ACL_XATTR_DEFAULT,
432 .flags = ACL_TYPE_DEFAULT,
454 .list = jffs2_acl_default_listxattr, 433 .list = jffs2_acl_default_listxattr,
455 .get = jffs2_acl_default_getxattr, 434 .get = jffs2_acl_getxattr,
456 .set = jffs2_acl_default_setxattr, 435 .set = jffs2_acl_setxattr,
457}; 436};
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f25e70c1b51..f0294410868 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -177,7 +177,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
177 spin_unlock(&jffs2_compressor_list_lock); 177 spin_unlock(&jffs2_compressor_list_lock);
178 break; 178 break;
179 default: 179 default:
180 printk(KERN_ERR "JFFS2: unknow compression mode.\n"); 180 printk(KERN_ERR "JFFS2: unknown compression mode.\n");
181 } 181 }
182 out: 182 out:
183 if (ret == JFFS2_COMPR_NONE) { 183 if (ret == JFFS2_COMPR_NONE) {
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 090c556ffed..3b6f2fa12cf 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -700,7 +700,8 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
700 struct jffs2_raw_inode ri; 700 struct jffs2_raw_inode ri;
701 struct jffs2_node_frag *last_frag; 701 struct jffs2_node_frag *last_frag;
702 union jffs2_device_node dev; 702 union jffs2_device_node dev;
703 char *mdata = NULL, mdatalen = 0; 703 char *mdata = NULL;
704 int mdatalen = 0;
704 uint32_t alloclen, ilen; 705 uint32_t alloclen, ilen;
705 int ret; 706 int ret;
706 707
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1a80301004b..e22de8397b7 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -931,7 +931,7 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
931 * Helper function for jffs2_get_inode_nodes(). 931 * Helper function for jffs2_get_inode_nodes().
932 * The function detects whether more data should be read and reads it if yes. 932 * The function detects whether more data should be read and reads it if yes.
933 * 933 *
934 * Returns: 0 on succes; 934 * Returns: 0 on success;
935 * negative error code on failure. 935 * negative error code on failure.
936 */ 936 */
937static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 937static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
@@ -1284,7 +1284,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1284 f->target = NULL; 1284 f->target = NULL;
1285 mutex_unlock(&f->sem); 1285 mutex_unlock(&f->sem);
1286 jffs2_do_clear_inode(c, f); 1286 jffs2_do_clear_inode(c, f);
1287 return -ret; 1287 return ret;
1288 } 1288 }
1289 1289
1290 f->target[je32_to_cpu(latest_node->csize)] = '\0'; 1290 f->target[je32_to_cpu(latest_node->csize)] = '\0';
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb..eaccee05858 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
44} 44}
45 45
46/* ---- XATTR Handler for "security.*" ----------------- */ 46/* ---- XATTR Handler for "security.*" ----------------- */
47static int jffs2_security_getxattr(struct inode *inode, const char *name, 47static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
48 void *buffer, size_t size) 48 void *buffer, size_t size, int type)
49{ 49{
50 if (!strcmp(name, "")) 50 if (!strcmp(name, ""))
51 return -EINVAL; 51 return -EINVAL;
52 52
53 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); 53 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
54 name, buffer, size);
54} 55}
55 56
56static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, 57static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
57 size_t size, int flags) 58 const void *buffer, size_t size, int flags, int type)
58{ 59{
59 if (!strcmp(name, "")) 60 if (!strcmp(name, ""))
60 return -EINVAL; 61 return -EINVAL;
61 62
62 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); 63 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
64 name, buffer, size, flags);
63} 65}
64 66
65static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, 67static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
66 const char *name, size_t name_len) 68 size_t list_size, const char *name, size_t name_len, int type)
67{ 69{
68 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; 70 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
69 71
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 6caf1e1ee26..800171dca53 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,7 +23,7 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
26 uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE); 26 uint32_t sum_size = min_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27 27
28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
29 29
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 082e844ab2d..9e75c62c85d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
31 * is used to release xattr name/value pair and detach from c->xattrindex. 31 * is used to release xattr name/value pair and detach from c->xattrindex.
32 * reclaim_xattr_datum(c) 32 * reclaim_xattr_datum(c)
33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when 33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
34 * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold 34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
35 * is hard coded as 32KiB. 35 * is hard coded as 32KiB.
36 * do_verify_xattr_datum(c, xd) 36 * do_verify_xattr_datum(c, xd)
37 * is used to load the xdatum informations without name/value pair from the medium. 37 * is used to load the xdatum informations without name/value pair from the medium.
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
990 if (!xhandle) 990 if (!xhandle)
991 continue; 991 continue;
992 if (buffer) { 992 if (buffer) {
993 rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); 993 rc = xhandle->list(dentry, buffer+len, size-len,
994 xd->xname, xd->name_len, xd->flags);
994 } else { 995 } else {
995 rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); 996 rc = xhandle->list(dentry, NULL, 0, xd->xname,
997 xd->name_len, xd->flags);
996 } 998 }
997 if (rc < 0) 999 if (rc < 0)
998 goto out; 1000 goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef34..3e5a5e356e0 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_trusted_getxattr(struct inode *inode, const char *name, 19static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada..8544af67dff 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_user_getxattr(struct inode *inode, const char *name, 19static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2bc7d8aa574..d9b031cf69f 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -755,7 +755,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
755 * allocation group. 755 * allocation group.
756 */ 756 */
757 if ((blkno & (bmp->db_agsize - 1)) == 0) 757 if ((blkno & (bmp->db_agsize - 1)) == 0)
758 /* check if the AG is currenly being written to. 758 /* check if the AG is currently being written to.
759 * if so, call dbNextAG() to find a non-busy 759 * if so, call dbNextAG() to find a non-busy
760 * AG with sufficient free space. 760 * AG with sufficient free space.
761 */ 761 */
@@ -3337,7 +3337,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3337 for (i = 0, n = 0; i < agno; n++) { 3337 for (i = 0, n = 0; i < agno; n++) {
3338 bmp->db_agfree[n] = 0; /* init collection point */ 3338 bmp->db_agfree[n] = 0; /* init collection point */
3339 3339
3340 /* coalesce cotiguous k AGs; */ 3340 /* coalesce contiguous k AGs; */
3341 for (j = 0; j < k && i < agno; j++, i++) { 3341 for (j = 0; j < k && i < agno; j++, i++) {
3342 /* merge AGi to AGn */ 3342 /* merge AGi to AGn */
3343 bmp->db_agfree[n] += bmp->db_agfree[i]; 3343 bmp->db_agfree[n] += bmp->db_agfree[i];
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada..d945ea76b44 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1292 */ 1292 */
1293 /* 1293 /*
1294 * I believe this code is no longer needed. Splitting I_LOCK 1294 * I believe this code is no longer needed. Splitting I_LOCK
1295 * into two bits, I_LOCK and I_SYNC should prevent this 1295 * into two bits, I_NEW and I_SYNC should prevent this
1296 * deadlock as well. But since I don't have a JFS testload 1296 * deadlock as well. But since I don't have a JFS testload
1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1298 * Joern 1298 * Joern
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2234c73fc57..d929a822a74 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -524,7 +524,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
524 * Page cache is indexed by long. 524 * Page cache is indexed by long.
525 * I would use MAX_LFS_FILESIZE, but it's only half as big 525 * I would use MAX_LFS_FILESIZE, but it's only half as big
526 */ 526 */
527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes); 527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes);
528#endif 528#endif
529 sb->s_time_gran = 1; 529 sb->s_time_gran = 1;
530 return 0; 530 return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 219576c52d8..6e8d17e1dc4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -848,7 +848,6 @@ EXPORT_SYMBOL(simple_write_end);
848EXPORT_SYMBOL(simple_dir_inode_operations); 848EXPORT_SYMBOL(simple_dir_inode_operations);
849EXPORT_SYMBOL(simple_dir_operations); 849EXPORT_SYMBOL(simple_dir_operations);
850EXPORT_SYMBOL(simple_empty); 850EXPORT_SYMBOL(simple_empty);
851EXPORT_SYMBOL(d_alloc_name);
852EXPORT_SYMBOL(simple_fill_super); 851EXPORT_SYMBOL(simple_fill_super);
853EXPORT_SYMBOL(simple_getattr); 852EXPORT_SYMBOL(simple_getattr);
854EXPORT_SYMBOL(simple_link); 853EXPORT_SYMBOL(simple_link);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1a54ae14a19..e50cfa3d965 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -371,82 +371,74 @@ EXPORT_SYMBOL_GPL(lockd_down);
371 371
372static ctl_table nlm_sysctls[] = { 372static ctl_table nlm_sysctls[] = {
373 { 373 {
374 .ctl_name = CTL_UNNUMBERED,
375 .procname = "nlm_grace_period", 374 .procname = "nlm_grace_period",
376 .data = &nlm_grace_period, 375 .data = &nlm_grace_period,
377 .maxlen = sizeof(unsigned long), 376 .maxlen = sizeof(unsigned long),
378 .mode = 0644, 377 .mode = 0644,
379 .proc_handler = &proc_doulongvec_minmax, 378 .proc_handler = proc_doulongvec_minmax,
380 .extra1 = (unsigned long *) &nlm_grace_period_min, 379 .extra1 = (unsigned long *) &nlm_grace_period_min,
381 .extra2 = (unsigned long *) &nlm_grace_period_max, 380 .extra2 = (unsigned long *) &nlm_grace_period_max,
382 }, 381 },
383 { 382 {
384 .ctl_name = CTL_UNNUMBERED,
385 .procname = "nlm_timeout", 383 .procname = "nlm_timeout",
386 .data = &nlm_timeout, 384 .data = &nlm_timeout,
387 .maxlen = sizeof(unsigned long), 385 .maxlen = sizeof(unsigned long),
388 .mode = 0644, 386 .mode = 0644,
389 .proc_handler = &proc_doulongvec_minmax, 387 .proc_handler = proc_doulongvec_minmax,
390 .extra1 = (unsigned long *) &nlm_timeout_min, 388 .extra1 = (unsigned long *) &nlm_timeout_min,
391 .extra2 = (unsigned long *) &nlm_timeout_max, 389 .extra2 = (unsigned long *) &nlm_timeout_max,
392 }, 390 },
393 { 391 {
394 .ctl_name = CTL_UNNUMBERED,
395 .procname = "nlm_udpport", 392 .procname = "nlm_udpport",
396 .data = &nlm_udpport, 393 .data = &nlm_udpport,
397 .maxlen = sizeof(int), 394 .maxlen = sizeof(int),
398 .mode = 0644, 395 .mode = 0644,
399 .proc_handler = &proc_dointvec_minmax, 396 .proc_handler = proc_dointvec_minmax,
400 .extra1 = (int *) &nlm_port_min, 397 .extra1 = (int *) &nlm_port_min,
401 .extra2 = (int *) &nlm_port_max, 398 .extra2 = (int *) &nlm_port_max,
402 }, 399 },
403 { 400 {
404 .ctl_name = CTL_UNNUMBERED,
405 .procname = "nlm_tcpport", 401 .procname = "nlm_tcpport",
406 .data = &nlm_tcpport, 402 .data = &nlm_tcpport,
407 .maxlen = sizeof(int), 403 .maxlen = sizeof(int),
408 .mode = 0644, 404 .mode = 0644,
409 .proc_handler = &proc_dointvec_minmax, 405 .proc_handler = proc_dointvec_minmax,
410 .extra1 = (int *) &nlm_port_min, 406 .extra1 = (int *) &nlm_port_min,
411 .extra2 = (int *) &nlm_port_max, 407 .extra2 = (int *) &nlm_port_max,
412 }, 408 },
413 { 409 {
414 .ctl_name = CTL_UNNUMBERED,
415 .procname = "nsm_use_hostnames", 410 .procname = "nsm_use_hostnames",
416 .data = &nsm_use_hostnames, 411 .data = &nsm_use_hostnames,
417 .maxlen = sizeof(int), 412 .maxlen = sizeof(int),
418 .mode = 0644, 413 .mode = 0644,
419 .proc_handler = &proc_dointvec, 414 .proc_handler = proc_dointvec,
420 }, 415 },
421 { 416 {
422 .ctl_name = CTL_UNNUMBERED,
423 .procname = "nsm_local_state", 417 .procname = "nsm_local_state",
424 .data = &nsm_local_state, 418 .data = &nsm_local_state,
425 .maxlen = sizeof(int), 419 .maxlen = sizeof(int),
426 .mode = 0644, 420 .mode = 0644,
427 .proc_handler = &proc_dointvec, 421 .proc_handler = proc_dointvec,
428 }, 422 },
429 { .ctl_name = 0 } 423 { }
430}; 424};
431 425
432static ctl_table nlm_sysctl_dir[] = { 426static ctl_table nlm_sysctl_dir[] = {
433 { 427 {
434 .ctl_name = CTL_UNNUMBERED,
435 .procname = "nfs", 428 .procname = "nfs",
436 .mode = 0555, 429 .mode = 0555,
437 .child = nlm_sysctls, 430 .child = nlm_sysctls,
438 }, 431 },
439 { .ctl_name = 0 } 432 { }
440}; 433};
441 434
442static ctl_table nlm_sysctl_root[] = { 435static ctl_table nlm_sysctl_root[] = {
443 { 436 {
444 .ctl_name = CTL_FS,
445 .procname = "fs", 437 .procname = "fs",
446 .mode = 0555, 438 .mode = 0555,
447 .child = nlm_sysctl_dir, 439 .child = nlm_sysctl_dir,
448 }, 440 },
449 { .ctl_name = 0 } 441 { }
450}; 442};
451 443
452#endif /* CONFIG_SYSCTL */ 444#endif /* CONFIG_SYSCTL */
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6ca3b..a7966eed3c1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e1d28ddd216..56c9519d900 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e..94a5e60779f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,7 +35,7 @@
35#include <linux/fs_struct.h> 35#include <linux/fs_struct.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38#include "internal.h"
39 39
40/* [Feb-1997 T. Schoebel-Theuer] 40/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -108,8 +108,6 @@
108 * any extra contention... 108 * any extra contention...
109 */ 109 */
110 110
111static int __link_path_walk(const char *name, struct nameidata *nd);
112
113/* In order to reduce some races, while at the same time doing additional 111/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the 112 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them.. 113 * kernel data space before using them..
@@ -234,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
234 /* 232 /*
235 * Searching includes executable on directories, else just read. 233 * Searching includes executable on directories, else just read.
236 */ 234 */
235 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
237 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 236 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
238 if (capable(CAP_DAC_READ_SEARCH)) 237 if (capable(CAP_DAC_READ_SEARCH))
239 return 0; 238 return 0;
@@ -414,36 +413,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
414} 413}
415 414
416/* 415/*
417 * Internal lookup() using the new generic dcache. 416 * force_reval_path - force revalidation of a dentry
418 * SMP-safe 417 *
418 * In some situations the path walking code will trust dentries without
419 * revalidating them. This causes problems for filesystems that depend on
420 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
421 * (which indicates that it's possible for the dentry to go stale), force
422 * a d_revalidate call before proceeding.
423 *
424 * Returns 0 if the revalidation was successful. If the revalidation fails,
425 * either return the error returned by d_revalidate or -ESTALE if the
426 * revalidation it just returned 0. If d_revalidate returns 0, we attempt to
427 * invalidate the dentry. It's up to the caller to handle putting references
428 * to the path if necessary.
419 */ 429 */
420static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 430static int
431force_reval_path(struct path *path, struct nameidata *nd)
421{ 432{
422 struct dentry * dentry = __d_lookup(parent, name); 433 int status;
434 struct dentry *dentry = path->dentry;
423 435
424 /* lockess __d_lookup may fail due to concurrent d_move() 436 /*
425 * in some unrelated directory, so try with d_lookup 437 * only check on filesystems where it's possible for the dentry to
438 * become stale. It's assumed that if this flag is set then the
439 * d_revalidate op will also be defined.
426 */ 440 */
427 if (!dentry) 441 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
428 dentry = d_lookup(parent, name); 442 return 0;
429 443
430 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 444 status = dentry->d_op->d_revalidate(dentry, nd);
431 dentry = do_revalidate(dentry, nd); 445 if (status > 0)
446 return 0;
432 447
433 return dentry; 448 if (!status) {
449 d_invalidate(dentry);
450 status = -ESTALE;
451 }
452 return status;
434} 453}
435 454
436/* 455/*
437 * Short-cut version of permission(), for calling by 456 * Short-cut version of permission(), for calling on directories
438 * path_walk(), when dcache lock is held. Combines parts 457 * during pathname resolution. Combines parts of permission()
439 * of permission() and generic_permission(), and tests ONLY for 458 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
440 * MAY_EXEC permission.
441 * 459 *
442 * If appropriate, check DAC only. If not appropriate, or 460 * If appropriate, check DAC only. If not appropriate, or
443 * short-cut DAC fails, then call permission() to do more 461 * short-cut DAC fails, then call ->permission() to do more
444 * complete permission check. 462 * complete permission check.
445 */ 463 */
446static int exec_permission_lite(struct inode *inode) 464static int exec_permission(struct inode *inode)
447{ 465{
448 int ret; 466 int ret;
449 467
@@ -465,99 +483,6 @@ ok:
465 return security_inode_permission(inode, MAY_EXEC); 483 return security_inode_permission(inode, MAY_EXEC);
466} 484}
467 485
468/*
469 * This is called when everything else fails, and we actually have
470 * to go to the low-level filesystem to find out what we should do..
471 *
472 * We get the directory semaphore, and after getting that we also
473 * make sure that nobody added the entry to the dcache in the meantime..
474 * SMP-safe
475 */
476static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
477{
478 struct dentry * result;
479 struct inode *dir = parent->d_inode;
480
481 mutex_lock(&dir->i_mutex);
482 /*
483 * First re-do the cached lookup just in case it was created
484 * while we waited for the directory semaphore..
485 *
486 * FIXME! This could use version numbering or similar to
487 * avoid unnecessary cache lookups.
488 *
489 * The "dcache_lock" is purely to protect the RCU list walker
490 * from concurrent renames at this point (we mustn't get false
491 * negatives from the RCU list walk here, unlike the optimistic
492 * fast walk).
493 *
494 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
495 */
496 result = d_lookup(parent, name);
497 if (!result) {
498 struct dentry *dentry;
499
500 /* Don't create child dentry for a dead directory. */
501 result = ERR_PTR(-ENOENT);
502 if (IS_DEADDIR(dir))
503 goto out_unlock;
504
505 dentry = d_alloc(parent, name);
506 result = ERR_PTR(-ENOMEM);
507 if (dentry) {
508 result = dir->i_op->lookup(dir, dentry, nd);
509 if (result)
510 dput(dentry);
511 else
512 result = dentry;
513 }
514out_unlock:
515 mutex_unlock(&dir->i_mutex);
516 return result;
517 }
518
519 /*
520 * Uhhuh! Nasty case: the cache was re-populated while
521 * we waited on the semaphore. Need to revalidate.
522 */
523 mutex_unlock(&dir->i_mutex);
524 if (result->d_op && result->d_op->d_revalidate) {
525 result = do_revalidate(result, nd);
526 if (!result)
527 result = ERR_PTR(-ENOENT);
528 }
529 return result;
530}
531
532/*
533 * Wrapper to retry pathname resolution whenever the underlying
534 * file system returns an ESTALE.
535 *
536 * Retry the whole path once, forcing real lookup requests
537 * instead of relying on the dcache.
538 */
539static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
540{
541 struct path save = nd->path;
542 int result;
543
544 /* make sure the stuff we saved doesn't go away */
545 path_get(&save);
546
547 result = __link_path_walk(name, nd);
548 if (result == -ESTALE) {
549 /* nd->path had been dropped */
550 nd->path = save;
551 path_get(&nd->path);
552 nd->flags |= LOOKUP_REVAL;
553 result = __link_path_walk(name, nd);
554 }
555
556 path_put(&save);
557
558 return result;
559}
560
561static __always_inline void set_root(struct nameidata *nd) 486static __always_inline void set_root(struct nameidata *nd)
562{ 487{
563 if (!nd->root.mnt) { 488 if (!nd->root.mnt) {
@@ -569,6 +494,8 @@ static __always_inline void set_root(struct nameidata *nd)
569 } 494 }
570} 495}
571 496
497static int link_path_walk(const char *, struct nameidata *);
498
572static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 499static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
573{ 500{
574 int res = 0; 501 int res = 0;
@@ -634,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
634 dget(dentry); 561 dget(dentry);
635 } 562 }
636 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
637 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
638 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
639 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -641,11 +569,14 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
641 error = 0; 569 error = 0;
642 if (s) 570 if (s)
643 error = __vfs_follow_link(nd, s); 571 error = __vfs_follow_link(nd, s);
572 else if (nd->last_type == LAST_BIND) {
573 error = force_reval_path(&nd->path, nd);
574 if (error)
575 path_put(&nd->path);
576 }
644 if (dentry->d_inode->i_op->put_link) 577 if (dentry->d_inode->i_op->put_link)
645 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
646 } 579 }
647 path_put(path);
648
649 return error; 580 return error;
650} 581}
651 582
@@ -672,6 +603,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
672 current->total_link_count++; 603 current->total_link_count++;
673 nd->depth++; 604 nd->depth++;
674 err = __do_follow_link(path, nd); 605 err = __do_follow_link(path, nd);
606 path_put(path);
675 current->link_count--; 607 current->link_count--;
676 nd->depth--; 608 nd->depth--;
677 return err; 609 return err;
@@ -797,8 +729,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
797 struct path *path) 729 struct path *path)
798{ 730{
799 struct vfsmount *mnt = nd->path.mnt; 731 struct vfsmount *mnt = nd->path.mnt;
800 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 732 struct dentry *dentry, *parent;
733 struct inode *dir;
734 /*
735 * See if the low-level filesystem might want
736 * to use its own hash..
737 */
738 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
739 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
740 if (err < 0)
741 return err;
742 }
801 743
744 dentry = __d_lookup(nd->path.dentry, name);
802 if (!dentry) 745 if (!dentry)
803 goto need_lookup; 746 goto need_lookup;
804 if (dentry->d_op && dentry->d_op->d_revalidate) 747 if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +753,59 @@ done:
810 return 0; 753 return 0;
811 754
812need_lookup: 755need_lookup:
813 dentry = real_lookup(nd->path.dentry, name, nd); 756 parent = nd->path.dentry;
757 dir = parent->d_inode;
758
759 mutex_lock(&dir->i_mutex);
760 /*
761 * First re-do the cached lookup just in case it was created
762 * while we waited for the directory semaphore..
763 *
764 * FIXME! This could use version numbering or similar to
765 * avoid unnecessary cache lookups.
766 *
767 * The "dcache_lock" is purely to protect the RCU list walker
768 * from concurrent renames at this point (we mustn't get false
769 * negatives from the RCU list walk here, unlike the optimistic
770 * fast walk).
771 *
772 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
773 */
774 dentry = d_lookup(parent, name);
775 if (!dentry) {
776 struct dentry *new;
777
778 /* Don't create child dentry for a dead directory. */
779 dentry = ERR_PTR(-ENOENT);
780 if (IS_DEADDIR(dir))
781 goto out_unlock;
782
783 new = d_alloc(parent, name);
784 dentry = ERR_PTR(-ENOMEM);
785 if (new) {
786 dentry = dir->i_op->lookup(dir, new, nd);
787 if (dentry)
788 dput(new);
789 else
790 dentry = new;
791 }
792out_unlock:
793 mutex_unlock(&dir->i_mutex);
794 if (IS_ERR(dentry))
795 goto fail;
796 goto done;
797 }
798
799 /*
800 * Uhhuh! Nasty case: the cache was re-populated while
801 * we waited on the semaphore. Need to revalidate.
802 */
803 mutex_unlock(&dir->i_mutex);
804 if (dentry->d_op && dentry->d_op->d_revalidate) {
805 dentry = do_revalidate(dentry, nd);
806 if (!dentry)
807 dentry = ERR_PTR(-ENOENT);
808 }
814 if (IS_ERR(dentry)) 809 if (IS_ERR(dentry))
815 goto fail; 810 goto fail;
816 goto done; 811 goto done;
@@ -835,7 +830,7 @@ fail:
835 * Returns 0 and nd will have valid dentry and mnt on success. 830 * Returns 0 and nd will have valid dentry and mnt on success.
836 * Returns error and drops reference to input namei data on failure. 831 * Returns error and drops reference to input namei data on failure.
837 */ 832 */
838static int __link_path_walk(const char *name, struct nameidata *nd) 833static int link_path_walk(const char *name, struct nameidata *nd)
839{ 834{
840 struct path next; 835 struct path next;
841 struct inode *inode; 836 struct inode *inode;
@@ -858,7 +853,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
858 unsigned int c; 853 unsigned int c;
859 854
860 nd->flags |= LOOKUP_CONTINUE; 855 nd->flags |= LOOKUP_CONTINUE;
861 err = exec_permission_lite(inode); 856 err = exec_permission(inode);
862 if (err) 857 if (err)
863 break; 858 break;
864 859
@@ -898,16 +893,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
898 case 1: 893 case 1:
899 continue; 894 continue;
900 } 895 }
901 /*
902 * See if the low-level filesystem might want
903 * to use its own hash..
904 */
905 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
906 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
907 &this);
908 if (err < 0)
909 break;
910 }
911 /* This does the actual lookups.. */ 896 /* This does the actual lookups.. */
912 err = do_lookup(nd, &this, &next); 897 err = do_lookup(nd, &this, &next);
913 if (err) 898 if (err)
@@ -953,12 +938,6 @@ last_component:
953 case 1: 938 case 1:
954 goto return_reval; 939 goto return_reval;
955 } 940 }
956 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
957 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
958 &this);
959 if (err < 0)
960 break;
961 }
962 err = do_lookup(nd, &this, &next); 941 err = do_lookup(nd, &this, &next);
963 if (err) 942 if (err)
964 break; 943 break;
@@ -1017,8 +996,27 @@ return_err:
1017 996
1018static int path_walk(const char *name, struct nameidata *nd) 997static int path_walk(const char *name, struct nameidata *nd)
1019{ 998{
999 struct path save = nd->path;
1000 int result;
1001
1020 current->total_link_count = 0; 1002 current->total_link_count = 0;
1021 return link_path_walk(name, nd); 1003
1004 /* make sure the stuff we saved doesn't go away */
1005 path_get(&save);
1006
1007 result = link_path_walk(name, nd);
1008 if (result == -ESTALE) {
1009 /* nd->path had been dropped */
1010 current->total_link_count = 0;
1011 nd->path = save;
1012 path_get(&nd->path);
1013 nd->flags |= LOOKUP_REVAL;
1014 result = link_path_walk(name, nd);
1015 }
1016
1017 path_put(&save);
1018
1019 return result;
1022} 1020}
1023 1021
1024static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1022static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1139,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1141 return retval; 1139 return retval;
1142} 1140}
1143 1141
1144/**
1145 * path_lookup_open - lookup a file path with open intent
1146 * @dfd: the directory to use as base, or AT_FDCWD
1147 * @name: pointer to file name
1148 * @lookup_flags: lookup intent flags
1149 * @nd: pointer to nameidata
1150 * @open_flags: open intent flags
1151 */
1152static int path_lookup_open(int dfd, const char *name,
1153 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1154{
1155 struct file *filp = get_empty_filp();
1156 int err;
1157
1158 if (filp == NULL)
1159 return -ENFILE;
1160 nd->intent.open.file = filp;
1161 nd->intent.open.flags = open_flags;
1162 nd->intent.open.create_mode = 0;
1163 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1164 if (IS_ERR(nd->intent.open.file)) {
1165 if (err == 0) {
1166 err = PTR_ERR(nd->intent.open.file);
1167 path_put(&nd->path);
1168 }
1169 } else if (err != 0)
1170 release_open_intent(nd);
1171 return err;
1172}
1173
1174static struct dentry *__lookup_hash(struct qstr *name, 1142static struct dentry *__lookup_hash(struct qstr *name,
1175 struct dentry *base, struct nameidata *nd) 1143 struct dentry *base, struct nameidata *nd)
1176{ 1144{
@@ -1191,7 +1159,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
1191 goto out; 1159 goto out;
1192 } 1160 }
1193 1161
1194 dentry = cached_lookup(base, name, nd); 1162 dentry = __d_lookup(base, name);
1163
1164 /* lockess __d_lookup may fail due to concurrent d_move()
1165 * in some unrelated directory, so try with d_lookup
1166 */
1167 if (!dentry)
1168 dentry = d_lookup(base, name);
1169
1170 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1171 dentry = do_revalidate(dentry, nd);
1172
1195 if (!dentry) { 1173 if (!dentry) {
1196 struct dentry *new; 1174 struct dentry *new;
1197 1175
@@ -1223,7 +1201,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1223{ 1201{
1224 int err; 1202 int err;
1225 1203
1226 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); 1204 err = exec_permission(nd->path.dentry->d_inode);
1227 if (err) 1205 if (err)
1228 return ERR_PTR(err); 1206 return ERR_PTR(err);
1229 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1207 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,29 +1251,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1273 if (err) 1251 if (err)
1274 return ERR_PTR(err); 1252 return ERR_PTR(err);
1275 1253
1276 err = inode_permission(base->d_inode, MAY_EXEC); 1254 err = exec_permission(base->d_inode);
1277 if (err)
1278 return ERR_PTR(err);
1279 return __lookup_hash(&this, base, NULL);
1280}
1281
1282/**
1283 * lookup_one_noperm - bad hack for sysfs
1284 * @name: pathname component to lookup
1285 * @base: base directory to lookup from
1286 *
1287 * This is a variant of lookup_one_len that doesn't perform any permission
1288 * checks. It's a horrible hack to work around the braindead sysfs
1289 * architecture and should not be used anywhere else.
1290 *
1291 * DON'T USE THIS FUNCTION EVER, thanks.
1292 */
1293struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
1294{
1295 int err;
1296 struct qstr this;
1297
1298 err = __lookup_one_len(name, &this, base, strlen(name));
1299 if (err) 1255 if (err)
1300 return ERR_PTR(err); 1256 return ERR_PTR(err);
1301 return __lookup_hash(&this, base, NULL); 1257 return __lookup_hash(&this, base, NULL);
@@ -1533,69 +1489,45 @@ int may_open(struct path *path, int acc_mode, int flag)
1533 if (error) 1489 if (error)
1534 return error; 1490 return error;
1535 1491
1536 error = ima_path_check(path, acc_mode ?
1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1538 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1539 IMA_COUNT_UPDATE);
1540
1541 if (error)
1542 return error;
1543 /* 1492 /*
1544 * An append-only file must be opened in append mode for writing. 1493 * An append-only file must be opened in append mode for writing.
1545 */ 1494 */
1546 if (IS_APPEND(inode)) { 1495 if (IS_APPEND(inode)) {
1547 error = -EPERM;
1548 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1496 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1549 goto err_out; 1497 return -EPERM;
1550 if (flag & O_TRUNC) 1498 if (flag & O_TRUNC)
1551 goto err_out; 1499 return -EPERM;
1552 } 1500 }
1553 1501
1554 /* O_NOATIME can only be set by the owner or superuser */ 1502 /* O_NOATIME can only be set by the owner or superuser */
1555 if (flag & O_NOATIME) 1503 if (flag & O_NOATIME && !is_owner_or_cap(inode))
1556 if (!is_owner_or_cap(inode)) { 1504 return -EPERM;
1557 error = -EPERM;
1558 goto err_out;
1559 }
1560 1505
1561 /* 1506 /*
1562 * Ensure there are no outstanding leases on the file. 1507 * Ensure there are no outstanding leases on the file.
1563 */ 1508 */
1564 error = break_lease(inode, flag); 1509 return break_lease(inode, flag);
1565 if (error) 1510}
1566 goto err_out;
1567
1568 if (flag & O_TRUNC) {
1569 error = get_write_access(inode);
1570 if (error)
1571 goto err_out;
1572
1573 /*
1574 * Refuse to truncate files with mandatory locks held on them.
1575 */
1576 error = locks_verify_locked(inode);
1577 if (!error)
1578 error = security_path_truncate(path, 0,
1579 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1580 if (!error) {
1581 vfs_dq_init(inode);
1582
1583 error = do_truncate(dentry, 0,
1584 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1585 NULL);
1586 }
1587 put_write_access(inode);
1588 if (error)
1589 goto err_out;
1590 } else
1591 if (flag & FMODE_WRITE)
1592 vfs_dq_init(inode);
1593 1511
1594 return 0; 1512static int handle_truncate(struct path *path)
1595err_out: 1513{
1596 ima_counts_put(path, acc_mode ? 1514 struct inode *inode = path->dentry->d_inode;
1597 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : 1515 int error = get_write_access(inode);
1598 ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); 1516 if (error)
1517 return error;
1518 /*
1519 * Refuse to truncate files with mandatory locks held on them.
1520 */
1521 error = locks_verify_locked(inode);
1522 if (!error)
1523 error = security_path_truncate(path, 0,
1524 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1525 if (!error) {
1526 error = do_truncate(path->dentry, 0,
1527 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1528 NULL);
1529 }
1530 put_write_access(inode);
1599 return error; 1531 return error;
1600} 1532}
1601 1533
@@ -1650,7 +1582,7 @@ static inline int open_to_namei_flags(int flag)
1650 return flag; 1582 return flag;
1651} 1583}
1652 1584
1653static int open_will_write_to_fs(int flag, struct inode *inode) 1585static int open_will_truncate(int flag, struct inode *inode)
1654{ 1586{
1655 /* 1587 /*
1656 * We'll never write to the fs underlying 1588 * We'll never write to the fs underlying
@@ -1675,11 +1607,21 @@ struct file *do_filp_open(int dfd, const char *pathname,
1675 struct path path; 1607 struct path path;
1676 struct dentry *dir; 1608 struct dentry *dir;
1677 int count = 0; 1609 int count = 0;
1678 int will_write; 1610 int will_truncate;
1679 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1612 int force_reval = 0;
1613
1614 /*
1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1616 * check for O_DSYNC if the need any syncing at all we enforce it's
1617 * always set instead of having to deal with possibly weird behaviour
1618 * for malicious applications setting only __O_SYNC.
1619 */
1620 if (open_flag & __O_SYNC)
1621 open_flag |= O_DSYNC;
1680 1622
1681 if (!acc_mode) 1623 if (!acc_mode)
1682 acc_mode = MAY_OPEN | ACC_MODE(flag); 1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1683 1625
1684 /* O_TRUNC implies we need access checks for write permissions */ 1626 /* O_TRUNC implies we need access checks for write permissions */
1685 if (flag & O_TRUNC) 1627 if (flag & O_TRUNC)
@@ -1694,8 +1636,23 @@ struct file *do_filp_open(int dfd, const char *pathname,
1694 * The simplest case - just a plain lookup. 1636 * The simplest case - just a plain lookup.
1695 */ 1637 */
1696 if (!(flag & O_CREAT)) { 1638 if (!(flag & O_CREAT)) {
1697 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1639 filp = get_empty_filp();
1698 &nd, flag); 1640
1641 if (filp == NULL)
1642 return ERR_PTR(-ENFILE);
1643 nd.intent.open.file = filp;
1644 filp->f_flags = open_flag;
1645 nd.intent.open.flags = flag;
1646 nd.intent.open.create_mode = 0;
1647 error = do_path_lookup(dfd, pathname,
1648 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1649 if (IS_ERR(nd.intent.open.file)) {
1650 if (error == 0) {
1651 error = PTR_ERR(nd.intent.open.file);
1652 path_put(&nd.path);
1653 }
1654 } else if (error)
1655 release_open_intent(&nd);
1699 if (error) 1656 if (error)
1700 return ERR_PTR(error); 1657 return ERR_PTR(error);
1701 goto ok; 1658 goto ok;
@@ -1704,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1704 /* 1661 /*
1705 * Create - we need to know the parent. 1662 * Create - we need to know the parent.
1706 */ 1663 */
1664reval:
1707 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1708 if (error) 1666 if (error)
1709 return ERR_PTR(error); 1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1710 error = path_walk(pathname, &nd); 1670 error = path_walk(pathname, &nd);
1711 if (error) { 1671 if (error) {
1712 if (nd.root.mnt) 1672 if (nd.root.mnt)
@@ -1730,6 +1690,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1730 if (filp == NULL) 1690 if (filp == NULL)
1731 goto exit_parent; 1691 goto exit_parent;
1732 nd.intent.open.file = filp; 1692 nd.intent.open.file = filp;
1693 filp->f_flags = open_flag;
1733 nd.intent.open.flags = flag; 1694 nd.intent.open.flags = flag;
1734 nd.intent.open.create_mode = mode; 1695 nd.intent.open.create_mode = mode;
1735 dir = nd.path.dentry; 1696 dir = nd.path.dentry;
@@ -1770,14 +1731,18 @@ do_last:
1770 mnt_drop_write(nd.path.mnt); 1731 mnt_drop_write(nd.path.mnt);
1771 goto exit; 1732 goto exit;
1772 } 1733 }
1773 filp = nameidata_to_filp(&nd, open_flag); 1734 filp = nameidata_to_filp(&nd);
1774 if (IS_ERR(filp))
1775 ima_counts_put(&nd.path,
1776 acc_mode & (MAY_READ | MAY_WRITE |
1777 MAY_EXEC));
1778 mnt_drop_write(nd.path.mnt); 1735 mnt_drop_write(nd.path.mnt);
1779 if (nd.root.mnt) 1736 if (nd.root.mnt)
1780 path_put(&nd.root); 1737 path_put(&nd.root);
1738 if (!IS_ERR(filp)) {
1739 error = ima_path_check(&filp->f_path, filp->f_mode &
1740 (MAY_READ | MAY_WRITE | MAY_EXEC));
1741 if (error) {
1742 fput(filp);
1743 filp = ERR_PTR(error);
1744 }
1745 }
1781 return filp; 1746 return filp;
1782 } 1747 }
1783 1748
@@ -1805,7 +1770,7 @@ do_last:
1805 1770
1806 path_to_nameidata(&path, &nd); 1771 path_to_nameidata(&path, &nd);
1807 error = -EISDIR; 1772 error = -EISDIR;
1808 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1773 if (S_ISDIR(path.dentry->d_inode->i_mode))
1809 goto exit; 1774 goto exit;
1810ok: 1775ok:
1811 /* 1776 /*
@@ -1818,28 +1783,45 @@ ok:
1818 * be avoided. Taking this mnt write here 1783 * be avoided. Taking this mnt write here
1819 * ensures that (2) can not occur. 1784 * ensures that (2) can not occur.
1820 */ 1785 */
1821 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1786 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
1822 if (will_write) { 1787 if (will_truncate) {
1823 error = mnt_want_write(nd.path.mnt); 1788 error = mnt_want_write(nd.path.mnt);
1824 if (error) 1789 if (error)
1825 goto exit; 1790 goto exit;
1826 } 1791 }
1827 error = may_open(&nd.path, acc_mode, flag); 1792 error = may_open(&nd.path, acc_mode, flag);
1828 if (error) { 1793 if (error) {
1829 if (will_write) 1794 if (will_truncate)
1830 mnt_drop_write(nd.path.mnt); 1795 mnt_drop_write(nd.path.mnt);
1831 goto exit; 1796 goto exit;
1832 } 1797 }
1833 filp = nameidata_to_filp(&nd, open_flag); 1798 filp = nameidata_to_filp(&nd);
1834 if (IS_ERR(filp)) 1799 if (!IS_ERR(filp)) {
1835 ima_counts_put(&nd.path, 1800 error = ima_path_check(&filp->f_path, filp->f_mode &
1836 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); 1801 (MAY_READ | MAY_WRITE | MAY_EXEC));
1802 if (error) {
1803 fput(filp);
1804 filp = ERR_PTR(error);
1805 }
1806 }
1807 if (!IS_ERR(filp)) {
1808 if (acc_mode & MAY_WRITE)
1809 vfs_dq_init(nd.path.dentry->d_inode);
1810
1811 if (will_truncate) {
1812 error = handle_truncate(&nd.path);
1813 if (error) {
1814 fput(filp);
1815 filp = ERR_PTR(error);
1816 }
1817 }
1818 }
1837 /* 1819 /*
1838 * It is now safe to drop the mnt write 1820 * It is now safe to drop the mnt write
1839 * because the filp has had a write taken 1821 * because the filp has had a write taken
1840 * on its behalf. 1822 * on its behalf.
1841 */ 1823 */
1842 if (will_write) 1824 if (will_truncate)
1843 mnt_drop_write(nd.path.mnt); 1825 mnt_drop_write(nd.path.mnt);
1844 if (nd.root.mnt) 1826 if (nd.root.mnt)
1845 path_put(&nd.root); 1827 path_put(&nd.root);
@@ -1877,6 +1859,7 @@ do_link:
1877 if (error) 1859 if (error)
1878 goto exit_dput; 1860 goto exit_dput;
1879 error = __do_follow_link(&path, &nd); 1861 error = __do_follow_link(&path, &nd);
1862 path_put(&path);
1880 if (error) { 1863 if (error) {
1881 /* Does someone understand code flow here? Or it is only 1864 /* Does someone understand code flow here? Or it is only
1882 * me so stupid? Anathema to whoever designed this non-sense 1865 * me so stupid? Anathema to whoever designed this non-sense
@@ -1885,6 +1868,10 @@ do_link:
1885 release_open_intent(&nd); 1868 release_open_intent(&nd);
1886 if (nd.root.mnt) 1869 if (nd.root.mnt)
1887 path_put(&nd.root); 1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1888 return ERR_PTR(error); 1875 return ERR_PTR(error);
1889 } 1876 }
1890 nd.flags &= ~LOOKUP_PARENT; 1877 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index bdc3cb4fd22..c768f733c8d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
@@ -1921,6 +1929,16 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1921 if (data_page) 1929 if (data_page)
1922 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1930 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1923 1931
1932 /* ... and get the mountpoint */
1933 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1934 if (retval)
1935 return retval;
1936
1937 retval = security_sb_mount(dev_name, &path,
1938 type_page, flags, data_page);
1939 if (retval)
1940 goto dput_out;
1941
1924 /* Default to relatime unless overriden */ 1942 /* Default to relatime unless overriden */
1925 if (!(flags & MS_NOATIME)) 1943 if (!(flags & MS_NOATIME))
1926 mnt_flags |= MNT_RELATIME; 1944 mnt_flags |= MNT_RELATIME;
@@ -1945,16 +1963,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1945 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 1963 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1946 MS_STRICTATIME); 1964 MS_STRICTATIME);
1947 1965
1948 /* ... and get the mountpoint */
1949 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1950 if (retval)
1951 return retval;
1952
1953 retval = security_sb_mount(dev_name, &path,
1954 type_page, flags, data_page);
1955 if (retval)
1956 goto dput_out;
1957
1958 if (flags & MS_REMOUNT) 1966 if (flags & MS_REMOUNT)
1959 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, 1967 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1960 data_page); 1968 data_page);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 0d58caf4a6e..ec8f45f12e0 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -835,7 +835,7 @@ static int ncp_ioctl_need_write(unsigned int cmd)
835 case NCP_IOC_SETROOT: 835 case NCP_IOC_SETROOT:
836 return 0; 836 return 0;
837 default: 837 default:
838 /* unkown IOCTL command, assume write */ 838 /* unknown IOCTL command, assume write */
839 return 1; 839 return 1;
840 } 840 }
841} 841}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5a..59e5673b459 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
90 If you want your system to mount its root file system via NFS, 90 If you want your system to mount its root file system via NFS,
91 choose Y here. This is common practice for managing systems 91 choose Y here. This is common practice for managing systems
92 without local permanent storage. For details, read 92 without local permanent storage. For details, read
93 <file:Documentation/filesystems/nfsroot.txt>. 93 <file:Documentation/filesystems/nfs/nfsroot.txt>.
94 94
95 Most people say N here. 95 Most people say N here.
96 96
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 293fa0528a6..73ab220354d 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -78,11 +78,6 @@ nfs4_callback_svc(void *vrqstp)
78 78
79 set_freezable(); 79 set_freezable();
80 80
81 /*
82 * FIXME: do we really need to run this under the BKL? If so, please
83 * add a comment about what it's intended to protect.
84 */
85 lock_kernel();
86 while (!kthread_should_stop()) { 81 while (!kthread_should_stop()) {
87 /* 82 /*
88 * Listen for a request on the socket 83 * Listen for a request on the socket
@@ -104,7 +99,6 @@ nfs4_callback_svc(void *vrqstp)
104 preverr = err; 99 preverr = err;
105 svc_process(rqstp); 100 svc_process(rqstp);
106 } 101 }
107 unlock_kernel();
108 return 0; 102 return 0;
109} 103}
110 104
@@ -160,11 +154,6 @@ nfs41_callback_svc(void *vrqstp)
160 154
161 set_freezable(); 155 set_freezable();
162 156
163 /*
164 * FIXME: do we really need to run this under the BKL? If so, please
165 * add a comment about what it's intended to protect.
166 */
167 lock_kernel();
168 while (!kthread_should_stop()) { 157 while (!kthread_should_stop()) {
169 prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); 158 prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
170 spin_lock_bh(&serv->sv_cb_lock); 159 spin_lock_bh(&serv->sv_cb_lock);
@@ -183,7 +172,6 @@ nfs41_callback_svc(void *vrqstp)
183 } 172 }
184 finish_wait(&serv->sv_cb_waitq, &wq); 173 finish_wait(&serv->sv_cb_waitq, &wq);
185 } 174 }
186 unlock_kernel();
187 return 0; 175 return 0;
188} 176}
189 177
@@ -397,6 +385,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
397 */ 385 */
398static struct svc_version *nfs4_callback_version[] = { 386static struct svc_version *nfs4_callback_version[] = {
399 [1] = &nfs4_callback_version1, 387 [1] = &nfs4_callback_version1,
388 [4] = &nfs4_callback_version4,
400}; 389};
401 390
402static struct svc_stat nfs4_callback_stats; 391static struct svc_stat nfs4_callback_stats;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 07baa8254ca..d4036be0b58 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -106,6 +106,19 @@ struct cb_sequenceres {
106extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, 106extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
107 struct cb_sequenceres *res); 107 struct cb_sequenceres *res);
108 108
109extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
110 const nfs4_stateid *stateid);
111
112#define RCA4_TYPE_MASK_RDATA_DLG 0
113#define RCA4_TYPE_MASK_WDATA_DLG 1
114
115struct cb_recallanyargs {
116 struct sockaddr *craa_addr;
117 uint32_t craa_objs_to_keep;
118 uint32_t craa_type_mask;
119};
120
121extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
109#endif /* CONFIG_NFS_V4_1 */ 122#endif /* CONFIG_NFS_V4_1 */
110 123
111extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 124extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
@@ -114,8 +127,9 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
114#ifdef CONFIG_NFS_V4 127#ifdef CONFIG_NFS_V4
115extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); 128extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
116extern void nfs_callback_down(int minorversion); 129extern void nfs_callback_down(int minorversion);
130extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
131 const nfs4_stateid *stateid);
117#endif /* CONFIG_NFS_V4 */ 132#endif /* CONFIG_NFS_V4 */
118
119/* 133/*
120 * nfs41: Callbacks are expected to not cause substantial latency, 134 * nfs41: Callbacks are expected to not cause substantial latency,
121 * so we limit their concurrency to 1 by setting up the maximum number 135 * so we limit their concurrency to 1 by setting up the maximum number
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index b7da1f54da6..defa9b4c470 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -61,6 +61,16 @@ out:
61 return res->status; 61 return res->status;
62} 62}
63 63
64static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
65{
66#if defined(CONFIG_NFS_V4_1)
67 if (clp->cl_minorversion > 0)
68 return nfs41_validate_delegation_stateid;
69#endif
70 return nfs4_validate_delegation_stateid;
71}
72
73
64__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) 74__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
65{ 75{
66 struct nfs_client *clp; 76 struct nfs_client *clp;
@@ -81,7 +91,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
81 inode = nfs_delegation_find_inode(clp, &args->fh); 91 inode = nfs_delegation_find_inode(clp, &args->fh);
82 if (inode != NULL) { 92 if (inode != NULL) {
83 /* Set up a helper thread to actually return the delegation */ 93 /* Set up a helper thread to actually return the delegation */
84 switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { 94 switch (nfs_async_inode_return_delegation(inode, &args->stateid,
95 nfs_validate_delegation_stateid(clp))) {
85 case 0: 96 case 0:
86 res = 0; 97 res = 0;
87 break; 98 break;
@@ -102,8 +113,31 @@ out:
102 return res; 113 return res;
103} 114}
104 115
116int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
117{
118 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
119 sizeof(delegation->stateid.data)) != 0)
120 return 0;
121 return 1;
122}
123
105#if defined(CONFIG_NFS_V4_1) 124#if defined(CONFIG_NFS_V4_1)
106 125
126int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
127{
128 if (delegation == NULL)
129 return 0;
130
131 /* seqid is 4-bytes long */
132 if (((u32 *) &stateid->data)[0] != 0)
133 return 0;
134 if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
135 sizeof(stateid->data)-4))
136 return 0;
137
138 return 1;
139}
140
107/* 141/*
108 * Validate the sequenceID sent by the server. 142 * Validate the sequenceID sent by the server.
109 * Return success if the sequenceID is one more than what we last saw on 143 * Return success if the sequenceID is one more than what we last saw on
@@ -227,4 +261,32 @@ out:
227 return res->csr_status; 261 return res->csr_status;
228} 262}
229 263
264unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
265{
266 struct nfs_client *clp;
267 int status;
268 fmode_t flags = 0;
269
270 status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
271 clp = nfs_find_client(args->craa_addr, 4);
272 if (clp == NULL)
273 goto out;
274
275 dprintk("NFS: RECALL_ANY callback request from %s\n",
276 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
277
278 if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
279 &args->craa_type_mask))
280 flags = FMODE_READ;
281 if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
282 &args->craa_type_mask))
283 flags |= FMODE_WRITE;
284
285 if (flags)
286 nfs_expire_all_delegation_types(clp, flags);
287 status = htonl(NFS4_OK);
288out:
289 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
290 return status;
291}
230#endif /* CONFIG_NFS_V4_1 */ 292#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 76b0aa0f73b..8e1a2511c8b 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -23,6 +23,7 @@
23#if defined(CONFIG_NFS_V4_1) 23#if defined(CONFIG_NFS_V4_1)
24#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 24#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
25 4 + 1 + 3) 25 4 + 1 + 3)
26#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
26#endif /* CONFIG_NFS_V4_1 */ 27#endif /* CONFIG_NFS_V4_1 */
27 28
28#define NFSDBG_FACILITY NFSDBG_CALLBACK 29#define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -326,6 +327,25 @@ out_free:
326 goto out; 327 goto out;
327} 328}
328 329
330static unsigned decode_recallany_args(struct svc_rqst *rqstp,
331 struct xdr_stream *xdr,
332 struct cb_recallanyargs *args)
333{
334 uint32_t *p;
335
336 args->craa_addr = svc_addr(rqstp);
337 p = read_buf(xdr, 4);
338 if (unlikely(p == NULL))
339 return htonl(NFS4ERR_BADXDR);
340 args->craa_objs_to_keep = ntohl(*p++);
341 p = read_buf(xdr, 4);
342 if (unlikely(p == NULL))
343 return htonl(NFS4ERR_BADXDR);
344 args->craa_type_mask = ntohl(*p);
345
346 return 0;
347}
348
329#endif /* CONFIG_NFS_V4_1 */ 349#endif /* CONFIG_NFS_V4_1 */
330 350
331static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 351static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
@@ -533,6 +553,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
533 case OP_CB_GETATTR: 553 case OP_CB_GETATTR:
534 case OP_CB_RECALL: 554 case OP_CB_RECALL:
535 case OP_CB_SEQUENCE: 555 case OP_CB_SEQUENCE:
556 case OP_CB_RECALL_ANY:
536 *op = &callback_ops[op_nr]; 557 *op = &callback_ops[op_nr];
537 break; 558 break;
538 559
@@ -540,7 +561,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
540 case OP_CB_NOTIFY_DEVICEID: 561 case OP_CB_NOTIFY_DEVICEID:
541 case OP_CB_NOTIFY: 562 case OP_CB_NOTIFY:
542 case OP_CB_PUSH_DELEG: 563 case OP_CB_PUSH_DELEG:
543 case OP_CB_RECALL_ANY:
544 case OP_CB_RECALLABLE_OBJ_AVAIL: 564 case OP_CB_RECALLABLE_OBJ_AVAIL:
545 case OP_CB_RECALL_SLOT: 565 case OP_CB_RECALL_SLOT:
546 case OP_CB_WANTS_CANCELLED: 566 case OP_CB_WANTS_CANCELLED:
@@ -688,6 +708,11 @@ static struct callback_op callback_ops[] = {
688 .encode_res = (callback_encode_res_t)encode_cb_sequence_res, 708 .encode_res = (callback_encode_res_t)encode_cb_sequence_res,
689 .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ, 709 .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
690 }, 710 },
711 [OP_CB_RECALL_ANY] = {
712 .process_op = (callback_process_op_t)nfs4_callback_recallany,
713 .decode_args = (callback_decode_arg_t)decode_recallany_args,
714 .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
715 },
691#endif /* CONFIG_NFS_V4_1 */ 716#endif /* CONFIG_NFS_V4_1 */
692}; 717};
693 718
@@ -718,3 +743,10 @@ struct svc_version nfs4_callback_version1 = {
718 .vs_dispatch = NULL, 743 .vs_dispatch = NULL,
719}; 744};
720 745
746struct svc_version nfs4_callback_version4 = {
747 .vs_vers = 4,
748 .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
749 .vs_proc = nfs4_callback_procedures1,
750 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
751 .vs_dispatch = NULL,
752};
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 99ea196f071..ee77713ce68 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1260,10 +1260,20 @@ error:
1260static void nfs4_session_set_rwsize(struct nfs_server *server) 1260static void nfs4_session_set_rwsize(struct nfs_server *server)
1261{ 1261{
1262#ifdef CONFIG_NFS_V4_1 1262#ifdef CONFIG_NFS_V4_1
1263 struct nfs4_session *sess;
1264 u32 server_resp_sz;
1265 u32 server_rqst_sz;
1266
1263 if (!nfs4_has_session(server->nfs_client)) 1267 if (!nfs4_has_session(server->nfs_client))
1264 return; 1268 return;
1265 server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 1269 sess = server->nfs_client->cl_session;
1266 server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz; 1270 server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
1271 server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
1272
1273 if (server->rsize > server_resp_sz)
1274 server->rsize = server_resp_sz;
1275 if (server->wsize > server_rqst_sz)
1276 server->wsize = server_rqst_sz;
1267#endif /* CONFIG_NFS_V4_1 */ 1277#endif /* CONFIG_NFS_V4_1 */
1268} 1278}
1269 1279
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 6dd48a4405b..2563bebc4c6 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -92,7 +92,7 @@ out:
92 return status; 92 return status;
93} 93}
94 94
95static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) 95static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
96{ 96{
97 struct nfs_inode *nfsi = NFS_I(inode); 97 struct nfs_inode *nfsi = NFS_I(inode);
98 struct nfs_open_context *ctx; 98 struct nfs_open_context *ctx;
@@ -116,10 +116,11 @@ again:
116 err = nfs_delegation_claim_locks(ctx, state); 116 err = nfs_delegation_claim_locks(ctx, state);
117 put_nfs_open_context(ctx); 117 put_nfs_open_context(ctx);
118 if (err != 0) 118 if (err != 0)
119 return; 119 return err;
120 goto again; 120 goto again;
121 } 121 }
122 spin_unlock(&inode->i_lock); 122 spin_unlock(&inode->i_lock);
123 return 0;
123} 124}
124 125
125/* 126/*
@@ -261,30 +262,34 @@ static void nfs_msync_inode(struct inode *inode)
261/* 262/*
262 * Basic procedure for returning a delegation to the server 263 * Basic procedure for returning a delegation to the server
263 */ 264 */
264static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) 265static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
265{ 266{
266 struct nfs_inode *nfsi = NFS_I(inode); 267 struct nfs_inode *nfsi = NFS_I(inode);
268 int err;
267 269
268 nfs_msync_inode(inode);
269 /* 270 /*
270 * Guard against new delegated open/lock/unlock calls and against 271 * Guard against new delegated open/lock/unlock calls and against
271 * state recovery 272 * state recovery
272 */ 273 */
273 down_write(&nfsi->rwsem); 274 down_write(&nfsi->rwsem);
274 nfs_delegation_claim_opens(inode, &delegation->stateid); 275 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
275 up_write(&nfsi->rwsem); 276 up_write(&nfsi->rwsem);
276 nfs_msync_inode(inode); 277 if (err)
278 goto out;
277 279
278 return nfs_do_return_delegation(inode, delegation, 1); 280 err = nfs_do_return_delegation(inode, delegation, issync);
281out:
282 return err;
279} 283}
280 284
281/* 285/*
282 * Return all delegations that have been marked for return 286 * Return all delegations that have been marked for return
283 */ 287 */
284void nfs_client_return_marked_delegations(struct nfs_client *clp) 288int nfs_client_return_marked_delegations(struct nfs_client *clp)
285{ 289{
286 struct nfs_delegation *delegation; 290 struct nfs_delegation *delegation;
287 struct inode *inode; 291 struct inode *inode;
292 int err = 0;
288 293
289restart: 294restart:
290 rcu_read_lock(); 295 rcu_read_lock();
@@ -298,12 +303,18 @@ restart:
298 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 303 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
299 spin_unlock(&clp->cl_lock); 304 spin_unlock(&clp->cl_lock);
300 rcu_read_unlock(); 305 rcu_read_unlock();
301 if (delegation != NULL) 306 if (delegation != NULL) {
302 __nfs_inode_return_delegation(inode, delegation); 307 filemap_flush(inode->i_mapping);
308 err = __nfs_inode_return_delegation(inode, delegation, 0);
309 }
303 iput(inode); 310 iput(inode);
304 goto restart; 311 if (!err)
312 goto restart;
313 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
314 return err;
305 } 315 }
306 rcu_read_unlock(); 316 rcu_read_unlock();
317 return 0;
307} 318}
308 319
309/* 320/*
@@ -338,8 +349,10 @@ int nfs_inode_return_delegation(struct inode *inode)
338 spin_lock(&clp->cl_lock); 349 spin_lock(&clp->cl_lock);
339 delegation = nfs_detach_delegation_locked(nfsi, NULL); 350 delegation = nfs_detach_delegation_locked(nfsi, NULL);
340 spin_unlock(&clp->cl_lock); 351 spin_unlock(&clp->cl_lock);
341 if (delegation != NULL) 352 if (delegation != NULL) {
342 err = __nfs_inode_return_delegation(inode, delegation); 353 nfs_msync_inode(inode);
354 err = __nfs_inode_return_delegation(inode, delegation, 1);
355 }
343 } 356 }
344 return err; 357 return err;
345} 358}
@@ -368,33 +381,47 @@ void nfs_super_return_all_delegations(struct super_block *sb)
368 spin_unlock(&delegation->lock); 381 spin_unlock(&delegation->lock);
369 } 382 }
370 rcu_read_unlock(); 383 rcu_read_unlock();
371 nfs_client_return_marked_delegations(clp); 384 if (nfs_client_return_marked_delegations(clp) != 0)
385 nfs4_schedule_state_manager(clp);
372} 386}
373 387
374static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) 388static
389void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags)
375{ 390{
376 struct nfs_delegation *delegation; 391 struct nfs_delegation *delegation;
377 392
378 rcu_read_lock(); 393 rcu_read_lock();
379 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 394 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
380 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 395 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
381 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 396 continue;
397 if (delegation->type & flags)
398 nfs_mark_return_delegation(clp, delegation);
382 } 399 }
383 rcu_read_unlock(); 400 rcu_read_unlock();
384} 401}
385 402
403static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
404{
405 nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
406}
407
386static void nfs_delegation_run_state_manager(struct nfs_client *clp) 408static void nfs_delegation_run_state_manager(struct nfs_client *clp)
387{ 409{
388 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) 410 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
389 nfs4_schedule_state_manager(clp); 411 nfs4_schedule_state_manager(clp);
390} 412}
391 413
392void nfs_expire_all_delegations(struct nfs_client *clp) 414void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
393{ 415{
394 nfs_client_mark_return_all_delegations(clp); 416 nfs_client_mark_return_all_delegation_types(clp, flags);
395 nfs_delegation_run_state_manager(clp); 417 nfs_delegation_run_state_manager(clp);
396} 418}
397 419
420void nfs_expire_all_delegations(struct nfs_client *clp)
421{
422 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
423}
424
398/* 425/*
399 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. 426 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
400 */ 427 */
@@ -413,8 +440,7 @@ static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *c
413 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 440 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
414 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 441 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
415 continue; 442 continue;
416 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 443 nfs_mark_return_delegation(clp, delegation);
417 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
418 } 444 }
419 rcu_read_unlock(); 445 rcu_read_unlock();
420} 446}
@@ -428,18 +454,21 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
428/* 454/*
429 * Asynchronous delegation recall! 455 * Asynchronous delegation recall!
430 */ 456 */
431int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) 457int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
458 int (*validate_stateid)(struct nfs_delegation *delegation,
459 const nfs4_stateid *stateid))
432{ 460{
433 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 461 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
434 struct nfs_delegation *delegation; 462 struct nfs_delegation *delegation;
435 463
436 rcu_read_lock(); 464 rcu_read_lock();
437 delegation = rcu_dereference(NFS_I(inode)->delegation); 465 delegation = rcu_dereference(NFS_I(inode)->delegation);
438 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, 466
439 sizeof(delegation->stateid.data)) != 0) { 467 if (!validate_stateid(delegation, stateid)) {
440 rcu_read_unlock(); 468 rcu_read_unlock();
441 return -ENOENT; 469 return -ENOENT;
442 } 470 }
471
443 nfs_mark_return_delegation(clp, delegation); 472 nfs_mark_return_delegation(clp, delegation);
444 rcu_read_unlock(); 473 rcu_read_unlock();
445 nfs_delegation_run_state_manager(clp); 474 nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 09f38379517..944b627ec6e 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -34,15 +34,18 @@ enum {
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
36int nfs_inode_return_delegation(struct inode *inode); 36int nfs_inode_return_delegation(struct inode *inode);
37int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 37int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
38 int (*validate_stateid)(struct nfs_delegation *delegation,
39 const nfs4_stateid *stateid));
38void nfs_inode_return_delegation_noreclaim(struct inode *inode); 40void nfs_inode_return_delegation_noreclaim(struct inode *inode);
39 41
40struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 42struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
41void nfs_super_return_all_delegations(struct super_block *sb); 43void nfs_super_return_all_delegations(struct super_block *sb);
42void nfs_expire_all_delegations(struct nfs_client *clp); 44void nfs_expire_all_delegations(struct nfs_client *clp);
45void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
43void nfs_expire_unreferenced_delegations(struct nfs_client *clp); 46void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
44void nfs_handle_cb_pathdown(struct nfs_client *clp); 47void nfs_handle_cb_pathdown(struct nfs_client *clp);
45void nfs_client_return_marked_delegations(struct nfs_client *clp); 48int nfs_client_return_marked_delegations(struct nfs_client *clp);
46 49
47void nfs_delegation_mark_reclaim(struct nfs_client *clp); 50void nfs_delegation_mark_reclaim(struct nfs_client *clp);
48void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 51void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7cb298525ee..3c7f03b669f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1579,55 +1579,47 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1579 struct dentry *dentry = NULL, *rehash = NULL; 1579 struct dentry *dentry = NULL, *rehash = NULL;
1580 int error = -EBUSY; 1580 int error = -EBUSY;
1581 1581
1582 /*
1583 * To prevent any new references to the target during the rename,
1584 * we unhash the dentry and free the inode in advance.
1585 */
1586 if (!d_unhashed(new_dentry)) {
1587 d_drop(new_dentry);
1588 rehash = new_dentry;
1589 }
1590
1591 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", 1582 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1592 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1583 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1593 new_dentry->d_parent->d_name.name, new_dentry->d_name.name, 1584 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1594 atomic_read(&new_dentry->d_count)); 1585 atomic_read(&new_dentry->d_count));
1595 1586
1596 /* 1587 /*
1597 * First check whether the target is busy ... we can't 1588 * For non-directories, check whether the target is busy and if so,
1598 * safely do _any_ rename if the target is in use. 1589 * make a copy of the dentry and then do a silly-rename. If the
1599 * 1590 * silly-rename succeeds, the copied dentry is hashed and becomes
1600 * For files, make a copy of the dentry and then do a 1591 * the new target.
1601 * silly-rename. If the silly-rename succeeds, the
1602 * copied dentry is hashed and becomes the new target.
1603 */ 1592 */
1604 if (!new_inode) 1593 if (new_inode && !S_ISDIR(new_inode->i_mode)) {
1605 goto go_ahead; 1594 /*
1606 if (S_ISDIR(new_inode->i_mode)) { 1595 * To prevent any new references to the target during the
1607 error = -EISDIR; 1596 * rename, we unhash the dentry in advance.
1608 if (!S_ISDIR(old_inode->i_mode)) 1597 */
1609 goto out; 1598 if (!d_unhashed(new_dentry)) {
1610 } else if (atomic_read(&new_dentry->d_count) > 2) { 1599 d_drop(new_dentry);
1611 int err; 1600 rehash = new_dentry;
1612 /* copy the target dentry's name */ 1601 }
1613 dentry = d_alloc(new_dentry->d_parent, 1602
1614 &new_dentry->d_name); 1603 if (atomic_read(&new_dentry->d_count) > 2) {
1615 if (!dentry) 1604 int err;
1616 goto out; 1605
1606 /* copy the target dentry's name */
1607 dentry = d_alloc(new_dentry->d_parent,
1608 &new_dentry->d_name);
1609 if (!dentry)
1610 goto out;
1617 1611
1618 /* silly-rename the existing target ... */ 1612 /* silly-rename the existing target ... */
1619 err = nfs_sillyrename(new_dir, new_dentry); 1613 err = nfs_sillyrename(new_dir, new_dentry);
1620 if (!err) { 1614 if (err)
1621 new_dentry = rehash = dentry; 1615 goto out;
1616
1617 new_dentry = dentry;
1618 rehash = NULL;
1622 new_inode = NULL; 1619 new_inode = NULL;
1623 /* instantiate the replacement target */ 1620 }
1624 d_instantiate(new_dentry, NULL);
1625 } else if (atomic_read(&new_dentry->d_count) > 1)
1626 /* dentry still busy? */
1627 goto out;
1628 } 1621 }
1629 1622
1630go_ahead:
1631 /* 1623 /*
1632 * ... prune child dentries and writebacks if needed. 1624 * ... prune child dentries and writebacks if needed.
1633 */ 1625 */
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index f4d54ba97cc..95e1ca765d4 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -146,7 +146,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
146 return 0; 146 return 0;
147} 147}
148 148
149struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, 149static struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
150 struct nfs_dns_ent *key) 150 struct nfs_dns_ent *key)
151{ 151{
152 struct cache_head *ch; 152 struct cache_head *ch;
@@ -159,7 +159,7 @@ struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
159 return container_of(ch, struct nfs_dns_ent, h); 159 return container_of(ch, struct nfs_dns_ent, h);
160} 160}
161 161
162struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd, 162static struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
163 struct nfs_dns_ent *new, 163 struct nfs_dns_ent *new,
164 struct nfs_dns_ent *key) 164 struct nfs_dns_ent *key)
165{ 165{
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f5fdd39e037..6b891328f33 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
581{ 581{
582 struct nfs_open_context *ctx; 582 struct nfs_open_context *ctx;
583 583
584 if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) 584 if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
585 return 1; 585 return 1;
586 ctx = nfs_file_open_context(filp); 586 ctx = nfs_file_open_context(filp);
587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) 587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
@@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
622 622
623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); 623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
624 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 624 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
625 /* Return error values for O_SYNC and IS_SYNC() */ 625 /* Return error values for O_DSYNC and IS_SYNC() */
626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
628 if (err < 0) 628 if (err < 0)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e21b1bb9972..29e464d23b3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -30,6 +30,15 @@ static inline int nfs4_has_session(const struct nfs_client *clp)
30 return 0; 30 return 0;
31} 31}
32 32
33static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
34{
35#ifdef CONFIG_NFS_V4_1
36 if (nfs4_has_session(clp))
37 return (clp->cl_session->flags & SESSION4_PERSIST);
38#endif /* CONFIG_NFS_V4_1 */
39 return 0;
40}
41
33struct nfs_clone_mount { 42struct nfs_clone_mount {
34 const struct super_block *sb; 43 const struct super_block *sb;
35 const struct dentry *dentry; 44 const struct dentry *dentry;
@@ -156,6 +165,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
156 165
157/* callback_xdr.c */ 166/* callback_xdr.c */
158extern struct svc_version nfs4_callback_version1; 167extern struct svc_version nfs4_callback_version1;
168extern struct svc_version nfs4_callback_version4;
159 169
160/* pagelist.c */ 170/* pagelist.c */
161extern int __init nfs_init_nfspagecache(void); 171extern int __init nfs_init_nfspagecache(void);
@@ -177,24 +187,14 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int);
177extern struct rpc_procinfo nfs3_procedures[]; 187extern struct rpc_procinfo nfs3_procedures[];
178extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 188extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int);
179 189
180/* nfs4proc.c */
181static inline void nfs4_restart_rpc(struct rpc_task *task,
182 const struct nfs_client *clp)
183{
184#ifdef CONFIG_NFS_V4_1
185 if (nfs4_has_session(clp) &&
186 test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
187 rpc_restart_call_prepare(task);
188 return;
189 }
190#endif /* CONFIG_NFS_V4_1 */
191 rpc_restart_call(task);
192}
193
194/* nfs4xdr.c */ 190/* nfs4xdr.c */
195#ifdef CONFIG_NFS_V4 191#ifdef CONFIG_NFS_V4
196extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 192extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
197#endif 193#endif
194#ifdef CONFIG_NFS_V4_1
195extern const u32 nfs41_maxread_overhead;
196extern const u32 nfs41_maxwrite_overhead;
197#endif
198 198
199/* nfs4proc.c */ 199/* nfs4proc.c */
200#ifdef CONFIG_NFS_V4 200#ifdef CONFIG_NFS_V4
@@ -273,20 +273,6 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
273 struct nfs4_sequence_res *res, 273 struct nfs4_sequence_res *res,
274 int cache_reply); 274 int cache_reply);
275 275
276#ifdef CONFIG_NFS_V4_1
277extern void nfs41_sequence_free_slot(const struct nfs_client *,
278 struct nfs4_sequence_res *res);
279#endif /* CONFIG_NFS_V4_1 */
280
281static inline void nfs4_sequence_free_slot(const struct nfs_client *clp,
282 struct nfs4_sequence_res *res)
283{
284#ifdef CONFIG_NFS_V4_1
285 if (nfs4_has_session(clp))
286 nfs41_sequence_free_slot(clp, res);
287#endif /* CONFIG_NFS_V4_1 */
288}
289
290/* 276/*
291 * Determine the device name as a string 277 * Determine the device name as a string
292 */ 278 */
@@ -380,3 +366,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
380 return ((unsigned long)len + (unsigned long)base + 366 return ((unsigned long)len + (unsigned long)base +
381 PAGE_SIZE - 1) >> PAGE_SHIFT; 367 PAGE_SIZE - 1) >> PAGE_SHIFT;
382} 368}
369
370/*
371 * Helper for restarting RPC calls in the possible presence of NFSv4.1
372 * sessions.
373 */
374static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp)
375{
376 if (nfs4_has_session(clp))
377 rpc_restart_call_prepare(task);
378 else
379 rpc_restart_call(task);
380}
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index ceda50aad73..46d779abafd 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -25,13 +25,7 @@ struct nfs_iostats {
25static inline void nfs_inc_server_stats(const struct nfs_server *server, 25static inline void nfs_inc_server_stats(const struct nfs_server *server,
26 enum nfs_stat_eventcounters stat) 26 enum nfs_stat_eventcounters stat)
27{ 27{
28 struct nfs_iostats *iostats; 28 this_cpu_inc(server->io_stats->events[stat]);
29 int cpu;
30
31 cpu = get_cpu();
32 iostats = per_cpu_ptr(server->io_stats, cpu);
33 iostats->events[stat]++;
34 put_cpu();
35} 29}
36 30
37static inline void nfs_inc_stats(const struct inode *inode, 31static inline void nfs_inc_stats(const struct inode *inode,
@@ -44,13 +38,7 @@ static inline void nfs_add_server_stats(const struct nfs_server *server,
44 enum nfs_stat_bytecounters stat, 38 enum nfs_stat_bytecounters stat,
45 unsigned long addend) 39 unsigned long addend)
46{ 40{
47 struct nfs_iostats *iostats; 41 this_cpu_add(server->io_stats->bytes[stat], addend);
48 int cpu;
49
50 cpu = get_cpu();
51 iostats = per_cpu_ptr(server->io_stats, cpu);
52 iostats->bytes[stat] += addend;
53 put_cpu();
54} 42}
55 43
56static inline void nfs_add_stats(const struct inode *inode, 44static inline void nfs_add_stats(const struct inode *inode,
@@ -65,13 +53,7 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
65 enum nfs_stat_fscachecounters stat, 53 enum nfs_stat_fscachecounters stat,
66 unsigned long addend) 54 unsigned long addend)
67{ 55{
68 struct nfs_iostats *iostats; 56 this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
69 int cpu;
70
71 cpu = get_cpu();
72 iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
73 iostats->fscache[stat] += addend;
74 put_cpu();
75} 57}
76#endif 58#endif
77 59
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6ea07a3c75d..865265bdca0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,7 +44,8 @@ enum nfs4_client_state {
44 NFS4CLNT_RECLAIM_REBOOT, 44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_SESSION_SETUP, 47 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_SESSION_DRAINING,
48}; 49};
49 50
50/* 51/*
@@ -107,6 +108,10 @@ enum {
107 NFS_OWNER_RECLAIM_NOGRACE 108 NFS_OWNER_RECLAIM_NOGRACE
108}; 109};
109 110
111#define NFS_LOCK_NEW 0
112#define NFS_LOCK_RECLAIM 1
113#define NFS_LOCK_EXPIRED 2
114
110/* 115/*
111 * struct nfs4_state maintains the client-side state for a given 116 * struct nfs4_state maintains the client-side state for a given
112 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 117 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -180,6 +185,7 @@ struct nfs4_state_recovery_ops {
180 int (*recover_lock)(struct nfs4_state *, struct file_lock *); 185 int (*recover_lock)(struct nfs4_state *, struct file_lock *);
181 int (*establish_clid)(struct nfs_client *, struct rpc_cred *); 186 int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
182 struct rpc_cred * (*get_clid_cred)(struct nfs_client *); 187 struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
188 int (*reclaim_complete)(struct nfs_client *);
183}; 189};
184 190
185struct nfs4_state_maintenance_ops { 191struct nfs4_state_maintenance_ops {
@@ -200,9 +206,11 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
200/* nfs4proc.c */ 206/* nfs4proc.c */
201extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); 207extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
202extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 208extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
209extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
203extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 210extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
204extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 211extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
205extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 212extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
213extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
206extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); 214extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
207extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 215extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
208extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 216extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -218,9 +226,11 @@ extern int nfs4_setup_sequence(struct nfs_client *clp,
218 int cache_reply, struct rpc_task *task); 226 int cache_reply, struct rpc_task *task);
219extern void nfs4_destroy_session(struct nfs4_session *session); 227extern void nfs4_destroy_session(struct nfs4_session *session);
220extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 228extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
221extern int nfs4_proc_create_session(struct nfs_client *, int reset); 229extern int nfs4_proc_create_session(struct nfs_client *);
222extern int nfs4_proc_destroy_session(struct nfs4_session *); 230extern int nfs4_proc_destroy_session(struct nfs4_session *);
223extern int nfs4_init_session(struct nfs_server *server); 231extern int nfs4_init_session(struct nfs_server *server);
232extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
233 struct nfs_fsinfo *fsinfo);
224#else /* CONFIG_NFS_v4_1 */ 234#else /* CONFIG_NFS_v4_1 */
225static inline int nfs4_setup_sequence(struct nfs_client *clp, 235static inline int nfs4_setup_sequence(struct nfs_client *clp,
226 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 236 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
@@ -267,6 +277,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
267extern void nfs4_schedule_state_recovery(struct nfs_client *); 277extern void nfs4_schedule_state_recovery(struct nfs_client *);
268extern void nfs4_schedule_state_manager(struct nfs_client *); 278extern void nfs4_schedule_state_manager(struct nfs_client *);
269extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 279extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
280extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
270extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 281extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
271extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 282extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
272extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 283extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -275,6 +286,7 @@ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
275extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 286extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
276extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 287extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
277extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 288extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
289extern void nfs_release_seqid(struct nfs_seqid *seqid);
278extern void nfs_free_seqid(struct nfs_seqid *seqid); 290extern void nfs_free_seqid(struct nfs_seqid *seqid);
279 291
280extern const nfs4_stateid zero_stateid; 292extern const nfs4_stateid zero_stateid;
@@ -287,6 +299,7 @@ struct nfs4_mount_data;
287 299
288/* callback_xdr.c */ 300/* callback_xdr.c */
289extern struct svc_version nfs4_callback_version1; 301extern struct svc_version nfs4_callback_version1;
302extern struct svc_version nfs4_callback_version4;
290 303
291#else 304#else
292 305
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 741a562177f..198d51d17c1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64 64
65struct nfs4_opendata; 65struct nfs4_opendata;
66static int _nfs4_proc_open(struct nfs4_opendata *data); 66static int _nfs4_proc_open(struct nfs4_opendata *data);
67static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
67static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
68static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 69static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
69static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 70static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -270,11 +271,18 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
270 case -NFS4ERR_SEQ_MISORDERED: 271 case -NFS4ERR_SEQ_MISORDERED:
271 dprintk("%s ERROR: %d Reset session\n", __func__, 272 dprintk("%s ERROR: %d Reset session\n", __func__,
272 errorcode); 273 errorcode);
273 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); 274 nfs4_schedule_state_recovery(clp);
274 exception->retry = 1; 275 exception->retry = 1;
275 /* FALLTHROUGH */ 276 break;
276#endif /* !defined(CONFIG_NFS_V4_1) */ 277#endif /* !defined(CONFIG_NFS_V4_1) */
277 case -NFS4ERR_FILE_OPEN: 278 case -NFS4ERR_FILE_OPEN:
279 if (exception->timeout > HZ) {
280 /* We have retried a decent amount, time to
281 * fail
282 */
283 ret = -EBUSY;
284 break;
285 }
278 case -NFS4ERR_GRACE: 286 case -NFS4ERR_GRACE:
279 case -NFS4ERR_DELAY: 287 case -NFS4ERR_DELAY:
280 ret = nfs4_delay(server->client, &exception->timeout); 288 ret = nfs4_delay(server->client, &exception->timeout);
@@ -311,48 +319,67 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
311 * so we need to scan down from highest_used_slotid to 0 looking for the now 319 * so we need to scan down from highest_used_slotid to 0 looking for the now
312 * highest slotid in use. 320 * highest slotid in use.
313 * If none found, highest_used_slotid is set to -1. 321 * If none found, highest_used_slotid is set to -1.
322 *
323 * Must be called while holding tbl->slot_tbl_lock
314 */ 324 */
315static void 325static void
316nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 326nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
317{ 327{
318 int slotid = free_slotid; 328 int slotid = free_slotid;
319 329
320 spin_lock(&tbl->slot_tbl_lock);
321 /* clear used bit in bitmap */ 330 /* clear used bit in bitmap */
322 __clear_bit(slotid, tbl->used_slots); 331 __clear_bit(slotid, tbl->used_slots);
323 332
324 /* update highest_used_slotid when it is freed */ 333 /* update highest_used_slotid when it is freed */
325 if (slotid == tbl->highest_used_slotid) { 334 if (slotid == tbl->highest_used_slotid) {
326 slotid = find_last_bit(tbl->used_slots, tbl->max_slots); 335 slotid = find_last_bit(tbl->used_slots, tbl->max_slots);
327 if (slotid >= 0 && slotid < tbl->max_slots) 336 if (slotid < tbl->max_slots)
328 tbl->highest_used_slotid = slotid; 337 tbl->highest_used_slotid = slotid;
329 else 338 else
330 tbl->highest_used_slotid = -1; 339 tbl->highest_used_slotid = -1;
331 } 340 }
332 rpc_wake_up_next(&tbl->slot_tbl_waitq);
333 spin_unlock(&tbl->slot_tbl_lock);
334 dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, 341 dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__,
335 free_slotid, tbl->highest_used_slotid); 342 free_slotid, tbl->highest_used_slotid);
336} 343}
337 344
338void nfs41_sequence_free_slot(const struct nfs_client *clp, 345/*
339 struct nfs4_sequence_res *res) 346 * Signal state manager thread if session is drained
347 */
348static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
340{ 349{
341 struct nfs4_slot_table *tbl; 350 struct rpc_task *task;
342 351
343 if (!nfs4_has_session(clp)) { 352 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
344 dprintk("%s: No session\n", __func__); 353 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
354 if (task)
355 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
345 return; 356 return;
346 } 357 }
358
359 if (ses->fc_slot_table.highest_used_slotid != -1)
360 return;
361
362 dprintk("%s COMPLETE: Session Drained\n", __func__);
363 complete(&ses->complete);
364}
365
366static void nfs41_sequence_free_slot(const struct nfs_client *clp,
367 struct nfs4_sequence_res *res)
368{
369 struct nfs4_slot_table *tbl;
370
347 tbl = &clp->cl_session->fc_slot_table; 371 tbl = &clp->cl_session->fc_slot_table;
348 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 372 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
349 dprintk("%s: No slot\n", __func__);
350 /* just wake up the next guy waiting since 373 /* just wake up the next guy waiting since
351 * we may have not consumed a slot after all */ 374 * we may have not consumed a slot after all */
352 rpc_wake_up_next(&tbl->slot_tbl_waitq); 375 dprintk("%s: No slot\n", __func__);
353 return; 376 return;
354 } 377 }
378
379 spin_lock(&tbl->slot_tbl_lock);
355 nfs4_free_slot(tbl, res->sr_slotid); 380 nfs4_free_slot(tbl, res->sr_slotid);
381 nfs41_check_drain_session_complete(clp->cl_session);
382 spin_unlock(&tbl->slot_tbl_lock);
356 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 383 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
357} 384}
358 385
@@ -377,10 +404,10 @@ static void nfs41_sequence_done(struct nfs_client *clp,
377 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 404 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
378 goto out; 405 goto out;
379 406
380 tbl = &clp->cl_session->fc_slot_table; 407 /* Check the SEQUENCE operation status */
381 slot = tbl->slots + res->sr_slotid;
382
383 if (res->sr_status == 0) { 408 if (res->sr_status == 0) {
409 tbl = &clp->cl_session->fc_slot_table;
410 slot = tbl->slots + res->sr_slotid;
384 /* Update the slot's sequence and clientid lease timer */ 411 /* Update the slot's sequence and clientid lease timer */
385 ++slot->seq_nr; 412 ++slot->seq_nr;
386 timestamp = res->sr_renewal_time; 413 timestamp = res->sr_renewal_time;
@@ -388,7 +415,8 @@ static void nfs41_sequence_done(struct nfs_client *clp,
388 if (time_before(clp->cl_last_renewal, timestamp)) 415 if (time_before(clp->cl_last_renewal, timestamp))
389 clp->cl_last_renewal = timestamp; 416 clp->cl_last_renewal = timestamp;
390 spin_unlock(&clp->cl_lock); 417 spin_unlock(&clp->cl_lock);
391 return; 418 /* Check sequence flags */
419 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
392 } 420 }
393out: 421out:
394 /* The session may be reset by one of the error handlers. */ 422 /* The session may be reset by one of the error handlers. */
@@ -407,7 +435,7 @@ out:
407 * Note: must be called with under the slot_tbl_lock. 435 * Note: must be called with under the slot_tbl_lock.
408 */ 436 */
409static u8 437static u8
410nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) 438nfs4_find_slot(struct nfs4_slot_table *tbl)
411{ 439{
412 int slotid; 440 int slotid;
413 u8 ret_id = NFS4_MAX_SLOT_TABLE; 441 u8 ret_id = NFS4_MAX_SLOT_TABLE;
@@ -429,24 +457,6 @@ out:
429 return ret_id; 457 return ret_id;
430} 458}
431 459
432static int nfs4_recover_session(struct nfs4_session *session)
433{
434 struct nfs_client *clp = session->clp;
435 unsigned int loop;
436 int ret;
437
438 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
439 ret = nfs4_wait_clnt_recover(clp);
440 if (ret != 0)
441 break;
442 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
443 break;
444 nfs4_schedule_state_manager(clp);
445 ret = -EIO;
446 }
447 return ret;
448}
449
450static int nfs41_setup_sequence(struct nfs4_session *session, 460static int nfs41_setup_sequence(struct nfs4_session *session,
451 struct nfs4_sequence_args *args, 461 struct nfs4_sequence_args *args,
452 struct nfs4_sequence_res *res, 462 struct nfs4_sequence_res *res,
@@ -455,7 +465,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
455{ 465{
456 struct nfs4_slot *slot; 466 struct nfs4_slot *slot;
457 struct nfs4_slot_table *tbl; 467 struct nfs4_slot_table *tbl;
458 int status = 0;
459 u8 slotid; 468 u8 slotid;
460 469
461 dprintk("--> %s\n", __func__); 470 dprintk("--> %s\n", __func__);
@@ -468,24 +477,27 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
468 tbl = &session->fc_slot_table; 477 tbl = &session->fc_slot_table;
469 478
470 spin_lock(&tbl->slot_tbl_lock); 479 spin_lock(&tbl->slot_tbl_lock);
471 if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) { 480 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
472 if (tbl->highest_used_slotid != -1) { 481 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
473 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 482 /*
474 spin_unlock(&tbl->slot_tbl_lock); 483 * The state manager will wait until the slot table is empty.
475 dprintk("<-- %s: Session reset: draining\n", __func__); 484 * Schedule the reset thread
476 return -EAGAIN; 485 */
477 } 486 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
487 spin_unlock(&tbl->slot_tbl_lock);
488 dprintk("%s Schedule Session Reset\n", __func__);
489 return -EAGAIN;
490 }
478 491
479 /* The slot table is empty; start the reset thread */ 492 if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
480 dprintk("%s Session Reset\n", __func__); 493 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
494 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
481 spin_unlock(&tbl->slot_tbl_lock); 495 spin_unlock(&tbl->slot_tbl_lock);
482 status = nfs4_recover_session(session); 496 dprintk("%s enforce FIFO order\n", __func__);
483 if (status) 497 return -EAGAIN;
484 return status;
485 spin_lock(&tbl->slot_tbl_lock);
486 } 498 }
487 499
488 slotid = nfs4_find_slot(tbl, task); 500 slotid = nfs4_find_slot(tbl);
489 if (slotid == NFS4_MAX_SLOT_TABLE) { 501 if (slotid == NFS4_MAX_SLOT_TABLE) {
490 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 502 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
491 spin_unlock(&tbl->slot_tbl_lock); 503 spin_unlock(&tbl->slot_tbl_lock);
@@ -494,6 +506,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
494 } 506 }
495 spin_unlock(&tbl->slot_tbl_lock); 507 spin_unlock(&tbl->slot_tbl_lock);
496 508
509 rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
497 slot = tbl->slots + slotid; 510 slot = tbl->slots + slotid;
498 args->sa_session = session; 511 args->sa_session = session;
499 args->sa_slotid = slotid; 512 args->sa_slotid = slotid;
@@ -527,7 +540,7 @@ int nfs4_setup_sequence(struct nfs_client *clp,
527 goto out; 540 goto out;
528 ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, 541 ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
529 task); 542 task);
530 if (ret != -EAGAIN) { 543 if (ret && ret != -EAGAIN) {
531 /* terminate rpc task */ 544 /* terminate rpc task */
532 task->tk_status = ret; 545 task->tk_status = ret;
533 task->tk_action = NULL; 546 task->tk_action = NULL;
@@ -556,12 +569,17 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
556 rpc_call_start(task); 569 rpc_call_start(task);
557} 570}
558 571
572static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
573{
574 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
575 nfs41_call_sync_prepare(task, calldata);
576}
577
559static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) 578static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
560{ 579{
561 struct nfs41_call_sync_data *data = calldata; 580 struct nfs41_call_sync_data *data = calldata;
562 581
563 nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); 582 nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
564 nfs41_sequence_free_slot(data->clp, data->seq_res);
565} 583}
566 584
567struct rpc_call_ops nfs41_call_sync_ops = { 585struct rpc_call_ops nfs41_call_sync_ops = {
@@ -569,12 +587,18 @@ struct rpc_call_ops nfs41_call_sync_ops = {
569 .rpc_call_done = nfs41_call_sync_done, 587 .rpc_call_done = nfs41_call_sync_done,
570}; 588};
571 589
590struct rpc_call_ops nfs41_call_priv_sync_ops = {
591 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
592 .rpc_call_done = nfs41_call_sync_done,
593};
594
572static int nfs4_call_sync_sequence(struct nfs_client *clp, 595static int nfs4_call_sync_sequence(struct nfs_client *clp,
573 struct rpc_clnt *clnt, 596 struct rpc_clnt *clnt,
574 struct rpc_message *msg, 597 struct rpc_message *msg,
575 struct nfs4_sequence_args *args, 598 struct nfs4_sequence_args *args,
576 struct nfs4_sequence_res *res, 599 struct nfs4_sequence_res *res,
577 int cache_reply) 600 int cache_reply,
601 int privileged)
578{ 602{
579 int ret; 603 int ret;
580 struct rpc_task *task; 604 struct rpc_task *task;
@@ -592,6 +616,8 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
592 }; 616 };
593 617
594 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 618 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
619 if (privileged)
620 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
595 task = rpc_run_task(&task_setup); 621 task = rpc_run_task(&task_setup);
596 if (IS_ERR(task)) 622 if (IS_ERR(task))
597 ret = PTR_ERR(task); 623 ret = PTR_ERR(task);
@@ -609,7 +635,7 @@ int _nfs4_call_sync_session(struct nfs_server *server,
609 int cache_reply) 635 int cache_reply)
610{ 636{
611 return nfs4_call_sync_sequence(server->nfs_client, server->client, 637 return nfs4_call_sync_sequence(server->nfs_client, server->client,
612 msg, args, res, cache_reply); 638 msg, args, res, cache_reply, 0);
613} 639}
614 640
615#endif /* CONFIG_NFS_V4_1 */ 641#endif /* CONFIG_NFS_V4_1 */
@@ -637,15 +663,6 @@ static void nfs4_sequence_done(const struct nfs_server *server,
637#endif /* CONFIG_NFS_V4_1 */ 663#endif /* CONFIG_NFS_V4_1 */
638} 664}
639 665
640/* no restart, therefore free slot here */
641static void nfs4_sequence_done_free_slot(const struct nfs_server *server,
642 struct nfs4_sequence_res *res,
643 int rpc_status)
644{
645 nfs4_sequence_done(server, res, rpc_status);
646 nfs4_sequence_free_slot(server->nfs_client, res);
647}
648
649static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 666static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
650{ 667{
651 struct nfs_inode *nfsi = NFS_I(dir); 668 struct nfs_inode *nfsi = NFS_I(dir);
@@ -720,9 +737,15 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
720 p->o_arg.bitmask = server->attr_bitmask; 737 p->o_arg.bitmask = server->attr_bitmask;
721 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 738 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
722 if (flags & O_EXCL) { 739 if (flags & O_EXCL) {
723 u32 *s = (u32 *) p->o_arg.u.verifier.data; 740 if (nfs4_has_persistent_session(server->nfs_client)) {
724 s[0] = jiffies; 741 /* GUARDED */
725 s[1] = current->pid; 742 p->o_arg.u.attrs = &p->attrs;
743 memcpy(&p->attrs, attrs, sizeof(p->attrs));
744 } else { /* EXCLUSIVE4_1 */
745 u32 *s = (u32 *) p->o_arg.u.verifier.data;
746 s[0] = jiffies;
747 s[1] = current->pid;
748 }
726 } else if (flags & O_CREAT) { 749 } else if (flags & O_CREAT) {
727 p->o_arg.u.attrs = &p->attrs; 750 p->o_arg.u.attrs = &p->attrs;
728 memcpy(&p->attrs, attrs, sizeof(p->attrs)); 751 memcpy(&p->attrs, attrs, sizeof(p->attrs));
@@ -776,13 +799,16 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode
776 goto out; 799 goto out;
777 switch (mode & (FMODE_READ|FMODE_WRITE)) { 800 switch (mode & (FMODE_READ|FMODE_WRITE)) {
778 case FMODE_READ: 801 case FMODE_READ:
779 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; 802 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
803 && state->n_rdonly != 0;
780 break; 804 break;
781 case FMODE_WRITE: 805 case FMODE_WRITE:
782 ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; 806 ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0
807 && state->n_wronly != 0;
783 break; 808 break;
784 case FMODE_READ|FMODE_WRITE: 809 case FMODE_READ|FMODE_WRITE:
785 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; 810 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0
811 && state->n_rdwr != 0;
786 } 812 }
787out: 813out:
788 return ret; 814 return ret;
@@ -1047,7 +1073,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1047 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 1073 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1048 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 1074 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1049 nfs4_init_opendata_res(opendata); 1075 nfs4_init_opendata_res(opendata);
1050 ret = _nfs4_proc_open(opendata); 1076 ret = _nfs4_recover_proc_open(opendata);
1051 if (ret != 0) 1077 if (ret != 0)
1052 return ret; 1078 return ret;
1053 newstate = nfs4_opendata_to_nfs4_state(opendata); 1079 newstate = nfs4_opendata_to_nfs4_state(opendata);
@@ -1183,6 +1209,14 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1183 case -ENOENT: 1209 case -ENOENT:
1184 case -ESTALE: 1210 case -ESTALE:
1185 goto out; 1211 goto out;
1212 case -NFS4ERR_BADSESSION:
1213 case -NFS4ERR_BADSLOT:
1214 case -NFS4ERR_BAD_HIGH_SLOT:
1215 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1216 case -NFS4ERR_DEADSESSION:
1217 nfs4_schedule_state_recovery(
1218 server->nfs_client);
1219 goto out;
1186 case -NFS4ERR_STALE_CLIENTID: 1220 case -NFS4ERR_STALE_CLIENTID:
1187 case -NFS4ERR_STALE_STATEID: 1221 case -NFS4ERR_STALE_STATEID:
1188 case -NFS4ERR_EXPIRED: 1222 case -NFS4ERR_EXPIRED:
@@ -1330,14 +1364,20 @@ out_no_action:
1330 1364
1331} 1365}
1332 1366
1367static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
1368{
1369 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
1370 nfs4_open_prepare(task, calldata);
1371}
1372
1333static void nfs4_open_done(struct rpc_task *task, void *calldata) 1373static void nfs4_open_done(struct rpc_task *task, void *calldata)
1334{ 1374{
1335 struct nfs4_opendata *data = calldata; 1375 struct nfs4_opendata *data = calldata;
1336 1376
1337 data->rpc_status = task->tk_status; 1377 data->rpc_status = task->tk_status;
1338 1378
1339 nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res, 1379 nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
1340 task->tk_status); 1380 task->tk_status);
1341 1381
1342 if (RPC_ASSASSINATED(task)) 1382 if (RPC_ASSASSINATED(task))
1343 return; 1383 return;
@@ -1388,10 +1428,13 @@ static const struct rpc_call_ops nfs4_open_ops = {
1388 .rpc_release = nfs4_open_release, 1428 .rpc_release = nfs4_open_release,
1389}; 1429};
1390 1430
1391/* 1431static const struct rpc_call_ops nfs4_recover_open_ops = {
1392 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 1432 .rpc_call_prepare = nfs4_recover_open_prepare,
1393 */ 1433 .rpc_call_done = nfs4_open_done,
1394static int _nfs4_proc_open(struct nfs4_opendata *data) 1434 .rpc_release = nfs4_open_release,
1435};
1436
1437static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1395{ 1438{
1396 struct inode *dir = data->dir->d_inode; 1439 struct inode *dir = data->dir->d_inode;
1397 struct nfs_server *server = NFS_SERVER(dir); 1440 struct nfs_server *server = NFS_SERVER(dir);
@@ -1418,21 +1461,57 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1418 data->rpc_done = 0; 1461 data->rpc_done = 0;
1419 data->rpc_status = 0; 1462 data->rpc_status = 0;
1420 data->cancelled = 0; 1463 data->cancelled = 0;
1464 if (isrecover)
1465 task_setup_data.callback_ops = &nfs4_recover_open_ops;
1421 task = rpc_run_task(&task_setup_data); 1466 task = rpc_run_task(&task_setup_data);
1422 if (IS_ERR(task)) 1467 if (IS_ERR(task))
1423 return PTR_ERR(task); 1468 return PTR_ERR(task);
1424 status = nfs4_wait_for_completion_rpc_task(task); 1469 status = nfs4_wait_for_completion_rpc_task(task);
1425 if (status != 0) { 1470 if (status != 0) {
1426 data->cancelled = 1; 1471 data->cancelled = 1;
1427 smp_wmb(); 1472 smp_wmb();
1428 } else 1473 } else
1429 status = data->rpc_status; 1474 status = data->rpc_status;
1430 rpc_put_task(task); 1475 rpc_put_task(task);
1476
1477 return status;
1478}
1479
1480static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1481{
1482 struct inode *dir = data->dir->d_inode;
1483 struct nfs_openres *o_res = &data->o_res;
1484 int status;
1485
1486 status = nfs4_run_open_task(data, 1);
1431 if (status != 0 || !data->rpc_done) 1487 if (status != 0 || !data->rpc_done)
1432 return status; 1488 return status;
1433 1489
1434 if (o_res->fh.size == 0) 1490 nfs_refresh_inode(dir, o_res->dir_attr);
1435 _nfs4_proc_lookup(dir, o_arg->name, &o_res->fh, o_res->f_attr); 1491
1492 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1493 status = _nfs4_proc_open_confirm(data);
1494 if (status != 0)
1495 return status;
1496 }
1497
1498 return status;
1499}
1500
1501/*
1502 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
1503 */
1504static int _nfs4_proc_open(struct nfs4_opendata *data)
1505{
1506 struct inode *dir = data->dir->d_inode;
1507 struct nfs_server *server = NFS_SERVER(dir);
1508 struct nfs_openargs *o_arg = &data->o_arg;
1509 struct nfs_openres *o_res = &data->o_res;
1510 int status;
1511
1512 status = nfs4_run_open_task(data, 0);
1513 if (status != 0 || !data->rpc_done)
1514 return status;
1436 1515
1437 if (o_arg->open_flags & O_CREAT) { 1516 if (o_arg->open_flags & O_CREAT) {
1438 update_changeattr(dir, &o_res->cinfo); 1517 update_changeattr(dir, &o_res->cinfo);
@@ -1488,7 +1567,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
1488 return ret; 1567 return ret;
1489} 1568}
1490 1569
1491static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) 1570static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
1492{ 1571{
1493 struct nfs_server *server = NFS_SERVER(state->inode); 1572 struct nfs_server *server = NFS_SERVER(state->inode);
1494 struct nfs4_exception exception = { }; 1573 struct nfs4_exception exception = { };
@@ -1496,10 +1575,16 @@ static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4
1496 1575
1497 do { 1576 do {
1498 err = _nfs4_open_expired(ctx, state); 1577 err = _nfs4_open_expired(ctx, state);
1499 if (err != -NFS4ERR_DELAY) 1578 switch (err) {
1500 break; 1579 default:
1501 nfs4_handle_exception(server, err, &exception); 1580 goto out;
1581 case -NFS4ERR_GRACE:
1582 case -NFS4ERR_DELAY:
1583 nfs4_handle_exception(server, err, &exception);
1584 err = 0;
1585 }
1502 } while (exception.retry); 1586 } while (exception.retry);
1587out:
1503 return err; 1588 return err;
1504} 1589}
1505 1590
@@ -1712,6 +1797,18 @@ static void nfs4_free_closedata(void *data)
1712 kfree(calldata); 1797 kfree(calldata);
1713} 1798}
1714 1799
1800static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
1801 fmode_t fmode)
1802{
1803 spin_lock(&state->owner->so_lock);
1804 if (!(fmode & FMODE_READ))
1805 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1806 if (!(fmode & FMODE_WRITE))
1807 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1808 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1809 spin_unlock(&state->owner->so_lock);
1810}
1811
1715static void nfs4_close_done(struct rpc_task *task, void *data) 1812static void nfs4_close_done(struct rpc_task *task, void *data)
1716{ 1813{
1717 struct nfs4_closedata *calldata = data; 1814 struct nfs4_closedata *calldata = data;
@@ -1728,6 +1825,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1728 case 0: 1825 case 0:
1729 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 1826 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
1730 renew_lease(server, calldata->timestamp); 1827 renew_lease(server, calldata->timestamp);
1828 nfs4_close_clear_stateid_flags(state,
1829 calldata->arg.fmode);
1731 break; 1830 break;
1732 case -NFS4ERR_STALE_STATEID: 1831 case -NFS4ERR_STALE_STATEID:
1733 case -NFS4ERR_OLD_STATEID: 1832 case -NFS4ERR_OLD_STATEID:
@@ -1736,12 +1835,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1736 if (calldata->arg.fmode == 0) 1835 if (calldata->arg.fmode == 0)
1737 break; 1836 break;
1738 default: 1837 default:
1739 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { 1838 if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
1740 nfs4_restart_rpc(task, server->nfs_client); 1839 rpc_restart_call_prepare(task);
1741 return;
1742 }
1743 } 1840 }
1744 nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res); 1841 nfs_release_seqid(calldata->arg.seqid);
1745 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 1842 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
1746} 1843}
1747 1844
@@ -1749,38 +1846,39 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1749{ 1846{
1750 struct nfs4_closedata *calldata = data; 1847 struct nfs4_closedata *calldata = data;
1751 struct nfs4_state *state = calldata->state; 1848 struct nfs4_state *state = calldata->state;
1752 int clear_rd, clear_wr, clear_rdwr; 1849 int call_close = 0;
1753 1850
1754 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 1851 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
1755 return; 1852 return;
1756 1853
1757 clear_rd = clear_wr = clear_rdwr = 0; 1854 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1855 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
1758 spin_lock(&state->owner->so_lock); 1856 spin_lock(&state->owner->so_lock);
1759 /* Calculate the change in open mode */ 1857 /* Calculate the change in open mode */
1760 if (state->n_rdwr == 0) { 1858 if (state->n_rdwr == 0) {
1761 if (state->n_rdonly == 0) { 1859 if (state->n_rdonly == 0) {
1762 clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); 1860 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
1763 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); 1861 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
1862 calldata->arg.fmode &= ~FMODE_READ;
1764 } 1863 }
1765 if (state->n_wronly == 0) { 1864 if (state->n_wronly == 0) {
1766 clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); 1865 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
1767 clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); 1866 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
1867 calldata->arg.fmode &= ~FMODE_WRITE;
1768 } 1868 }
1769 } 1869 }
1770 spin_unlock(&state->owner->so_lock); 1870 spin_unlock(&state->owner->so_lock);
1771 if (!clear_rd && !clear_wr && !clear_rdwr) { 1871
1872 if (!call_close) {
1772 /* Note: exit _without_ calling nfs4_close_done */ 1873 /* Note: exit _without_ calling nfs4_close_done */
1773 task->tk_action = NULL; 1874 task->tk_action = NULL;
1774 return; 1875 return;
1775 } 1876 }
1877
1878 if (calldata->arg.fmode == 0)
1879 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
1880
1776 nfs_fattr_init(calldata->res.fattr); 1881 nfs_fattr_init(calldata->res.fattr);
1777 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
1778 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1779 calldata->arg.fmode = FMODE_READ;
1780 } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
1781 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1782 calldata->arg.fmode = FMODE_WRITE;
1783 }
1784 calldata->timestamp = jiffies; 1882 calldata->timestamp = jiffies;
1785 if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, 1883 if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
1786 &calldata->arg.seq_args, &calldata->res.seq_res, 1884 &calldata->arg.seq_args, &calldata->res.seq_res,
@@ -1832,8 +1930,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1832 calldata->state = state; 1930 calldata->state = state;
1833 calldata->arg.fh = NFS_FH(state->inode); 1931 calldata->arg.fh = NFS_FH(state->inode);
1834 calldata->arg.stateid = &state->open_stateid; 1932 calldata->arg.stateid = &state->open_stateid;
1835 if (nfs4_has_session(server->nfs_client))
1836 memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
1837 /* Serialization for the sequence id */ 1933 /* Serialization for the sequence id */
1838 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1934 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1839 if (calldata->arg.seqid == NULL) 1935 if (calldata->arg.seqid == NULL)
@@ -1981,7 +2077,7 @@ out_drop:
1981 return 0; 2077 return 0;
1982} 2078}
1983 2079
1984void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) 2080static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
1985{ 2081{
1986 if (ctx->state == NULL) 2082 if (ctx->state == NULL)
1987 return; 2083 return;
@@ -2532,7 +2628,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2532 nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); 2628 nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
2533 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2629 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2534 return 0; 2630 return 0;
2535 nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res);
2536 update_changeattr(dir, &res->cinfo); 2631 update_changeattr(dir, &res->cinfo);
2537 nfs_post_op_update_inode(dir, &res->dir_attr); 2632 nfs_post_op_update_inode(dir, &res->dir_attr);
2538 return 1; 2633 return 1;
@@ -2971,11 +3066,10 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2971 3066
2972 dprintk("--> %s\n", __func__); 3067 dprintk("--> %s\n", __func__);
2973 3068
2974 /* nfs4_sequence_free_slot called in the read rpc_call_done */
2975 nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); 3069 nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
2976 3070
2977 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3071 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
2978 nfs4_restart_rpc(task, server->nfs_client); 3072 nfs_restart_rpc(task, server->nfs_client);
2979 return -EAGAIN; 3073 return -EAGAIN;
2980 } 3074 }
2981 3075
@@ -2995,12 +3089,11 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2995{ 3089{
2996 struct inode *inode = data->inode; 3090 struct inode *inode = data->inode;
2997 3091
2998 /* slot is freed in nfs_writeback_done */
2999 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3092 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
3000 task->tk_status); 3093 task->tk_status);
3001 3094
3002 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3095 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3003 nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3096 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3004 return -EAGAIN; 3097 return -EAGAIN;
3005 } 3098 }
3006 if (task->tk_status >= 0) { 3099 if (task->tk_status >= 0) {
@@ -3028,11 +3121,9 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3028 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, 3121 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
3029 task->tk_status); 3122 task->tk_status);
3030 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3123 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3031 nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3124 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3032 return -EAGAIN; 3125 return -EAGAIN;
3033 } 3126 }
3034 nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client,
3035 &data->res.seq_res);
3036 nfs_refresh_inode(inode, data->res.fattr); 3127 nfs_refresh_inode(inode, data->res.fattr);
3037 return 0; 3128 return 0;
3038} 3129}
@@ -3350,7 +3441,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3350 case -NFS4ERR_SEQ_MISORDERED: 3441 case -NFS4ERR_SEQ_MISORDERED:
3351 dprintk("%s ERROR %d, Reset session\n", __func__, 3442 dprintk("%s ERROR %d, Reset session\n", __func__,
3352 task->tk_status); 3443 task->tk_status);
3353 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); 3444 nfs4_schedule_state_recovery(clp);
3354 task->tk_status = 0; 3445 task->tk_status = 0;
3355 return -EAGAIN; 3446 return -EAGAIN;
3356#endif /* CONFIG_NFS_V4_1 */ 3447#endif /* CONFIG_NFS_V4_1 */
@@ -3483,12 +3574,23 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
3483{ 3574{
3484 struct nfs4_delegreturndata *data = calldata; 3575 struct nfs4_delegreturndata *data = calldata;
3485 3576
3486 nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res, 3577 nfs4_sequence_done(data->res.server, &data->res.seq_res,
3487 task->tk_status); 3578 task->tk_status);
3488 3579
3489 data->rpc_status = task->tk_status; 3580 switch (task->tk_status) {
3490 if (data->rpc_status == 0) 3581 case -NFS4ERR_STALE_STATEID:
3582 case -NFS4ERR_EXPIRED:
3583 case 0:
3491 renew_lease(data->res.server, data->timestamp); 3584 renew_lease(data->res.server, data->timestamp);
3585 break;
3586 default:
3587 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
3588 -EAGAIN) {
3589 nfs_restart_rpc(task, data->res.server->nfs_client);
3590 return;
3591 }
3592 }
3593 data->rpc_status = task->tk_status;
3492} 3594}
3493 3595
3494static void nfs4_delegreturn_release(void *calldata) 3596static void nfs4_delegreturn_release(void *calldata)
@@ -3741,11 +3843,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3741 break; 3843 break;
3742 default: 3844 default:
3743 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) 3845 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
3744 nfs4_restart_rpc(task, 3846 nfs_restart_rpc(task,
3745 calldata->server->nfs_client); 3847 calldata->server->nfs_client);
3746 } 3848 }
3747 nfs4_sequence_free_slot(calldata->server->nfs_client,
3748 &calldata->res.seq_res);
3749} 3849}
3750 3850
3751static void nfs4_locku_prepare(struct rpc_task *task, void *data) 3851static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -3921,14 +4021,20 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3921 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4021 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
3922} 4022}
3923 4023
4024static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
4025{
4026 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4027 nfs4_lock_prepare(task, calldata);
4028}
4029
3924static void nfs4_lock_done(struct rpc_task *task, void *calldata) 4030static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3925{ 4031{
3926 struct nfs4_lockdata *data = calldata; 4032 struct nfs4_lockdata *data = calldata;
3927 4033
3928 dprintk("%s: begin!\n", __func__); 4034 dprintk("%s: begin!\n", __func__);
3929 4035
3930 nfs4_sequence_done_free_slot(data->server, &data->res.seq_res, 4036 nfs4_sequence_done(data->server, &data->res.seq_res,
3931 task->tk_status); 4037 task->tk_status);
3932 4038
3933 data->rpc_status = task->tk_status; 4039 data->rpc_status = task->tk_status;
3934 if (RPC_ASSASSINATED(task)) 4040 if (RPC_ASSASSINATED(task))
@@ -3976,7 +4082,13 @@ static const struct rpc_call_ops nfs4_lock_ops = {
3976 .rpc_release = nfs4_lock_release, 4082 .rpc_release = nfs4_lock_release,
3977}; 4083};
3978 4084
3979static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) 4085static const struct rpc_call_ops nfs4_recover_lock_ops = {
4086 .rpc_call_prepare = nfs4_recover_lock_prepare,
4087 .rpc_call_done = nfs4_lock_done,
4088 .rpc_release = nfs4_lock_release,
4089};
4090
4091static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
3980{ 4092{
3981 struct nfs4_lockdata *data; 4093 struct nfs4_lockdata *data;
3982 struct rpc_task *task; 4094 struct rpc_task *task;
@@ -4000,8 +4112,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4000 return -ENOMEM; 4112 return -ENOMEM;
4001 if (IS_SETLKW(cmd)) 4113 if (IS_SETLKW(cmd))
4002 data->arg.block = 1; 4114 data->arg.block = 1;
4003 if (reclaim != 0) 4115 if (recovery_type > NFS_LOCK_NEW) {
4004 data->arg.reclaim = 1; 4116 if (recovery_type == NFS_LOCK_RECLAIM)
4117 data->arg.reclaim = NFS_LOCK_RECLAIM;
4118 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4119 }
4005 msg.rpc_argp = &data->arg, 4120 msg.rpc_argp = &data->arg,
4006 msg.rpc_resp = &data->res, 4121 msg.rpc_resp = &data->res,
4007 task_setup_data.callback_data = data; 4122 task_setup_data.callback_data = data;
@@ -4028,7 +4143,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4028 /* Cache the lock if possible... */ 4143 /* Cache the lock if possible... */
4029 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4144 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4030 return 0; 4145 return 0;
4031 err = _nfs4_do_setlk(state, F_SETLK, request, 1); 4146 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4032 if (err != -NFS4ERR_DELAY) 4147 if (err != -NFS4ERR_DELAY)
4033 break; 4148 break;
4034 nfs4_handle_exception(server, err, &exception); 4149 nfs4_handle_exception(server, err, &exception);
@@ -4048,11 +4163,17 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4048 do { 4163 do {
4049 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4050 return 0; 4165 return 0;
4051 err = _nfs4_do_setlk(state, F_SETLK, request, 0); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
4052 if (err != -NFS4ERR_DELAY) 4167 switch (err) {
4053 break; 4168 default:
4054 nfs4_handle_exception(server, err, &exception); 4169 goto out;
4170 case -NFS4ERR_GRACE:
4171 case -NFS4ERR_DELAY:
4172 nfs4_handle_exception(server, err, &exception);
4173 err = 0;
4174 }
4055 } while (exception.retry); 4175 } while (exception.retry);
4176out:
4056 return err; 4177 return err;
4057} 4178}
4058 4179
@@ -4078,7 +4199,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4078 status = do_vfs_lock(request->fl_file, request); 4199 status = do_vfs_lock(request->fl_file, request);
4079 goto out_unlock; 4200 goto out_unlock;
4080 } 4201 }
4081 status = _nfs4_do_setlk(state, cmd, request, 0); 4202 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4082 if (status != 0) 4203 if (status != 0)
4083 goto out_unlock; 4204 goto out_unlock;
4084 /* Note: we always want to sleep here! */ 4205 /* Note: we always want to sleep here! */
@@ -4161,7 +4282,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4161 if (err != 0) 4282 if (err != 0)
4162 goto out; 4283 goto out;
4163 do { 4284 do {
4164 err = _nfs4_do_setlk(state, F_SETLK, fl, 0); 4285 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4165 switch (err) { 4286 switch (err) {
4166 default: 4287 default:
4167 printk(KERN_ERR "%s: unhandled error %d.\n", 4288 printk(KERN_ERR "%s: unhandled error %d.\n",
@@ -4172,6 +4293,11 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4172 case -NFS4ERR_EXPIRED: 4293 case -NFS4ERR_EXPIRED:
4173 case -NFS4ERR_STALE_CLIENTID: 4294 case -NFS4ERR_STALE_CLIENTID:
4174 case -NFS4ERR_STALE_STATEID: 4295 case -NFS4ERR_STALE_STATEID:
4296 case -NFS4ERR_BADSESSION:
4297 case -NFS4ERR_BADSLOT:
4298 case -NFS4ERR_BAD_HIGH_SLOT:
4299 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4300 case -NFS4ERR_DEADSESSION:
4175 nfs4_schedule_state_recovery(server->nfs_client); 4301 nfs4_schedule_state_recovery(server->nfs_client);
4176 goto out; 4302 goto out;
4177 case -ERESTARTSYS: 4303 case -ERESTARTSYS:
@@ -4296,7 +4422,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4296 * NFS4ERR_BADSESSION in the sequence operation, and will therefore 4422 * NFS4ERR_BADSESSION in the sequence operation, and will therefore
4297 * be in some phase of session reset. 4423 * be in some phase of session reset.
4298 */ 4424 */
4299static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) 4425int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4300{ 4426{
4301 nfs4_verifier verifier; 4427 nfs4_verifier verifier;
4302 struct nfs41_exchange_id_args args = { 4428 struct nfs41_exchange_id_args args = {
@@ -4318,6 +4444,9 @@ static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4318 dprintk("--> %s\n", __func__); 4444 dprintk("--> %s\n", __func__);
4319 BUG_ON(clp == NULL); 4445 BUG_ON(clp == NULL);
4320 4446
4447 /* Remove server-only flags */
4448 args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
4449
4321 p = (u32 *)verifier.data; 4450 p = (u32 *)verifier.data;
4322 *p++ = htonl((u32)clp->cl_boot_time.tv_sec); 4451 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
4323 *p = htonl((u32)clp->cl_boot_time.tv_nsec); 4452 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4361,11 +4490,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4361 (struct nfs4_get_lease_time_data *)calldata; 4490 (struct nfs4_get_lease_time_data *)calldata;
4362 4491
4363 dprintk("--> %s\n", __func__); 4492 dprintk("--> %s\n", __func__);
4493 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4364 /* just setup sequence, do not trigger session recovery 4494 /* just setup sequence, do not trigger session recovery
4365 since we're invoked within one */ 4495 since we're invoked within one */
4366 ret = nfs41_setup_sequence(data->clp->cl_session, 4496 ret = nfs41_setup_sequence(data->clp->cl_session,
4367 &data->args->la_seq_args, 4497 &data->args->la_seq_args,
4368 &data->res->lr_seq_res, 0, task); 4498 &data->res->lr_seq_res, 0, task);
4369 4499
4370 BUG_ON(ret == -EAGAIN); 4500 BUG_ON(ret == -EAGAIN);
4371 rpc_call_start(task); 4501 rpc_call_start(task);
@@ -4389,10 +4519,9 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4389 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4519 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4390 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4520 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4391 task->tk_status = 0; 4521 task->tk_status = 0;
4392 nfs4_restart_rpc(task, data->clp); 4522 nfs_restart_rpc(task, data->clp);
4393 return; 4523 return;
4394 } 4524 }
4395 nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res);
4396 dprintk("<-- %s\n", __func__); 4525 dprintk("<-- %s\n", __func__);
4397} 4526}
4398 4527
@@ -4465,7 +4594,6 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots,
4465 spin_lock(&tbl->slot_tbl_lock); 4594 spin_lock(&tbl->slot_tbl_lock);
4466 for (i = 0; i < max_slots; ++i) 4595 for (i = 0; i < max_slots; ++i)
4467 tbl->slots[i].seq_nr = ivalue; 4596 tbl->slots[i].seq_nr = ivalue;
4468 tbl->highest_used_slotid = -1;
4469 spin_unlock(&tbl->slot_tbl_lock); 4597 spin_unlock(&tbl->slot_tbl_lock);
4470 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, 4598 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
4471 tbl, tbl->slots, tbl->max_slots); 4599 tbl, tbl->slots, tbl->max_slots);
@@ -4515,7 +4643,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
4515static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, 4643static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4516 int max_slots, int ivalue) 4644 int max_slots, int ivalue)
4517{ 4645{
4518 int i;
4519 struct nfs4_slot *slot; 4646 struct nfs4_slot *slot;
4520 int ret = -ENOMEM; 4647 int ret = -ENOMEM;
4521 4648
@@ -4526,18 +4653,9 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4526 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); 4653 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
4527 if (!slot) 4654 if (!slot)
4528 goto out; 4655 goto out;
4529 for (i = 0; i < max_slots; ++i)
4530 slot[i].seq_nr = ivalue;
4531 ret = 0; 4656 ret = 0;
4532 4657
4533 spin_lock(&tbl->slot_tbl_lock); 4658 spin_lock(&tbl->slot_tbl_lock);
4534 if (tbl->slots != NULL) {
4535 spin_unlock(&tbl->slot_tbl_lock);
4536 dprintk("%s: slot table already initialized. tbl=%p slots=%p\n",
4537 __func__, tbl, tbl->slots);
4538 WARN_ON(1);
4539 goto out_free;
4540 }
4541 tbl->max_slots = max_slots; 4659 tbl->max_slots = max_slots;
4542 tbl->slots = slot; 4660 tbl->slots = slot;
4543 tbl->highest_used_slotid = -1; /* no slot is currently used */ 4661 tbl->highest_used_slotid = -1; /* no slot is currently used */
@@ -4547,10 +4665,6 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4547out: 4665out:
4548 dprintk("<-- %s: return %d\n", __func__, ret); 4666 dprintk("<-- %s: return %d\n", __func__, ret);
4549 return ret; 4667 return ret;
4550
4551out_free:
4552 kfree(slot);
4553 goto out;
4554} 4668}
4555 4669
4556/* 4670/*
@@ -4558,17 +4672,24 @@ out_free:
4558 */ 4672 */
4559static int nfs4_init_slot_tables(struct nfs4_session *session) 4673static int nfs4_init_slot_tables(struct nfs4_session *session)
4560{ 4674{
4561 int status; 4675 struct nfs4_slot_table *tbl;
4676 int status = 0;
4562 4677
4563 status = nfs4_init_slot_table(&session->fc_slot_table, 4678 tbl = &session->fc_slot_table;
4564 session->fc_attrs.max_reqs, 1); 4679 if (tbl->slots == NULL) {
4565 if (status) 4680 status = nfs4_init_slot_table(tbl,
4566 return status; 4681 session->fc_attrs.max_reqs, 1);
4682 if (status)
4683 return status;
4684 }
4567 4685
4568 status = nfs4_init_slot_table(&session->bc_slot_table, 4686 tbl = &session->bc_slot_table;
4569 session->bc_attrs.max_reqs, 0); 4687 if (tbl->slots == NULL) {
4570 if (status) 4688 status = nfs4_init_slot_table(tbl,
4571 nfs4_destroy_slot_tables(session); 4689 session->bc_attrs.max_reqs, 0);
4690 if (status)
4691 nfs4_destroy_slot_tables(session);
4692 }
4572 4693
4573 return status; 4694 return status;
4574} 4695}
@@ -4582,7 +4703,6 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4582 if (!session) 4703 if (!session)
4583 return NULL; 4704 return NULL;
4584 4705
4585 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
4586 /* 4706 /*
4587 * The create session reply races with the server back 4707 * The create session reply races with the server back
4588 * channel probe. Mark the client NFS_CS_SESSION_INITING 4708 * channel probe. Mark the client NFS_CS_SESSION_INITING
@@ -4590,12 +4710,15 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4590 * nfs_client struct 4710 * nfs_client struct
4591 */ 4711 */
4592 clp->cl_cons_state = NFS_CS_SESSION_INITING; 4712 clp->cl_cons_state = NFS_CS_SESSION_INITING;
4713 init_completion(&session->complete);
4593 4714
4594 tbl = &session->fc_slot_table; 4715 tbl = &session->fc_slot_table;
4716 tbl->highest_used_slotid = -1;
4595 spin_lock_init(&tbl->slot_tbl_lock); 4717 spin_lock_init(&tbl->slot_tbl_lock);
4596 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 4718 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4597 4719
4598 tbl = &session->bc_slot_table; 4720 tbl = &session->bc_slot_table;
4721 tbl->highest_used_slotid = -1;
4599 spin_lock_init(&tbl->slot_tbl_lock); 4722 spin_lock_init(&tbl->slot_tbl_lock);
4600 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); 4723 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
4601 4724
@@ -4747,11 +4870,10 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
4747 * It is the responsibility of the caller to verify the session is 4870 * It is the responsibility of the caller to verify the session is
4748 * expired before calling this routine. 4871 * expired before calling this routine.
4749 */ 4872 */
4750int nfs4_proc_create_session(struct nfs_client *clp, int reset) 4873int nfs4_proc_create_session(struct nfs_client *clp)
4751{ 4874{
4752 int status; 4875 int status;
4753 unsigned *ptr; 4876 unsigned *ptr;
4754 struct nfs_fsinfo fsinfo;
4755 struct nfs4_session *session = clp->cl_session; 4877 struct nfs4_session *session = clp->cl_session;
4756 4878
4757 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 4879 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
@@ -4760,35 +4882,19 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset)
4760 if (status) 4882 if (status)
4761 goto out; 4883 goto out;
4762 4884
4763 /* Init or reset the fore channel */ 4885 /* Init and reset the fore channel */
4764 if (reset) 4886 status = nfs4_init_slot_tables(session);
4765 status = nfs4_reset_slot_tables(session); 4887 dprintk("slot table initialization returned %d\n", status);
4766 else 4888 if (status)
4767 status = nfs4_init_slot_tables(session); 4889 goto out;
4768 dprintk("fore channel slot table initialization returned %d\n", status); 4890 status = nfs4_reset_slot_tables(session);
4891 dprintk("slot table reset returned %d\n", status);
4769 if (status) 4892 if (status)
4770 goto out; 4893 goto out;
4771 4894
4772 ptr = (unsigned *)&session->sess_id.data[0]; 4895 ptr = (unsigned *)&session->sess_id.data[0];
4773 dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, 4896 dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
4774 clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); 4897 clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
4775
4776 if (reset)
4777 /* Lease time is aleady set */
4778 goto out;
4779
4780 /* Get the lease time */
4781 status = nfs4_proc_get_lease_time(clp, &fsinfo);
4782 if (status == 0) {
4783 /* Update lease time and schedule renewal */
4784 spin_lock(&clp->cl_lock);
4785 clp->cl_lease_time = fsinfo.lease_time * HZ;
4786 clp->cl_last_renewal = jiffies;
4787 clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
4788 spin_unlock(&clp->cl_lock);
4789
4790 nfs4_schedule_state_renewal(clp);
4791 }
4792out: 4898out:
4793 dprintk("<-- %s\n", __func__); 4899 dprintk("<-- %s\n", __func__);
4794 return status; 4900 return status;
@@ -4827,13 +4933,24 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
4827int nfs4_init_session(struct nfs_server *server) 4933int nfs4_init_session(struct nfs_server *server)
4828{ 4934{
4829 struct nfs_client *clp = server->nfs_client; 4935 struct nfs_client *clp = server->nfs_client;
4936 struct nfs4_session *session;
4937 unsigned int rsize, wsize;
4830 int ret; 4938 int ret;
4831 4939
4832 if (!nfs4_has_session(clp)) 4940 if (!nfs4_has_session(clp))
4833 return 0; 4941 return 0;
4834 4942
4835 clp->cl_session->fc_attrs.max_rqst_sz = server->wsize; 4943 rsize = server->rsize;
4836 clp->cl_session->fc_attrs.max_resp_sz = server->rsize; 4944 if (rsize == 0)
4945 rsize = NFS_MAX_FILE_IO_SIZE;
4946 wsize = server->wsize;
4947 if (wsize == 0)
4948 wsize = NFS_MAX_FILE_IO_SIZE;
4949
4950 session = clp->cl_session;
4951 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
4952 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
4953
4837 ret = nfs4_recover_expired_lease(server); 4954 ret = nfs4_recover_expired_lease(server);
4838 if (!ret) 4955 if (!ret)
4839 ret = nfs4_check_client_ready(clp); 4956 ret = nfs4_check_client_ready(clp);
@@ -4858,7 +4975,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
4858 args.sa_cache_this = 0; 4975 args.sa_cache_this = 0;
4859 4976
4860 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, 4977 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
4861 &res, 0); 4978 &res, args.sa_cache_this, 1);
4862} 4979}
4863 4980
4864void nfs41_sequence_call_done(struct rpc_task *task, void *data) 4981void nfs41_sequence_call_done(struct rpc_task *task, void *data)
@@ -4872,11 +4989,10 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data)
4872 4989
4873 if (_nfs4_async_handle_error(task, NULL, clp, NULL) 4990 if (_nfs4_async_handle_error(task, NULL, clp, NULL)
4874 == -EAGAIN) { 4991 == -EAGAIN) {
4875 nfs4_restart_rpc(task, clp); 4992 nfs_restart_rpc(task, clp);
4876 return; 4993 return;
4877 } 4994 }
4878 } 4995 }
4879 nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp);
4880 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); 4996 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
4881 4997
4882 kfree(task->tk_msg.rpc_argp); 4998 kfree(task->tk_msg.rpc_argp);
@@ -4931,6 +5047,110 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp,
4931 &nfs41_sequence_ops, (void *)clp); 5047 &nfs41_sequence_ops, (void *)clp);
4932} 5048}
4933 5049
5050struct nfs4_reclaim_complete_data {
5051 struct nfs_client *clp;
5052 struct nfs41_reclaim_complete_args arg;
5053 struct nfs41_reclaim_complete_res res;
5054};
5055
5056static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
5057{
5058 struct nfs4_reclaim_complete_data *calldata = data;
5059
5060 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
5061 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
5062 &calldata->res.seq_res, 0, task))
5063 return;
5064
5065 rpc_call_start(task);
5066}
5067
5068static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
5069{
5070 struct nfs4_reclaim_complete_data *calldata = data;
5071 struct nfs_client *clp = calldata->clp;
5072 struct nfs4_sequence_res *res = &calldata->res.seq_res;
5073
5074 dprintk("--> %s\n", __func__);
5075 nfs41_sequence_done(clp, res, task->tk_status);
5076 switch (task->tk_status) {
5077 case 0:
5078 case -NFS4ERR_COMPLETE_ALREADY:
5079 break;
5080 case -NFS4ERR_BADSESSION:
5081 case -NFS4ERR_DEADSESSION:
5082 /*
5083 * Handle the session error, but do not retry the operation, as
5084 * we have no way of telling whether the clientid had to be
5085 * reset before we got our reply. If reset, a new wave of
5086 * reclaim operations will follow, containing their own reclaim
5087 * complete. We don't want our retry to get on the way of
5088 * recovery by incorrectly indicating to the server that we're
5089 * done reclaiming state since the process had to be restarted.
5090 */
5091 _nfs4_async_handle_error(task, NULL, clp, NULL);
5092 break;
5093 default:
5094 if (_nfs4_async_handle_error(
5095 task, NULL, clp, NULL) == -EAGAIN) {
5096 rpc_restart_call_prepare(task);
5097 return;
5098 }
5099 }
5100
5101 dprintk("<-- %s\n", __func__);
5102}
5103
5104static void nfs4_free_reclaim_complete_data(void *data)
5105{
5106 struct nfs4_reclaim_complete_data *calldata = data;
5107
5108 kfree(calldata);
5109}
5110
5111static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = {
5112 .rpc_call_prepare = nfs4_reclaim_complete_prepare,
5113 .rpc_call_done = nfs4_reclaim_complete_done,
5114 .rpc_release = nfs4_free_reclaim_complete_data,
5115};
5116
5117/*
5118 * Issue a global reclaim complete.
5119 */
5120static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5121{
5122 struct nfs4_reclaim_complete_data *calldata;
5123 struct rpc_task *task;
5124 struct rpc_message msg = {
5125 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE],
5126 };
5127 struct rpc_task_setup task_setup_data = {
5128 .rpc_client = clp->cl_rpcclient,
5129 .rpc_message = &msg,
5130 .callback_ops = &nfs4_reclaim_complete_call_ops,
5131 .flags = RPC_TASK_ASYNC,
5132 };
5133 int status = -ENOMEM;
5134
5135 dprintk("--> %s\n", __func__);
5136 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
5137 if (calldata == NULL)
5138 goto out;
5139 calldata->clp = clp;
5140 calldata->arg.one_fs = 0;
5141 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5142
5143 msg.rpc_argp = &calldata->arg;
5144 msg.rpc_resp = &calldata->res;
5145 task_setup_data.callback_data = calldata;
5146 task = rpc_run_task(&task_setup_data);
5147 if (IS_ERR(task))
5148 status = PTR_ERR(task);
5149 rpc_put_task(task);
5150out:
5151 dprintk("<-- %s status=%d\n", __func__, status);
5152 return status;
5153}
4934#endif /* CONFIG_NFS_V4_1 */ 5154#endif /* CONFIG_NFS_V4_1 */
4935 5155
4936struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5156struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -4948,8 +5168,9 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
4948 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 5168 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
4949 .recover_open = nfs4_open_reclaim, 5169 .recover_open = nfs4_open_reclaim,
4950 .recover_lock = nfs4_lock_reclaim, 5170 .recover_lock = nfs4_lock_reclaim,
4951 .establish_clid = nfs4_proc_exchange_id, 5171 .establish_clid = nfs41_init_clientid,
4952 .get_clid_cred = nfs4_get_exchange_id_cred, 5172 .get_clid_cred = nfs4_get_exchange_id_cred,
5173 .reclaim_complete = nfs41_proc_reclaim_complete,
4953}; 5174};
4954#endif /* CONFIG_NFS_V4_1 */ 5175#endif /* CONFIG_NFS_V4_1 */
4955 5176
@@ -4968,7 +5189,7 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
4968 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 5189 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
4969 .recover_open = nfs4_open_expired, 5190 .recover_open = nfs4_open_expired,
4970 .recover_lock = nfs4_lock_expired, 5191 .recover_lock = nfs4_lock_expired,
4971 .establish_clid = nfs4_proc_exchange_id, 5192 .establish_clid = nfs41_init_clientid,
4972 .get_clid_cred = nfs4_get_exchange_id_cred, 5193 .get_clid_cred = nfs4_get_exchange_id_cred,
4973}; 5194};
4974#endif /* CONFIG_NFS_V4_1 */ 5195#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2ef4fecf398..6d263ed79e9 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -116,6 +116,79 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
116 116
117#if defined(CONFIG_NFS_V4_1) 117#if defined(CONFIG_NFS_V4_1)
118 118
119static int nfs41_setup_state_renewal(struct nfs_client *clp)
120{
121 int status;
122 struct nfs_fsinfo fsinfo;
123
124 status = nfs4_proc_get_lease_time(clp, &fsinfo);
125 if (status == 0) {
126 /* Update lease time and schedule renewal */
127 spin_lock(&clp->cl_lock);
128 clp->cl_lease_time = fsinfo.lease_time * HZ;
129 clp->cl_last_renewal = jiffies;
130 spin_unlock(&clp->cl_lock);
131
132 nfs4_schedule_state_renewal(clp);
133 }
134
135 return status;
136}
137
138static void nfs4_end_drain_session(struct nfs_client *clp)
139{
140 struct nfs4_session *ses = clp->cl_session;
141 int max_slots;
142
143 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
144 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
145 max_slots = ses->fc_slot_table.max_slots;
146 while (max_slots--) {
147 struct rpc_task *task;
148
149 task = rpc_wake_up_next(&ses->fc_slot_table.
150 slot_tbl_waitq);
151 if (!task)
152 break;
153 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
154 }
155 spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
156 }
157}
158
159static int nfs4_begin_drain_session(struct nfs_client *clp)
160{
161 struct nfs4_session *ses = clp->cl_session;
162 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
163
164 spin_lock(&tbl->slot_tbl_lock);
165 set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
166 if (tbl->highest_used_slotid != -1) {
167 INIT_COMPLETION(ses->complete);
168 spin_unlock(&tbl->slot_tbl_lock);
169 return wait_for_completion_interruptible(&ses->complete);
170 }
171 spin_unlock(&tbl->slot_tbl_lock);
172 return 0;
173}
174
175int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
176{
177 int status;
178
179 nfs4_begin_drain_session(clp);
180 status = nfs4_proc_exchange_id(clp, cred);
181 if (status != 0)
182 goto out;
183 status = nfs4_proc_create_session(clp);
184 if (status != 0)
185 goto out;
186 nfs41_setup_state_renewal(clp);
187 nfs_mark_client_ready(clp, NFS_CS_READY);
188out:
189 return status;
190}
191
119struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) 192struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
120{ 193{
121 struct rpc_cred *cred; 194 struct rpc_cred *cred;
@@ -693,16 +766,21 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
693 return new; 766 return new;
694} 767}
695 768
696void nfs_free_seqid(struct nfs_seqid *seqid) 769void nfs_release_seqid(struct nfs_seqid *seqid)
697{ 770{
698 if (!list_empty(&seqid->list)) { 771 if (!list_empty(&seqid->list)) {
699 struct rpc_sequence *sequence = seqid->sequence->sequence; 772 struct rpc_sequence *sequence = seqid->sequence->sequence;
700 773
701 spin_lock(&sequence->lock); 774 spin_lock(&sequence->lock);
702 list_del(&seqid->list); 775 list_del_init(&seqid->list);
703 spin_unlock(&sequence->lock); 776 spin_unlock(&sequence->lock);
704 rpc_wake_up(&sequence->wait); 777 rpc_wake_up(&sequence->wait);
705 } 778 }
779}
780
781void nfs_free_seqid(struct nfs_seqid *seqid)
782{
783 nfs_release_seqid(seqid);
706 kfree(seqid); 784 kfree(seqid);
707} 785}
708 786
@@ -877,6 +955,10 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
877 case -NFS4ERR_EXPIRED: 955 case -NFS4ERR_EXPIRED:
878 case -NFS4ERR_NO_GRACE: 956 case -NFS4ERR_NO_GRACE:
879 case -NFS4ERR_STALE_CLIENTID: 957 case -NFS4ERR_STALE_CLIENTID:
958 case -NFS4ERR_BADSESSION:
959 case -NFS4ERR_BADSLOT:
960 case -NFS4ERR_BAD_HIGH_SLOT:
961 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
880 goto out; 962 goto out;
881 default: 963 default:
882 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 964 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
@@ -959,6 +1041,10 @@ restart:
959 case -NFS4ERR_NO_GRACE: 1041 case -NFS4ERR_NO_GRACE:
960 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 1042 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
961 case -NFS4ERR_STALE_CLIENTID: 1043 case -NFS4ERR_STALE_CLIENTID:
1044 case -NFS4ERR_BADSESSION:
1045 case -NFS4ERR_BADSLOT:
1046 case -NFS4ERR_BAD_HIGH_SLOT:
1047 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
962 goto out_err; 1048 goto out_err;
963 } 1049 }
964 nfs4_put_open_state(state); 1050 nfs4_put_open_state(state);
@@ -1011,6 +1097,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
1011 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); 1097 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
1012} 1098}
1013 1099
1100static void nfs4_reclaim_complete(struct nfs_client *clp,
1101 const struct nfs4_state_recovery_ops *ops)
1102{
1103 /* Notify the server we're done reclaiming our state */
1104 if (ops->reclaim_complete)
1105 (void)ops->reclaim_complete(clp);
1106}
1107
1014static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) 1108static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1015{ 1109{
1016 struct nfs4_state_owner *sp; 1110 struct nfs4_state_owner *sp;
@@ -1020,6 +1114,9 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1020 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1114 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1021 return; 1115 return;
1022 1116
1117 nfs4_reclaim_complete(clp,
1118 nfs4_reboot_recovery_ops[clp->cl_minorversion]);
1119
1023 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1120 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1024 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1121 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
1025 spin_lock(&sp->so_lock); 1122 spin_lock(&sp->so_lock);
@@ -1046,25 +1143,25 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1046 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); 1143 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1047} 1144}
1048 1145
1049static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp) 1146static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1050{
1051 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1052}
1053
1054static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1055{ 1147{
1056 switch (error) { 1148 switch (error) {
1057 case -NFS4ERR_CB_PATH_DOWN: 1149 case -NFS4ERR_CB_PATH_DOWN:
1058 nfs_handle_cb_pathdown(clp); 1150 nfs_handle_cb_pathdown(clp);
1059 break; 1151 return 0;
1152 case -NFS4ERR_NO_GRACE:
1153 nfs4_state_end_reclaim_reboot(clp);
1154 return 0;
1060 case -NFS4ERR_STALE_CLIENTID: 1155 case -NFS4ERR_STALE_CLIENTID:
1061 case -NFS4ERR_LEASE_MOVED: 1156 case -NFS4ERR_LEASE_MOVED:
1062 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1157 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1158 nfs4_state_end_reclaim_reboot(clp);
1063 nfs4_state_start_reclaim_reboot(clp); 1159 nfs4_state_start_reclaim_reboot(clp);
1064 break; 1160 break;
1065 case -NFS4ERR_EXPIRED: 1161 case -NFS4ERR_EXPIRED:
1066 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1162 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1067 nfs4_state_start_reclaim_nograce(clp); 1163 nfs4_state_start_reclaim_nograce(clp);
1164 break;
1068 case -NFS4ERR_BADSESSION: 1165 case -NFS4ERR_BADSESSION:
1069 case -NFS4ERR_BADSLOT: 1166 case -NFS4ERR_BADSLOT:
1070 case -NFS4ERR_BAD_HIGH_SLOT: 1167 case -NFS4ERR_BAD_HIGH_SLOT:
@@ -1072,8 +1169,11 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1072 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1169 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1073 case -NFS4ERR_SEQ_FALSE_RETRY: 1170 case -NFS4ERR_SEQ_FALSE_RETRY:
1074 case -NFS4ERR_SEQ_MISORDERED: 1171 case -NFS4ERR_SEQ_MISORDERED:
1075 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); 1172 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1173 /* Zero session reset errors */
1174 return 0;
1076 } 1175 }
1176 return error;
1077} 1177}
1078 1178
1079static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) 1179static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
@@ -1093,8 +1193,7 @@ restart:
1093 if (status < 0) { 1193 if (status < 0) {
1094 set_bit(ops->owner_flag_bit, &sp->so_flags); 1194 set_bit(ops->owner_flag_bit, &sp->so_flags);
1095 nfs4_put_state_owner(sp); 1195 nfs4_put_state_owner(sp);
1096 nfs4_recovery_handle_error(clp, status); 1196 return nfs4_recovery_handle_error(clp, status);
1097 return status;
1098 } 1197 }
1099 nfs4_put_state_owner(sp); 1198 nfs4_put_state_owner(sp);
1100 goto restart; 1199 goto restart;
@@ -1124,8 +1223,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1124 status = ops->renew_lease(clp, cred); 1223 status = ops->renew_lease(clp, cred);
1125 put_rpccred(cred); 1224 put_rpccred(cred);
1126out: 1225out:
1127 nfs4_recovery_handle_error(clp, status); 1226 return nfs4_recovery_handle_error(clp, status);
1128 return status;
1129} 1227}
1130 1228
1131static int nfs4_reclaim_lease(struct nfs_client *clp) 1229static int nfs4_reclaim_lease(struct nfs_client *clp)
@@ -1151,55 +1249,59 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1151} 1249}
1152 1250
1153#ifdef CONFIG_NFS_V4_1 1251#ifdef CONFIG_NFS_V4_1
1154static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) 1252void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1155{ 1253{
1156 switch (err) { 1254 if (!flags)
1157 case -NFS4ERR_STALE_CLIENTID: 1255 return;
1256 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) {
1158 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1257 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1159 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); 1258 nfs4_state_start_reclaim_reboot(clp);
1160 } 1259 nfs4_schedule_state_recovery(clp);
1260 } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1261 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1262 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1263 SEQ4_STATUS_RECALLABLE_STATE_REVOKED |
1264 SEQ4_STATUS_LEASE_MOVED)) {
1265 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1266 nfs4_state_start_reclaim_nograce(clp);
1267 nfs4_schedule_state_recovery(clp);
1268 } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1269 SEQ4_STATUS_BACKCHANNEL_FAULT |
1270 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1271 nfs_expire_all_delegations(clp);
1161} 1272}
1162 1273
1163static int nfs4_reset_session(struct nfs_client *clp) 1274static int nfs4_reset_session(struct nfs_client *clp)
1164{ 1275{
1165 int status; 1276 int status;
1166 1277
1278 nfs4_begin_drain_session(clp);
1167 status = nfs4_proc_destroy_session(clp->cl_session); 1279 status = nfs4_proc_destroy_session(clp->cl_session);
1168 if (status && status != -NFS4ERR_BADSESSION && 1280 if (status && status != -NFS4ERR_BADSESSION &&
1169 status != -NFS4ERR_DEADSESSION) { 1281 status != -NFS4ERR_DEADSESSION) {
1170 nfs4_session_recovery_handle_error(clp, status); 1282 status = nfs4_recovery_handle_error(clp, status);
1171 goto out; 1283 goto out;
1172 } 1284 }
1173 1285
1174 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); 1286 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1175 status = nfs4_proc_create_session(clp, 1); 1287 status = nfs4_proc_create_session(clp);
1176 if (status) 1288 if (status)
1177 nfs4_session_recovery_handle_error(clp, status); 1289 status = nfs4_recovery_handle_error(clp, status);
1178 /* fall through*/
1179out:
1180 /* Wake up the next rpc task even on error */
1181 rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq);
1182 return status;
1183}
1184 1290
1185static int nfs4_initialize_session(struct nfs_client *clp) 1291out:
1186{ 1292 /*
1187 int status; 1293 * Let the state manager reestablish state
1294 */
1295 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1296 status == 0)
1297 nfs41_setup_state_renewal(clp);
1188 1298
1189 status = nfs4_proc_create_session(clp, 0);
1190 if (!status) {
1191 nfs_mark_client_ready(clp, NFS_CS_READY);
1192 } else if (status == -NFS4ERR_STALE_CLIENTID) {
1193 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1194 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
1195 } else {
1196 nfs_mark_client_ready(clp, status);
1197 }
1198 return status; 1299 return status;
1199} 1300}
1301
1200#else /* CONFIG_NFS_V4_1 */ 1302#else /* CONFIG_NFS_V4_1 */
1201static int nfs4_reset_session(struct nfs_client *clp) { return 0; } 1303static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1202static int nfs4_initialize_session(struct nfs_client *clp) { return 0; } 1304static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1203#endif /* CONFIG_NFS_V4_1 */ 1305#endif /* CONFIG_NFS_V4_1 */
1204 1306
1205/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1307/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1234,7 +1336,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
1234 status = nfs4_reclaim_lease(clp); 1336 status = nfs4_reclaim_lease(clp);
1235 if (status) { 1337 if (status) {
1236 nfs4_set_lease_expired(clp, status); 1338 nfs4_set_lease_expired(clp, status);
1237 if (status == -EAGAIN) 1339 if (test_bit(NFS4CLNT_LEASE_EXPIRED,
1340 &clp->cl_state))
1238 continue; 1341 continue;
1239 if (clp->cl_cons_state == 1342 if (clp->cl_cons_state ==
1240 NFS_CS_SESSION_INITING) 1343 NFS_CS_SESSION_INITING)
@@ -1242,57 +1345,54 @@ static void nfs4_state_manager(struct nfs_client *clp)
1242 goto out_error; 1345 goto out_error;
1243 } 1346 }
1244 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1347 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1348 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1245 } 1349 }
1246 1350
1247 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1351 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1248 status = nfs4_check_lease(clp); 1352 status = nfs4_check_lease(clp);
1249 if (status != 0) 1353 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1250 continue; 1354 continue;
1355 if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN)
1356 goto out_error;
1251 } 1357 }
1358
1252 /* Initialize or reset the session */ 1359 /* Initialize or reset the session */
1253 if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) 1360 if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)
1254 && nfs4_has_session(clp)) { 1361 && nfs4_has_session(clp)) {
1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) 1362 status = nfs4_reset_session(clp);
1256 status = nfs4_initialize_session(clp); 1363 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1257 else 1364 continue;
1258 status = nfs4_reset_session(clp); 1365 if (status < 0)
1259 if (status) {
1260 if (status == -NFS4ERR_STALE_CLIENTID)
1261 continue;
1262 goto out_error; 1366 goto out_error;
1263 }
1264 } 1367 }
1368
1265 /* First recover reboot state... */ 1369 /* First recover reboot state... */
1266 if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1370 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1267 status = nfs4_do_reclaim(clp, 1371 status = nfs4_do_reclaim(clp,
1268 nfs4_reboot_recovery_ops[clp->cl_minorversion]); 1372 nfs4_reboot_recovery_ops[clp->cl_minorversion]);
1269 if (status == -NFS4ERR_STALE_CLIENTID) 1373 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1270 continue; 1374 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
1271 if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
1272 continue; 1375 continue;
1273 nfs4_state_end_reclaim_reboot(clp); 1376 nfs4_state_end_reclaim_reboot(clp);
1274 continue; 1377 if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
1378 continue;
1379 if (status < 0)
1380 goto out_error;
1275 } 1381 }
1276 1382
1277 /* Now recover expired state... */ 1383 /* Now recover expired state... */
1278 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { 1384 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1279 status = nfs4_do_reclaim(clp, 1385 status = nfs4_do_reclaim(clp,
1280 nfs4_nograce_recovery_ops[clp->cl_minorversion]); 1386 nfs4_nograce_recovery_ops[clp->cl_minorversion]);
1281 if (status < 0) { 1387 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
1282 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); 1388 test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
1283 if (status == -NFS4ERR_STALE_CLIENTID) 1389 test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1284 continue; 1390 continue;
1285 if (status == -NFS4ERR_EXPIRED) 1391 if (status < 0)
1286 continue;
1287 if (test_bit(NFS4CLNT_SESSION_SETUP,
1288 &clp->cl_state))
1289 continue;
1290 goto out_error; 1392 goto out_error;
1291 } else
1292 nfs4_state_end_reclaim_nograce(clp);
1293 continue;
1294 } 1393 }
1295 1394
1395 nfs4_end_drain_session(clp);
1296 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { 1396 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1297 nfs_client_return_marked_delegations(clp); 1397 nfs_client_return_marked_delegations(clp);
1298 continue; 1398 continue;
@@ -1309,8 +1409,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1309out_error: 1409out_error:
1310 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1410 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1311 " with error %d\n", clp->cl_hostname, -status); 1411 " with error %d\n", clp->cl_hostname, -status);
1312 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1412 nfs4_end_drain_session(clp);
1313 nfs4_state_end_reclaim_reboot(clp);
1314 nfs4_clear_state_manager_bit(clp); 1413 nfs4_clear_state_manager_bit(clp);
1315} 1414}
1316 1415
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 20b4e30e6c8..e437fd6a819 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -46,11 +46,13 @@
46#include <linux/proc_fs.h> 46#include <linux/proc_fs.h>
47#include <linux/kdev_t.h> 47#include <linux/kdev_t.h>
48#include <linux/sunrpc/clnt.h> 48#include <linux/sunrpc/clnt.h>
49#include <linux/sunrpc/msg_prot.h>
49#include <linux/nfs.h> 50#include <linux/nfs.h>
50#include <linux/nfs4.h> 51#include <linux/nfs4.h>
51#include <linux/nfs_fs.h> 52#include <linux/nfs_fs.h>
52#include <linux/nfs_idmap.h> 53#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 54#include "nfs4_fs.h"
55#include "internal.h"
54 56
55#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
56 58
@@ -134,7 +136,7 @@ static int nfs4_stat_to_errno(int);
134#define decode_lookup_maxsz (op_decode_hdr_maxsz) 136#define decode_lookup_maxsz (op_decode_hdr_maxsz)
135#define encode_share_access_maxsz \ 137#define encode_share_access_maxsz \
136 (2) 138 (2)
137#define encode_createmode_maxsz (1 + encode_attrs_maxsz) 139#define encode_createmode_maxsz (1 + encode_attrs_maxsz + encode_verifier_maxsz)
138#define encode_opentype_maxsz (1 + encode_createmode_maxsz) 140#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
139#define encode_claim_null_maxsz (1 + nfs4_name_maxsz) 141#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
140#define encode_open_maxsz (op_encode_hdr_maxsz + \ 142#define encode_open_maxsz (op_encode_hdr_maxsz + \
@@ -299,6 +301,8 @@ static int nfs4_stat_to_errno(int);
299 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) 301 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
300#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ 302#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
301 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 303 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
304#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
305#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
302#else /* CONFIG_NFS_V4_1 */ 306#else /* CONFIG_NFS_V4_1 */
303#define encode_sequence_maxsz 0 307#define encode_sequence_maxsz 0
304#define decode_sequence_maxsz 0 308#define decode_sequence_maxsz 0
@@ -676,6 +680,25 @@ static int nfs4_stat_to_errno(int);
676 decode_sequence_maxsz + \ 680 decode_sequence_maxsz + \
677 decode_putrootfh_maxsz + \ 681 decode_putrootfh_maxsz + \
678 decode_fsinfo_maxsz) 682 decode_fsinfo_maxsz)
683#define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \
684 encode_sequence_maxsz + \
685 encode_reclaim_complete_maxsz)
686#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
687 decode_sequence_maxsz + \
688 decode_reclaim_complete_maxsz)
689
690const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
691 compound_encode_hdr_maxsz +
692 encode_sequence_maxsz +
693 encode_putfh_maxsz +
694 encode_getattr_maxsz) *
695 XDR_UNIT);
696
697const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
698 compound_decode_hdr_maxsz +
699 decode_sequence_maxsz +
700 decode_putfh_maxsz) *
701 XDR_UNIT);
679#endif /* CONFIG_NFS_V4_1 */ 702#endif /* CONFIG_NFS_V4_1 */
680 703
681static const umode_t nfs_type2fmt[] = { 704static const umode_t nfs_type2fmt[] = {
@@ -1140,6 +1163,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1140static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1163static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1141{ 1164{
1142 __be32 *p; 1165 __be32 *p;
1166 struct nfs_client *clp;
1143 1167
1144 p = reserve_space(xdr, 4); 1168 p = reserve_space(xdr, 4);
1145 switch(arg->open_flags & O_EXCL) { 1169 switch(arg->open_flags & O_EXCL) {
@@ -1148,8 +1172,23 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
1148 encode_attrs(xdr, arg->u.attrs, arg->server); 1172 encode_attrs(xdr, arg->u.attrs, arg->server);
1149 break; 1173 break;
1150 default: 1174 default:
1151 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); 1175 clp = arg->server->nfs_client;
1152 encode_nfs4_verifier(xdr, &arg->u.verifier); 1176 if (clp->cl_minorversion > 0) {
1177 if (nfs4_has_persistent_session(clp)) {
1178 *p = cpu_to_be32(NFS4_CREATE_GUARDED);
1179 encode_attrs(xdr, arg->u.attrs, arg->server);
1180 } else {
1181 struct iattr dummy;
1182
1183 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
1184 encode_nfs4_verifier(xdr, &arg->u.verifier);
1185 dummy.ia_valid = 0;
1186 encode_attrs(xdr, &dummy, arg->server);
1187 }
1188 } else {
1189 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1190 encode_nfs4_verifier(xdr, &arg->u.verifier);
1191 }
1153 } 1192 }
1154} 1193}
1155 1194
@@ -1592,6 +1631,19 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1592 hdr->nops++; 1631 hdr->nops++;
1593 hdr->replen += decode_destroy_session_maxsz; 1632 hdr->replen += decode_destroy_session_maxsz;
1594} 1633}
1634
1635static void encode_reclaim_complete(struct xdr_stream *xdr,
1636 struct nfs41_reclaim_complete_args *args,
1637 struct compound_hdr *hdr)
1638{
1639 __be32 *p;
1640
1641 p = reserve_space(xdr, 8);
1642 *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
1643 *p++ = cpu_to_be32(args->one_fs);
1644 hdr->nops++;
1645 hdr->replen += decode_reclaim_complete_maxsz;
1646}
1595#endif /* CONFIG_NFS_V4_1 */ 1647#endif /* CONFIG_NFS_V4_1 */
1596 1648
1597static void encode_sequence(struct xdr_stream *xdr, 1649static void encode_sequence(struct xdr_stream *xdr,
@@ -2096,7 +2148,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
2096 encode_compound_hdr(&xdr, req, &hdr); 2148 encode_compound_hdr(&xdr, req, &hdr);
2097 encode_sequence(&xdr, &args->seq_args, &hdr); 2149 encode_sequence(&xdr, &args->seq_args, &hdr);
2098 encode_putfh(&xdr, args->fh, &hdr); 2150 encode_putfh(&xdr, args->fh, &hdr);
2099 replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1; 2151 replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
2100 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); 2152 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
2101 2153
2102 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2154 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
@@ -2420,6 +2472,26 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2420 encode_nops(&hdr); 2472 encode_nops(&hdr);
2421 return 0; 2473 return 0;
2422} 2474}
2475
2476/*
2477 * a RECLAIM_COMPLETE request
2478 */
2479static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2480 struct nfs41_reclaim_complete_args *args)
2481{
2482 struct xdr_stream xdr;
2483 struct compound_hdr hdr = {
2484 .minorversion = nfs4_xdr_minorversion(&args->seq_args)
2485 };
2486
2487 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2488 encode_compound_hdr(&xdr, req, &hdr);
2489 encode_sequence(&xdr, &args->seq_args, &hdr);
2490 encode_reclaim_complete(&xdr, args, &hdr);
2491 encode_nops(&hdr);
2492 return 0;
2493}
2494
2423#endif /* CONFIG_NFS_V4_1 */ 2495#endif /* CONFIG_NFS_V4_1 */
2424 2496
2425static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2497static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4528,6 +4600,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
4528{ 4600{
4529 return decode_op_hdr(xdr, OP_DESTROY_SESSION); 4601 return decode_op_hdr(xdr, OP_DESTROY_SESSION);
4530} 4602}
4603
4604static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
4605{
4606 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
4607}
4531#endif /* CONFIG_NFS_V4_1 */ 4608#endif /* CONFIG_NFS_V4_1 */
4532 4609
4533static int decode_sequence(struct xdr_stream *xdr, 4610static int decode_sequence(struct xdr_stream *xdr,
@@ -4583,8 +4660,8 @@ static int decode_sequence(struct xdr_stream *xdr,
4583 dummy = be32_to_cpup(p++); 4660 dummy = be32_to_cpup(p++);
4584 /* target highest slot id - currently not processed */ 4661 /* target highest slot id - currently not processed */
4585 dummy = be32_to_cpup(p++); 4662 dummy = be32_to_cpup(p++);
4586 /* result flags - currently not processed */ 4663 /* result flags */
4587 dummy = be32_to_cpup(p); 4664 res->sr_status_flags = be32_to_cpup(p);
4588 status = 0; 4665 status = 0;
4589out_err: 4666out_err:
4590 res->sr_status = status; 4667 res->sr_status = status;
@@ -5309,7 +5386,7 @@ out:
5309} 5386}
5310 5387
5311/* 5388/*
5312 * FSINFO request 5389 * Decode FSINFO response
5313 */ 5390 */
5314static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, 5391static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
5315 struct nfs4_fsinfo_res *res) 5392 struct nfs4_fsinfo_res *res)
@@ -5330,7 +5407,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
5330} 5407}
5331 5408
5332/* 5409/*
5333 * PATHCONF request 5410 * Decode PATHCONF response
5334 */ 5411 */
5335static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, 5412static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
5336 struct nfs4_pathconf_res *res) 5413 struct nfs4_pathconf_res *res)
@@ -5351,7 +5428,7 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
5351} 5428}
5352 5429
5353/* 5430/*
5354 * STATFS request 5431 * Decode STATFS response
5355 */ 5432 */
5356static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, 5433static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
5357 struct nfs4_statfs_res *res) 5434 struct nfs4_statfs_res *res)
@@ -5372,7 +5449,7 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
5372} 5449}
5373 5450
5374/* 5451/*
5375 * GETATTR_BITMAP request 5452 * Decode GETATTR_BITMAP response
5376 */ 5453 */
5377static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) 5454static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
5378{ 5455{
@@ -5411,7 +5488,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
5411} 5488}
5412 5489
5413/* 5490/*
5414 * a SETCLIENTID request 5491 * Decode SETCLIENTID response
5415 */ 5492 */
5416static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, 5493static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5417 struct nfs_client *clp) 5494 struct nfs_client *clp)
@@ -5428,7 +5505,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5428} 5505}
5429 5506
5430/* 5507/*
5431 * a SETCLIENTID_CONFIRM request 5508 * Decode SETCLIENTID_CONFIRM response
5432 */ 5509 */
5433static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) 5510static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
5434{ 5511{
@@ -5448,7 +5525,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
5448} 5525}
5449 5526
5450/* 5527/*
5451 * DELEGRETURN request 5528 * Decode DELEGRETURN response
5452 */ 5529 */
5453static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) 5530static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
5454{ 5531{
@@ -5474,7 +5551,7 @@ out:
5474} 5551}
5475 5552
5476/* 5553/*
5477 * FS_LOCATIONS request 5554 * Decode FS_LOCATIONS response
5478 */ 5555 */
5479static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, 5556static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
5480 struct nfs4_fs_locations_res *res) 5557 struct nfs4_fs_locations_res *res)
@@ -5504,7 +5581,7 @@ out:
5504 5581
5505#if defined(CONFIG_NFS_V4_1) 5582#if defined(CONFIG_NFS_V4_1)
5506/* 5583/*
5507 * EXCHANGE_ID request 5584 * Decode EXCHANGE_ID response
5508 */ 5585 */
5509static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, 5586static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
5510 void *res) 5587 void *res)
@@ -5521,7 +5598,7 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
5521} 5598}
5522 5599
5523/* 5600/*
5524 * a CREATE_SESSION request 5601 * Decode CREATE_SESSION response
5525 */ 5602 */
5526static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, 5603static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
5527 struct nfs41_create_session_res *res) 5604 struct nfs41_create_session_res *res)
@@ -5538,7 +5615,7 @@ static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
5538} 5615}
5539 5616
5540/* 5617/*
5541 * a DESTROY_SESSION request 5618 * Decode DESTROY_SESSION response
5542 */ 5619 */
5543static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, 5620static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
5544 void *dummy) 5621 void *dummy)
@@ -5555,7 +5632,7 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
5555} 5632}
5556 5633
5557/* 5634/*
5558 * a SEQUENCE request 5635 * Decode SEQUENCE response
5559 */ 5636 */
5560static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, 5637static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
5561 struct nfs4_sequence_res *res) 5638 struct nfs4_sequence_res *res)
@@ -5572,7 +5649,7 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
5572} 5649}
5573 5650
5574/* 5651/*
5575 * a GET_LEASE_TIME request 5652 * Decode GET_LEASE_TIME response
5576 */ 5653 */
5577static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, 5654static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
5578 struct nfs4_get_lease_time_res *res) 5655 struct nfs4_get_lease_time_res *res)
@@ -5591,6 +5668,25 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
5591 status = decode_fsinfo(&xdr, res->lr_fsinfo); 5668 status = decode_fsinfo(&xdr, res->lr_fsinfo);
5592 return status; 5669 return status;
5593} 5670}
5671
5672/*
5673 * Decode RECLAIM_COMPLETE response
5674 */
5675static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5676 struct nfs41_reclaim_complete_res *res)
5677{
5678 struct xdr_stream xdr;
5679 struct compound_hdr hdr;
5680 int status;
5681
5682 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5683 status = decode_compound_hdr(&xdr, &hdr);
5684 if (!status)
5685 status = decode_sequence(&xdr, &res->seq_res, rqstp);
5686 if (!status)
5687 status = decode_reclaim_complete(&xdr, (void *)NULL);
5688 return status;
5689}
5594#endif /* CONFIG_NFS_V4_1 */ 5690#endif /* CONFIG_NFS_V4_1 */
5595 5691
5596__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 5692__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
@@ -5767,6 +5863,7 @@ struct rpc_procinfo nfs4_procedures[] = {
5767 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), 5863 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
5768 PROC(SEQUENCE, enc_sequence, dec_sequence), 5864 PROC(SEQUENCE, enc_sequence, dec_sequence),
5769 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 5865 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5866 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
5770#endif /* CONFIG_NFS_V4_1 */ 5867#endif /* CONFIG_NFS_V4_1 */
5771}; 5868};
5772 5869
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 12c9e66d3f1..db9b360ae19 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -356,25 +356,19 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
356 struct nfs_readres *resp = &data->res; 356 struct nfs_readres *resp = &data->res;
357 357
358 if (resp->eof || resp->count == argp->count) 358 if (resp->eof || resp->count == argp->count)
359 goto out; 359 return;
360 360
361 /* This is a short read! */ 361 /* This is a short read! */
362 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 362 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
363 /* Has the server at least made some progress? */ 363 /* Has the server at least made some progress? */
364 if (resp->count == 0) 364 if (resp->count == 0)
365 goto out; 365 return;
366 366
367 /* Yes, so retry the read at the end of the data */ 367 /* Yes, so retry the read at the end of the data */
368 argp->offset += resp->count; 368 argp->offset += resp->count;
369 argp->pgbase += resp->count; 369 argp->pgbase += resp->count;
370 argp->count -= resp->count; 370 argp->count -= resp->count;
371 nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); 371 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
372 return;
373out:
374 nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client,
375 &data->res.seq_res);
376 return;
377
378} 372}
379 373
380/* 374/*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 90be551b80c..ce907efc550 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -175,14 +175,16 @@ static const match_table_t nfs_mount_option_tokens = {
175}; 175};
176 176
177enum { 177enum {
178 Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma, 178 Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
179 179
180 Opt_xprt_err 180 Opt_xprt_err
181}; 181};
182 182
183static const match_table_t nfs_xprt_protocol_tokens = { 183static const match_table_t nfs_xprt_protocol_tokens = {
184 { Opt_xprt_udp, "udp" }, 184 { Opt_xprt_udp, "udp" },
185 { Opt_xprt_udp6, "udp6" },
185 { Opt_xprt_tcp, "tcp" }, 186 { Opt_xprt_tcp, "tcp" },
187 { Opt_xprt_tcp6, "tcp6" },
186 { Opt_xprt_rdma, "rdma" }, 188 { Opt_xprt_rdma, "rdma" },
187 189
188 { Opt_xprt_err, NULL } 190 { Opt_xprt_err, NULL }
@@ -492,6 +494,45 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
492 return sec_flavours[i].str; 494 return sec_flavours[i].str;
493} 495}
494 496
497static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss,
498 int showdefaults)
499{
500 struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address;
501
502 seq_printf(m, ",mountproto=");
503 switch (sap->sa_family) {
504 case AF_INET:
505 switch (nfss->mountd_protocol) {
506 case IPPROTO_UDP:
507 seq_printf(m, RPCBIND_NETID_UDP);
508 break;
509 case IPPROTO_TCP:
510 seq_printf(m, RPCBIND_NETID_TCP);
511 break;
512 default:
513 if (showdefaults)
514 seq_printf(m, "auto");
515 }
516 break;
517 case AF_INET6:
518 switch (nfss->mountd_protocol) {
519 case IPPROTO_UDP:
520 seq_printf(m, RPCBIND_NETID_UDP6);
521 break;
522 case IPPROTO_TCP:
523 seq_printf(m, RPCBIND_NETID_TCP6);
524 break;
525 default:
526 if (showdefaults)
527 seq_printf(m, "auto");
528 }
529 break;
530 default:
531 if (showdefaults)
532 seq_printf(m, "auto");
533 }
534}
535
495static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, 536static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
496 int showdefaults) 537 int showdefaults)
497{ 538{
@@ -505,7 +546,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
505 } 546 }
506 case AF_INET6: { 547 case AF_INET6: {
507 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; 548 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
508 seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr); 549 seq_printf(m, ",mountaddr=%pI6c", &sin6->sin6_addr);
509 break; 550 break;
510 } 551 }
511 default: 552 default:
@@ -518,17 +559,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
518 if (nfss->mountd_port || showdefaults) 559 if (nfss->mountd_port || showdefaults)
519 seq_printf(m, ",mountport=%u", nfss->mountd_port); 560 seq_printf(m, ",mountport=%u", nfss->mountd_port);
520 561
521 switch (nfss->mountd_protocol) { 562 nfs_show_mountd_netid(m, nfss, showdefaults);
522 case IPPROTO_UDP:
523 seq_printf(m, ",mountproto=udp");
524 break;
525 case IPPROTO_TCP:
526 seq_printf(m, ",mountproto=tcp");
527 break;
528 default:
529 if (showdefaults)
530 seq_printf(m, ",mountproto=auto");
531 }
532} 563}
533 564
534/* 565/*
@@ -578,7 +609,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
578 seq_puts(m, nfs_infop->nostr); 609 seq_puts(m, nfs_infop->nostr);
579 } 610 }
580 seq_printf(m, ",proto=%s", 611 seq_printf(m, ",proto=%s",
581 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); 612 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
582 if (version == 4) { 613 if (version == 4) {
583 if (nfss->port != NFS_PORT) 614 if (nfss->port != NFS_PORT)
584 seq_printf(m, ",port=%u", nfss->port); 615 seq_printf(m, ",port=%u", nfss->port);
@@ -714,8 +745,6 @@ static void nfs_umount_begin(struct super_block *sb)
714 struct nfs_server *server; 745 struct nfs_server *server;
715 struct rpc_clnt *rpc; 746 struct rpc_clnt *rpc;
716 747
717 lock_kernel();
718
719 server = NFS_SB(sb); 748 server = NFS_SB(sb);
720 /* -EIO all pending I/O */ 749 /* -EIO all pending I/O */
721 rpc = server->client_acl; 750 rpc = server->client_acl;
@@ -724,8 +753,6 @@ static void nfs_umount_begin(struct super_block *sb)
724 rpc = server->client; 753 rpc = server->client;
725 if (!IS_ERR(rpc)) 754 if (!IS_ERR(rpc))
726 rpc_killall_tasks(rpc); 755 rpc_killall_tasks(rpc);
727
728 unlock_kernel();
729} 756}
730 757
731static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) 758static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version)
@@ -734,8 +761,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
734 761
735 data = kzalloc(sizeof(*data), GFP_KERNEL); 762 data = kzalloc(sizeof(*data), GFP_KERNEL);
736 if (data) { 763 if (data) {
737 data->rsize = NFS_MAX_FILE_IO_SIZE;
738 data->wsize = NFS_MAX_FILE_IO_SIZE;
739 data->acregmin = NFS_DEF_ACREGMIN; 764 data->acregmin = NFS_DEF_ACREGMIN;
740 data->acregmax = NFS_DEF_ACREGMAX; 765 data->acregmax = NFS_DEF_ACREGMAX;
741 data->acdirmin = NFS_DEF_ACDIRMIN; 766 data->acdirmin = NFS_DEF_ACDIRMIN;
@@ -887,6 +912,8 @@ static int nfs_parse_mount_options(char *raw,
887{ 912{
888 char *p, *string, *secdata; 913 char *p, *string, *secdata;
889 int rc, sloppy = 0, invalid_option = 0; 914 int rc, sloppy = 0, invalid_option = 0;
915 unsigned short protofamily = AF_UNSPEC;
916 unsigned short mountfamily = AF_UNSPEC;
890 917
891 if (!raw) { 918 if (!raw) {
892 dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); 919 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -1232,12 +1259,17 @@ static int nfs_parse_mount_options(char *raw,
1232 token = match_token(string, 1259 token = match_token(string,
1233 nfs_xprt_protocol_tokens, args); 1260 nfs_xprt_protocol_tokens, args);
1234 1261
1262 protofamily = AF_INET;
1235 switch (token) { 1263 switch (token) {
1264 case Opt_xprt_udp6:
1265 protofamily = AF_INET6;
1236 case Opt_xprt_udp: 1266 case Opt_xprt_udp:
1237 mnt->flags &= ~NFS_MOUNT_TCP; 1267 mnt->flags &= ~NFS_MOUNT_TCP;
1238 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1268 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1239 kfree(string); 1269 kfree(string);
1240 break; 1270 break;
1271 case Opt_xprt_tcp6:
1272 protofamily = AF_INET6;
1241 case Opt_xprt_tcp: 1273 case Opt_xprt_tcp:
1242 mnt->flags |= NFS_MOUNT_TCP; 1274 mnt->flags |= NFS_MOUNT_TCP;
1243 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1275 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
@@ -1265,10 +1297,15 @@ static int nfs_parse_mount_options(char *raw,
1265 nfs_xprt_protocol_tokens, args); 1297 nfs_xprt_protocol_tokens, args);
1266 kfree(string); 1298 kfree(string);
1267 1299
1300 mountfamily = AF_INET;
1268 switch (token) { 1301 switch (token) {
1302 case Opt_xprt_udp6:
1303 mountfamily = AF_INET6;
1269 case Opt_xprt_udp: 1304 case Opt_xprt_udp:
1270 mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; 1305 mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
1271 break; 1306 break;
1307 case Opt_xprt_tcp6:
1308 mountfamily = AF_INET6;
1272 case Opt_xprt_tcp: 1309 case Opt_xprt_tcp:
1273 mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; 1310 mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
1274 break; 1311 break;
@@ -1367,8 +1404,33 @@ static int nfs_parse_mount_options(char *raw,
1367 if (!sloppy && invalid_option) 1404 if (!sloppy && invalid_option)
1368 return 0; 1405 return 0;
1369 1406
1407 /*
1408 * verify that any proto=/mountproto= options match the address
1409 * familiies in the addr=/mountaddr= options.
1410 */
1411 if (protofamily != AF_UNSPEC &&
1412 protofamily != mnt->nfs_server.address.ss_family)
1413 goto out_proto_mismatch;
1414
1415 if (mountfamily != AF_UNSPEC) {
1416 if (mnt->mount_server.addrlen) {
1417 if (mountfamily != mnt->mount_server.address.ss_family)
1418 goto out_mountproto_mismatch;
1419 } else {
1420 if (mountfamily != mnt->nfs_server.address.ss_family)
1421 goto out_mountproto_mismatch;
1422 }
1423 }
1424
1370 return 1; 1425 return 1;
1371 1426
1427out_mountproto_mismatch:
1428 printk(KERN_INFO "NFS: mount server address does not match mountproto= "
1429 "option\n");
1430 return 0;
1431out_proto_mismatch:
1432 printk(KERN_INFO "NFS: server address does not match proto= option\n");
1433 return 0;
1372out_invalid_address: 1434out_invalid_address:
1373 printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); 1435 printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
1374 return 0; 1436 return 0;
@@ -1881,7 +1943,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
1881 if (data == NULL) 1943 if (data == NULL)
1882 return -ENOMEM; 1944 return -ENOMEM;
1883 1945
1884 lock_kernel();
1885 /* fill out struct with values from existing mount */ 1946 /* fill out struct with values from existing mount */
1886 data->flags = nfss->flags; 1947 data->flags = nfss->flags;
1887 data->rsize = nfss->rsize; 1948 data->rsize = nfss->rsize;
@@ -1907,7 +1968,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
1907 error = nfs_compare_remount_data(nfss, data); 1968 error = nfs_compare_remount_data(nfss, data);
1908out: 1969out:
1909 kfree(data); 1970 kfree(data);
1910 unlock_kernel();
1911 return error; 1971 return error;
1912} 1972}
1913 1973
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index b62481dabae..70e1fbbaaea 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -22,63 +22,55 @@ static struct ctl_table_header *nfs_callback_sysctl_table;
22static ctl_table nfs_cb_sysctls[] = { 22static ctl_table nfs_cb_sysctls[] = {
23#ifdef CONFIG_NFS_V4 23#ifdef CONFIG_NFS_V4
24 { 24 {
25 .ctl_name = CTL_UNNUMBERED,
26 .procname = "nfs_callback_tcpport", 25 .procname = "nfs_callback_tcpport",
27 .data = &nfs_callback_set_tcpport, 26 .data = &nfs_callback_set_tcpport,
28 .maxlen = sizeof(int), 27 .maxlen = sizeof(int),
29 .mode = 0644, 28 .mode = 0644,
30 .proc_handler = &proc_dointvec_minmax, 29 .proc_handler = proc_dointvec_minmax,
31 .extra1 = (int *)&nfs_set_port_min, 30 .extra1 = (int *)&nfs_set_port_min,
32 .extra2 = (int *)&nfs_set_port_max, 31 .extra2 = (int *)&nfs_set_port_max,
33 }, 32 },
34 { 33 {
35 .ctl_name = CTL_UNNUMBERED,
36 .procname = "idmap_cache_timeout", 34 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 35 .data = &nfs_idmap_cache_timeout,
38 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
39 .mode = 0644, 37 .mode = 0644,
40 .proc_handler = &proc_dointvec_jiffies, 38 .proc_handler = proc_dointvec_jiffies,
41 .strategy = &sysctl_jiffies,
42 }, 39 },
43#endif 40#endif
44 { 41 {
45 .ctl_name = CTL_UNNUMBERED,
46 .procname = "nfs_mountpoint_timeout", 42 .procname = "nfs_mountpoint_timeout",
47 .data = &nfs_mountpoint_expiry_timeout, 43 .data = &nfs_mountpoint_expiry_timeout,
48 .maxlen = sizeof(nfs_mountpoint_expiry_timeout), 44 .maxlen = sizeof(nfs_mountpoint_expiry_timeout),
49 .mode = 0644, 45 .mode = 0644,
50 .proc_handler = &proc_dointvec_jiffies, 46 .proc_handler = proc_dointvec_jiffies,
51 .strategy = &sysctl_jiffies,
52 }, 47 },
53 { 48 {
54 .ctl_name = CTL_UNNUMBERED,
55 .procname = "nfs_congestion_kb", 49 .procname = "nfs_congestion_kb",
56 .data = &nfs_congestion_kb, 50 .data = &nfs_congestion_kb,
57 .maxlen = sizeof(nfs_congestion_kb), 51 .maxlen = sizeof(nfs_congestion_kb),
58 .mode = 0644, 52 .mode = 0644,
59 .proc_handler = &proc_dointvec, 53 .proc_handler = proc_dointvec,
60 }, 54 },
61 { .ctl_name = 0 } 55 { }
62}; 56};
63 57
64static ctl_table nfs_cb_sysctl_dir[] = { 58static ctl_table nfs_cb_sysctl_dir[] = {
65 { 59 {
66 .ctl_name = CTL_UNNUMBERED,
67 .procname = "nfs", 60 .procname = "nfs",
68 .mode = 0555, 61 .mode = 0555,
69 .child = nfs_cb_sysctls, 62 .child = nfs_cb_sysctls,
70 }, 63 },
71 { .ctl_name = 0 } 64 { }
72}; 65};
73 66
74static ctl_table nfs_cb_sysctl_root[] = { 67static ctl_table nfs_cb_sysctl_root[] = {
75 { 68 {
76 .ctl_name = CTL_FS,
77 .procname = "fs", 69 .procname = "fs",
78 .mode = 0555, 70 .mode = 0555,
79 .child = nfs_cb_sysctl_dir, 71 .child = nfs_cb_sysctl_dir,
80 }, 72 },
81 { .ctl_name = 0 } 73 { }
82}; 74};
83 75
84int nfs_register_sysctl(void) 76int nfs_register_sysctl(void)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 1064c91ae81..6da3d3ff6ed 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -83,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
83 struct inode *dir = data->dir; 83 struct inode *dir = data->dir;
84 84
85 if (!NFS_PROTO(dir)->unlink_done(task, dir)) 85 if (!NFS_PROTO(dir)->unlink_done(task, dir))
86 nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); 86 nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client);
87} 87}
88 88
89/** 89/**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53eb26c16b5..d171696017f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc)
178{ 178{
179 if (wbc->for_reclaim) 179 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 180 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate) 181 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 182 return FLUSH_LOWPRI;
183 return 0; 183 return 0;
184} 184}
@@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page,
774 */ 774 */
775 if (nfs_write_pageuptodate(page, inode) && 775 if (nfs_write_pageuptodate(page, inode) &&
776 inode->i_flock == NULL && 776 inode->i_flock == NULL &&
777 !(file->f_flags & O_SYNC)) { 777 !(file->f_flags & O_DSYNC)) {
778 count = max(count + offset, nfs_page_length(page)); 778 count = max(count + offset, nfs_page_length(page));
779 offset = 0; 779 offset = 0;
780 } 780 }
@@ -1216,7 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1216 */ 1216 */
1217 argp->stable = NFS_FILE_SYNC; 1217 argp->stable = NFS_FILE_SYNC;
1218 } 1218 }
1219 nfs4_restart_rpc(task, server->nfs_client); 1219 nfs_restart_rpc(task, server->nfs_client);
1220 return -EAGAIN; 1220 return -EAGAIN;
1221 } 1221 }
1222 if (time_before(complain, jiffies)) { 1222 if (time_before(complain, jiffies)) {
@@ -1228,7 +1228,6 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1228 /* Can't do anything about it except throw an error. */ 1228 /* Can't do anything about it except throw an error. */
1229 task->tk_status = -EIO; 1229 task->tk_status = -EIO;
1230 } 1230 }
1231 nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res);
1232 return 0; 1231 return 0;
1233} 1232}
1234 1233
@@ -1612,15 +1611,16 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1612 if (ret) 1611 if (ret)
1613 goto out_unlock; 1612 goto out_unlock;
1614 page_cache_get(newpage); 1613 page_cache_get(newpage);
1614 spin_lock(&mapping->host->i_lock);
1615 req->wb_page = newpage; 1615 req->wb_page = newpage;
1616 SetPagePrivate(newpage); 1616 SetPagePrivate(newpage);
1617 set_page_private(newpage, page_private(page)); 1617 set_page_private(newpage, (unsigned long)req);
1618 ClearPagePrivate(page); 1618 ClearPagePrivate(page);
1619 set_page_private(page, 0); 1619 set_page_private(page, 0);
1620 spin_unlock(&mapping->host->i_lock);
1620 page_cache_release(page); 1621 page_cache_release(page);
1621out_unlock: 1622out_unlock:
1622 nfs_clear_page_tag_locked(req); 1623 nfs_clear_page_tag_locked(req);
1623 nfs_release_request(req);
1624out: 1624out:
1625 return ret; 1625 return ret;
1626} 1626}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 8f9a20556f7..d3854d94b7c 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -7,8 +7,6 @@
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/file.h> 8#include <linux/file.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/sunrpc/svc.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/syscall.h> 10#include <linux/nfsd/syscall.h>
13#include <linux/cred.h> 11#include <linux/cred.h>
14#include <linux/sched.h> 12#include <linux/sched.h>
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 36fcabbf518..79717a40dab 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,15 +1,7 @@
1/* 1/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2 * linux/fs/nfsd/auth.c
3 *
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */
6 2
7#include <linux/types.h>
8#include <linux/sched.h> 3#include <linux/sched.h>
9#include <linux/sunrpc/svc.h> 4#include "nfsd.h"
10#include <linux/sunrpc/svcauth.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/export.h>
13#include "auth.h" 5#include "auth.h"
14 6
15int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) 7int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 00000000000..d892be61016
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
1/*
2 * Request reply cache. This was heavily inspired by the
3 * implementation in 4.3BSD/4.4BSD.
4 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef NFSCACHE_H
9#define NFSCACHE_H
10
11#include <linux/sunrpc/svc.h>
12
13/*
14 * Representation of a reply cache entry.
15 */
16struct svc_cacherep {
17 struct hlist_node c_hash;
18 struct list_head c_lru;
19
20 unsigned char c_state, /* unused, inprog, done */
21 c_type, /* status, buffer */
22 c_secure : 1; /* req came from port < 1024 */
23 struct sockaddr_in c_addr;
24 __be32 c_xid;
25 u32 c_prot;
26 u32 c_proc;
27 u32 c_vers;
28 unsigned long c_timestamp;
29 union {
30 struct kvec u_vec;
31 __be32 u_status;
32 } c_u;
33};
34
35#define c_replvec c_u.u_vec
36#define c_replstat c_u.u_status
37
38/* cache entry states */
39enum {
40 RC_UNUSED,
41 RC_INPROG,
42 RC_DONE
43};
44
45/* return values */
46enum {
47 RC_DROPIT,
48 RC_REPLY,
49 RC_DOIT,
50 RC_INTR
51};
52
53/*
54 * Cache types.
55 * We may want to add more types one day, e.g. for diropres and
56 * attrstat replies. Using cache entries with fixed length instead
57 * of buffer pointers may be more efficient.
58 */
59enum {
60 RC_NOCACHE,
61 RC_REPLSTAT,
62 RC_REPLBUFF,
63};
64
65/*
66 * If requests are retransmitted within this interval, they're dropped.
67 */
68#define RC_DELAY (HZ/5)
69
70int nfsd_reply_cache_init(void);
71void nfsd_reply_cache_shutdown(void);
72int nfsd_cache_lookup(struct svc_rqst *, int);
73void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
74
75#ifdef CONFIG_NFSD_V4
76void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
77#else /* CONFIG_NFSD_V4 */
78static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
79{
80}
81#endif /* CONFIG_NFSD_V4 */
82
83#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c1c9e035d4a..c487810a236 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/export.c
4 *
5 * NFS exporting and validation. 3 * NFS exporting and validation.
6 * 4 *
7 * We maintain a list of clients, each of which has a list of 5 * We maintain a list of clients, each of which has a list of
@@ -14,29 +12,16 @@
14 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> 12 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
15 */ 13 */
16 14
17#include <linux/unistd.h>
18#include <linux/slab.h>
19#include <linux/stat.h>
20#include <linux/in.h>
21#include <linux/seq_file.h>
22#include <linux/syscalls.h>
23#include <linux/rwsem.h>
24#include <linux/dcache.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/mount.h>
27#include <linux/hash.h>
28#include <linux/module.h> 16#include <linux/module.h>
29#include <linux/exportfs.h> 17#include <linux/exportfs.h>
30 18
31#include <linux/sunrpc/svc.h>
32#include <linux/nfsd/nfsd.h>
33#include <linux/nfsd/nfsfh.h>
34#include <linux/nfsd/syscall.h> 19#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h>
36#include <linux/sunrpc/msg_prot.h>
37#include <linux/sunrpc/gss_api.h>
38#include <net/ipv6.h> 20#include <net/ipv6.h>
39 21
22#include "nfsd.h"
23#include "nfsfh.h"
24
40#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
41 26
42typedef struct auth_domain svc_client; 27typedef struct auth_domain svc_client;
@@ -369,16 +354,25 @@ static struct svc_export *svc_export_update(struct svc_export *new,
369 struct svc_export *old); 354 struct svc_export *old);
370static struct svc_export *svc_export_lookup(struct svc_export *); 355static struct svc_export *svc_export_lookup(struct svc_export *);
371 356
372static int check_export(struct inode *inode, int flags, unsigned char *uuid) 357static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
373{ 358{
374 359
375 /* We currently export only dirs and regular files. 360 /*
376 * This is what umountd does. 361 * We currently export only dirs, regular files, and (for v4
362 * pseudoroot) symlinks.
377 */ 363 */
378 if (!S_ISDIR(inode->i_mode) && 364 if (!S_ISDIR(inode->i_mode) &&
365 !S_ISLNK(inode->i_mode) &&
379 !S_ISREG(inode->i_mode)) 366 !S_ISREG(inode->i_mode))
380 return -ENOTDIR; 367 return -ENOTDIR;
381 368
369 /*
370 * Mountd should never pass down a writeable V4ROOT export, but,
371 * just to make sure:
372 */
373 if (*flags & NFSEXP_V4ROOT)
374 *flags |= NFSEXP_READONLY;
375
382 /* There are two requirements on a filesystem to be exportable. 376 /* There are two requirements on a filesystem to be exportable.
383 * 1: We must be able to identify the filesystem from a number. 377 * 1: We must be able to identify the filesystem from a number.
384 * either a device number (so FS_REQUIRES_DEV needed) 378 * either a device number (so FS_REQUIRES_DEV needed)
@@ -387,7 +381,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
387 * This means that s_export_op must be set. 381 * This means that s_export_op must be set.
388 */ 382 */
389 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && 383 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
390 !(flags & NFSEXP_FSID) && 384 !(*flags & NFSEXP_FSID) &&
391 uuid == NULL) { 385 uuid == NULL) {
392 dprintk("exp_export: export of non-dev fs without fsid\n"); 386 dprintk("exp_export: export of non-dev fs without fsid\n");
393 return -EINVAL; 387 return -EINVAL;
@@ -602,7 +596,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
602 goto out4; 596 goto out4;
603 } 597 }
604 598
605 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, 599 err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
606 exp.ex_uuid); 600 exp.ex_uuid);
607 if (err) 601 if (err)
608 goto out4; 602 goto out4;
@@ -1041,7 +1035,7 @@ exp_export(struct nfsctl_export *nxp)
1041 goto finish; 1035 goto finish;
1042 } 1036 }
1043 1037
1044 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); 1038 err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL);
1045 if (err) goto finish; 1039 if (err) goto finish;
1046 1040
1047 err = -ENOMEM; 1041 err = -ENOMEM;
@@ -1320,6 +1314,23 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1320 return exp; 1314 return exp;
1321} 1315}
1322 1316
1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
1318{
1319 struct svc_export *exp;
1320 u32 fsidv[2];
1321
1322 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1323
1324 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1325 /*
1326 * We shouldn't have accepting an nfsv4 request at all if we
1327 * don't have a pseudoexport!:
1328 */
1329 if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
1330 exp = ERR_PTR(-ESERVERFAULT);
1331 return exp;
1332}
1333
1323/* 1334/*
1324 * Called when we need the filehandle for the root of the pseudofs, 1335 * Called when we need the filehandle for the root of the pseudofs,
1325 * for a given NFSv4 client. The root is defined to be the 1336 * for a given NFSv4 client. The root is defined to be the
@@ -1330,11 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1330{ 1341{
1331 struct svc_export *exp; 1342 struct svc_export *exp;
1332 __be32 rv; 1343 __be32 rv;
1333 u32 fsidv[2];
1334 1344
1335 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); 1345 exp = find_fsidzero_export(rqstp);
1336
1337 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1338 if (IS_ERR(exp)) 1346 if (IS_ERR(exp))
1339 return nfserrno(PTR_ERR(exp)); 1347 return nfserrno(PTR_ERR(exp));
1340 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1348 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
@@ -1425,6 +1433,7 @@ static struct flags {
1425 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1433 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1426 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1434 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
1427 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, 1435 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
1436 { NFSEXP_V4ROOT, {"v4root", ""}},
1428#ifdef MSNFS 1437#ifdef MSNFS
1429 { NFSEXP_MSNFS, {"msnfs", ""}}, 1438 { NFSEXP_MSNFS, {"msnfs", ""}},
1430#endif 1439#endif
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9af..0c6d8167013 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/lockd.c
3 *
4 * This file contains all the stubs needed when communicating with lockd. 2 * This file contains all the stubs needed when communicating with lockd.
5 * This level of indirection is necessary so we can run nfsd+lockd without 3 * This level of indirection is necessary so we can run nfsd+lockd without
6 * requiring the nfs client to be compiled in/loaded, and vice versa. 4 * requiring the nfs client to be compiled in/loaded, and vice versa.
@@ -8,14 +6,10 @@
8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/fs.h>
13#include <linux/file.h> 9#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/sunrpc/svc.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/bind.h> 10#include <linux/lockd/bind.h>
11#include "nfsd.h"
12#include "vfs.h"
19 13
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 14#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 15
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e8411..f20589d2ae2 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,19 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs2acl.c
3 *
4 * Process version 2 NFSACL requests. 2 * Process version 2 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr.h>
14#include <linux/nfsd/xdr3.h>
15#include <linux/posix_acl.h>
16#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
17 13
18#define NFSDDBG_FACILITY NFSDDBG_PROC 14#define NFSDDBG_FACILITY NFSDDBG_PROC
19#define RETURN_STATUS(st) { resp->status = (st); return (st); } 15#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -217,6 +213,16 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
217 * XDR encode functions 213 * XDR encode functions
218 */ 214 */
219 215
216/*
217 * There must be an encoding function for void results so svc_process
218 * will work properly.
219 */
220int
221nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
222{
223 return xdr_ressize_check(rqstp, p);
224}
225
220/* GETACL */ 226/* GETACL */
221static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, 227static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
222 struct nfsd3_getaclres *resp) 228 struct nfsd3_getaclres *resp)
@@ -308,7 +314,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
308} 314}
309 315
310#define nfsaclsvc_decode_voidargs NULL 316#define nfsaclsvc_decode_voidargs NULL
311#define nfsaclsvc_encode_voidres NULL
312#define nfsaclsvc_release_void NULL 317#define nfsaclsvc_release_void NULL
313#define nfsd3_fhandleargs nfsd_fhandle 318#define nfsd3_fhandleargs nfsd_fhandle
314#define nfsd3_attrstatres nfsd_attrstat 319#define nfsd3_attrstatres nfsd_attrstat
@@ -346,5 +351,5 @@ struct svc_version nfsd_acl_version2 = {
346 .vs_proc = nfsd_acl_procedures2, 351 .vs_proc = nfsd_acl_procedures2,
347 .vs_dispatch = nfsd_dispatch, 352 .vs_dispatch = nfsd_dispatch,
348 .vs_xdrsize = NFS3_SVC_XDRSIZE, 353 .vs_xdrsize = NFS3_SVC_XDRSIZE,
349 .vs_hidden = 1, 354 .vs_hidden = 0,
350}; 355};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb377a..e0c4846bad9 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -1,18 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3acl.c
3 *
4 * Process version 3 NFSACL requests. 2 * Process version 3 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs3.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr3.h>
14#include <linux/posix_acl.h>
15#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
16 13
17#define RETURN_STATUS(st) { resp->status = (st); return (st); } 14#define RETURN_STATUS(st) { resp->status = (st); return (st); }
18 15
@@ -264,6 +261,6 @@ struct svc_version nfsd_acl_version3 = {
264 .vs_proc = nfsd_acl_procedures3, 261 .vs_proc = nfsd_acl_procedures3,
265 .vs_dispatch = nfsd_dispatch, 262 .vs_dispatch = nfsd_dispatch,
266 .vs_xdrsize = NFS3_SVC_XDRSIZE, 263 .vs_xdrsize = NFS3_SVC_XDRSIZE,
267 .vs_hidden = 1, 264 .vs_hidden = 0,
268}; 265};
269 266
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a92..3d68f45a37b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -1,30 +1,16 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3proc.c
3 *
4 * Process version 3 NFS requests. 2 * Process version 3 NFS requests.
5 * 3 *
6 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/linkage.h>
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/fs.h> 7#include <linux/fs.h>
13#include <linux/ext2_fs.h> 8#include <linux/ext2_fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/major.h>
21#include <linux/magic.h> 9#include <linux/magic.h>
22 10
23#include <linux/sunrpc/svc.h> 11#include "cache.h"
24#include <linux/nfsd/nfsd.h> 12#include "xdr3.h"
25#include <linux/nfsd/cache.h> 13#include "vfs.h"
26#include <linux/nfsd/xdr3.h>
27#include <linux/nfs3.h>
28 14
29#define NFSDDBG_FACILITY NFSDDBG_PROC 15#define NFSDDBG_FACILITY NFSDDBG_PROC
30 16
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d0a2ce1b432..2a533a0af2a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3xdr.c
3 *
4 * XDR support for nfsd/protocol version 3. 2 * XDR support for nfsd/protocol version 3.
5 * 3 *
6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -8,19 +6,8 @@
8 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! 6 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/time.h>
13#include <linux/nfs3.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/dcache.h>
17#include <linux/namei.h> 9#include <linux/namei.h>
18#include <linux/mm.h> 10#include "xdr3.h"
19#include <linux/vfs.h>
20#include <linux/sunrpc/xdr.h>
21#include <linux/sunrpc/svc.h>
22#include <linux/nfsd/nfsd.h>
23#include <linux/nfsd/xdr3.h>
24#include "auth.h" 11#include "auth.h"
25 12
26#define NFSDDBG_FACILITY NFSDDBG_XDR 13#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 725d02f210e..88150685df3 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfs4acl/acl.c
3 *
4 * Common NFSv4 ACL handling code. 2 * Common NFSv4 ACL handling code.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of Michigan. 4 * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
@@ -36,15 +34,7 @@
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */ 35 */
38 36
39#include <linux/string.h>
40#include <linux/slab.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/fs.h>
44#include <linux/module.h>
45#include <linux/nfs_fs.h> 37#include <linux/nfs_fs.h>
46#include <linux/posix_acl.h>
47#include <linux/nfs4.h>
48#include <linux/nfs4_acl.h> 38#include <linux/nfs4_acl.h>
49 39
50 40
@@ -389,7 +379,7 @@ sort_pacl(struct posix_acl *pacl)
389 sort_pacl_range(pacl, 1, i-1); 379 sort_pacl_range(pacl, 1, i-1);
390 380
391 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); 381 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
392 j = i++; 382 j = ++i;
393 while (pacl->a_entries[j].e_tag == ACL_GROUP) 383 while (pacl->a_entries[j].e_tag == ACL_GROUP)
394 j++; 384 j++;
395 sort_pacl_range(pacl, i, j-1); 385 sort_pacl_range(pacl, i, j-1);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24e8d78f8dd..c6eed2a3b09 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs4callback.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan. 2 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved. 3 * All rights reserved.
6 * 4 *
@@ -33,22 +31,9 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 32 */
35 33
36#include <linux/module.h>
37#include <linux/list.h>
38#include <linux/inet.h>
39#include <linux/errno.h>
40#include <linux/delay.h>
41#include <linux/sched.h>
42#include <linux/kthread.h>
43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h> 35#include "nfsd.h"
47#include <linux/nfsd/nfsd.h> 36#include "state.h"
48#include <linux/nfsd/state.h>
49#include <linux/sunrpc/sched.h>
50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
52 37
53#define NFSDDBG_FACILITY NFSDDBG_PROC 38#define NFSDDBG_FACILITY NFSDDBG_PROC
54 39
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index ba2c199592f..6e2983b27f3 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4idmap.c
3 *
4 * Mapping of UID/GIDs to name and vice versa. 2 * Mapping of UID/GIDs to name and vice versa.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of 4 * Copyright (c) 2002, 2003 The Regents of the University of
@@ -35,22 +33,9 @@
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
38#include <linux/init.h>
39
40#include <linux/mm.h>
41#include <linux/errno.h>
42#include <linux/string.h>
43#include <linux/sunrpc/clnt.h>
44#include <linux/nfs.h>
45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h>
48#include <linux/sunrpc/cache.h>
49#include <linux/nfsd_idmap.h> 36#include <linux/nfsd_idmap.h>
50#include <linux/list.h>
51#include <linux/time.h>
52#include <linux/seq_file.h> 37#include <linux/seq_file.h>
53#include <linux/sunrpc/svcauth.h> 38#include <linux/sched.h>
54 39
55/* 40/*
56 * Cache entry 41 * Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0..37514c46984 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4proc.c
3 *
4 * Server-side procedures for NFSv4. 2 * Server-side procedures for NFSv4.
5 * 3 *
6 * Copyright (c) 2002 The Regents of the University of Michigan. 4 * Copyright (c) 2002 The Regents of the University of Michigan.
@@ -34,20 +32,11 @@
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */ 34 */
37
38#include <linux/param.h>
39#include <linux/major.h>
40#include <linux/slab.h>
41#include <linux/file.h> 35#include <linux/file.h>
42 36
43#include <linux/sunrpc/svc.h> 37#include "cache.h"
44#include <linux/nfsd/nfsd.h> 38#include "xdr4.h"
45#include <linux/nfsd/cache.h> 39#include "vfs.h"
46#include <linux/nfs4.h>
47#include <linux/nfsd/state.h>
48#include <linux/nfsd/xdr4.h>
49#include <linux/nfs4_acl.h>
50#include <linux/sunrpc/gss_api.h>
51 40
52#define NFSDDBG_FACILITY NFSDDBG_PROC 41#define NFSDDBG_FACILITY NFSDDBG_PROC
53 42
@@ -170,7 +159,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
170 accmode |= NFSD_MAY_READ; 159 accmode |= NFSD_MAY_READ;
171 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 160 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
172 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); 161 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
173 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) 162 if (open->op_share_deny & NFS4_SHARE_DENY_READ)
174 accmode |= NFSD_MAY_WRITE; 163 accmode |= NFSD_MAY_WRITE;
175 164
176 status = fh_verify(rqstp, current_fh, S_IFREG, accmode); 165 status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046..5a754f7b71e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan. 2* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -33,20 +31,14 @@
33* 31*
34*/ 32*/
35 33
36#include <linux/err.h>
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h> 34#include <linux/file.h>
44#include <linux/namei.h> 35#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 36#include <linux/crypto.h>
48#include <linux/sched.h> 37#include <linux/sched.h>
49#include <linux/mount.h> 38
39#include "nfsd.h"
40#include "state.h"
41#include "vfs.h"
50 42
51#define NFSDDBG_FACILITY NFSDDBG_PROC 43#define NFSDDBG_FACILITY NFSDDBG_PROC
52 44
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9bdbeb..f19ed866c95 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4state.c
3*
4* Copyright (c) 2001 The Regents of the University of Michigan. 2* Copyright (c) 2001 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -34,28 +32,14 @@
34* 32*
35*/ 33*/
36 34
37#include <linux/param.h>
38#include <linux/major.h>
39#include <linux/slab.h>
40
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h>
44#include <linux/file.h> 35#include <linux/file.h>
45#include <linux/mount.h>
46#include <linux/workqueue.h>
47#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
48#include <linux/kthread.h>
49#include <linux/nfs4.h>
50#include <linux/nfsd/state.h>
51#include <linux/nfsd/xdr4.h>
52#include <linux/namei.h> 37#include <linux/namei.h>
53#include <linux/swap.h> 38#include <linux/swap.h>
54#include <linux/mutex.h>
55#include <linux/lockd/bind.h>
56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 39#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h> 40#include <linux/sunrpc/clnt.h>
41#include "xdr4.h"
42#include "vfs.h"
59 43
60#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
61 45
@@ -477,13 +461,14 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
477 461
478/* 462/*
479 * fchan holds the client values on input, and the server values on output 463 * fchan holds the client values on input, and the server values on output
464 * sv_max_mesg is the maximum payload plus one page for overhead.
480 */ 465 */
481static int init_forechannel_attrs(struct svc_rqst *rqstp, 466static int init_forechannel_attrs(struct svc_rqst *rqstp,
482 struct nfsd4_channel_attrs *session_fchan, 467 struct nfsd4_channel_attrs *session_fchan,
483 struct nfsd4_channel_attrs *fchan) 468 struct nfsd4_channel_attrs *fchan)
484{ 469{
485 int status = 0; 470 int status = 0;
486 __u32 maxcount = svc_max_payload(rqstp); 471 __u32 maxcount = nfsd_serv->sv_max_mesg;
487 472
488 /* headerpadsz set to zero in encode routine */ 473 /* headerpadsz set to zero in encode routine */
489 474
@@ -523,6 +508,15 @@ free_session_slots(struct nfsd4_session *ses)
523 kfree(ses->se_slots[i]); 508 kfree(ses->se_slots[i]);
524} 509}
525 510
511/*
512 * We don't actually need to cache the rpc and session headers, so we
513 * can allocate a little less for each slot:
514 */
515static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
516{
517 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
518}
519
526static int 520static int
527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 521alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
528 struct nfsd4_create_session *cses) 522 struct nfsd4_create_session *cses)
@@ -554,7 +548,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
554 memcpy(new, &tmp, sizeof(*new)); 548 memcpy(new, &tmp, sizeof(*new));
555 549
556 /* allocate each struct nfsd4_slot and data cache in one piece */ 550 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 551 cachesize = slot_bytes(&new->se_fchannel);
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 552 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 553 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp) 554 if (!sp)
@@ -628,10 +622,12 @@ void
628free_session(struct kref *kref) 622free_session(struct kref *kref)
629{ 623{
630 struct nfsd4_session *ses; 624 struct nfsd4_session *ses;
625 int mem;
631 626
632 ses = container_of(kref, struct nfsd4_session, se_ref); 627 ses = container_of(kref, struct nfsd4_session, se_ref);
633 spin_lock(&nfsd_drc_lock); 628 spin_lock(&nfsd_drc_lock);
634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; 629 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
630 nfsd_drc_mem_used -= mem;
635 spin_unlock(&nfsd_drc_lock); 631 spin_unlock(&nfsd_drc_lock);
636 free_session_slots(ses); 632 free_session_slots(ses);
637 kfree(ses); 633 kfree(ses);
@@ -2404,11 +2400,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2404 2400
2405 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2401 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2406 2402
2407 dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", 2403 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
2408 dp->dl_stateid.si_boot, 2404 STATEID_VAL(&dp->dl_stateid));
2409 dp->dl_stateid.si_stateownerid,
2410 dp->dl_stateid.si_fileid,
2411 dp->dl_stateid.si_generation);
2412out: 2405out:
2413 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 2406 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2414 && flag == NFS4_OPEN_DELEGATE_NONE 2407 && flag == NFS4_OPEN_DELEGATE_NONE
@@ -2498,9 +2491,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2498 2491
2499 status = nfs_ok; 2492 status = nfs_ok;
2500 2493
2501 dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", 2494 dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
2502 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 2495 STATEID_VAL(&stp->st_stateid));
2503 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2504out: 2496out:
2505 if (fp) 2497 if (fp)
2506 put_nfs4_file(fp); 2498 put_nfs4_file(fp);
@@ -2666,9 +2658,8 @@ STALE_STATEID(stateid_t *stateid)
2666{ 2658{
2667 if (time_after((unsigned long)boot_time, 2659 if (time_after((unsigned long)boot_time,
2668 (unsigned long)stateid->si_boot)) { 2660 (unsigned long)stateid->si_boot)) {
2669 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", 2661 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2670 stateid->si_boot, stateid->si_stateownerid, 2662 STATEID_VAL(stateid));
2671 stateid->si_fileid, stateid->si_generation);
2672 return 1; 2663 return 1;
2673 } 2664 }
2674 return 0; 2665 return 0;
@@ -2680,9 +2671,8 @@ EXPIRED_STATEID(stateid_t *stateid)
2680 if (time_before((unsigned long)boot_time, 2671 if (time_before((unsigned long)boot_time,
2681 ((unsigned long)stateid->si_boot)) && 2672 ((unsigned long)stateid->si_boot)) &&
2682 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { 2673 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2683 dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", 2674 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2684 stateid->si_boot, stateid->si_stateownerid, 2675 STATEID_VAL(stateid));
2685 stateid->si_fileid, stateid->si_generation);
2686 return 1; 2676 return 1;
2687 } 2677 }
2688 return 0; 2678 return 0;
@@ -2696,9 +2686,8 @@ stateid_error_map(stateid_t *stateid)
2696 if (EXPIRED_STATEID(stateid)) 2686 if (EXPIRED_STATEID(stateid))
2697 return nfserr_expired; 2687 return nfserr_expired;
2698 2688
2699 dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", 2689 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2700 stateid->si_boot, stateid->si_stateownerid, 2690 STATEID_VAL(stateid));
2701 stateid->si_fileid, stateid->si_generation);
2702 return nfserr_bad_stateid; 2691 return nfserr_bad_stateid;
2703} 2692}
2704 2693
@@ -2884,10 +2873,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2884 struct svc_fh *current_fh = &cstate->current_fh; 2873 struct svc_fh *current_fh = &cstate->current_fh;
2885 __be32 status; 2874 __be32 status;
2886 2875
2887 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2876 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
2888 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2877 seqid, STATEID_VAL(stateid));
2889 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2890 stateid->si_generation);
2891 2878
2892 *stpp = NULL; 2879 *stpp = NULL;
2893 *sopp = NULL; 2880 *sopp = NULL;
@@ -3019,12 +3006,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3019 sop->so_confirmed = 1; 3006 sop->so_confirmed = 1;
3020 update_stateid(&stp->st_stateid); 3007 update_stateid(&stp->st_stateid);
3021 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); 3008 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
3022 dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 3009 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
3023 "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, 3010 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
3024 stp->st_stateid.si_boot,
3025 stp->st_stateid.si_stateownerid,
3026 stp->st_stateid.si_fileid,
3027 stp->st_stateid.si_generation);
3028 3011
3029 nfsd4_create_clid_dir(sop->so_client); 3012 nfsd4_create_clid_dir(sop->so_client);
3030out: 3013out:
@@ -3283,9 +3266,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
3283 struct nfs4_file *fp; 3266 struct nfs4_file *fp;
3284 struct nfs4_delegation *dl; 3267 struct nfs4_delegation *dl;
3285 3268
3286 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 3269 dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
3287 stid->si_boot, stid->si_stateownerid, 3270 STATEID_VAL(stid));
3288 stid->si_fileid, stid->si_generation);
3289 3271
3290 fp = find_file(ino); 3272 fp = find_file(ino);
3291 if (!fp) 3273 if (!fp)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f..a8587e90fd5 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,24 +40,16 @@
40 * at the end of nfs4svc_decode_compoundargs. 40 * at the end of nfs4svc_decode_compoundargs.
41 */ 41 */
42 42
43#include <linux/param.h>
44#include <linux/smp.h>
45#include <linux/fs.h>
46#include <linux/namei.h> 43#include <linux/namei.h>
47#include <linux/vfs.h> 44#include <linux/statfs.h>
48#include <linux/utsname.h> 45#include <linux/utsname.h>
49#include <linux/sunrpc/xdr.h>
50#include <linux/sunrpc/svc.h>
51#include <linux/sunrpc/clnt.h>
52#include <linux/nfsd/nfsd.h>
53#include <linux/nfsd/state.h>
54#include <linux/nfsd/xdr4.h>
55#include <linux/nfsd_idmap.h> 46#include <linux/nfsd_idmap.h>
56#include <linux/nfs4.h>
57#include <linux/nfs4_acl.h> 47#include <linux/nfs4_acl.h>
58#include <linux/sunrpc/gss_api.h>
59#include <linux/sunrpc/svcauth_gss.h> 48#include <linux/sunrpc/svcauth_gss.h>
60 49
50#include "xdr4.h"
51#include "vfs.h"
52
61#define NFSDDBG_FACILITY NFSDDBG_XDR 53#define NFSDDBG_FACILITY NFSDDBG_XDR
62 54
63/* 55/*
@@ -2204,11 +2196,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2204 * we will not follow the cross mount and will fill the attribtutes 2196 * we will not follow the cross mount and will fill the attribtutes
2205 * directly from the mountpoint dentry. 2197 * directly from the mountpoint dentry.
2206 */ 2198 */
2207 if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) 2199 if (nfsd_mountpoint(dentry, exp)) {
2208 ignore_crossmnt = 1;
2209 else if (d_mountpoint(dentry)) {
2210 int err; 2200 int err;
2211 2201
2202 if (!(exp->ex_flags & NFSEXP_V4ROOT)
2203 && !attributes_need_mount(cd->rd_bmval)) {
2204 ignore_crossmnt = 1;
2205 goto out_encode;
2206 }
2212 /* 2207 /*
2213 * Why the heck aren't we just using nfsd_lookup?? 2208 * Why the heck aren't we just using nfsd_lookup??
2214 * Different "."/".." handling? Something else? 2209 * Different "."/".." handling? Something else?
@@ -2224,6 +2219,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2224 goto out_put; 2219 goto out_put;
2225 2220
2226 } 2221 }
2222out_encode:
2227 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, 2223 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
2228 cd->rd_rqstp, ignore_crossmnt); 2224 cd->rd_rqstp, ignore_crossmnt);
2229out_put: 2225out_put:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 4638635c5d8..da08560c481 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfscache.c
3 *
4 * Request reply cache. This is currently a global cache, but this may 2 * Request reply cache. This is currently a global cache, but this may
5 * change in the future and be a per-client cache. 3 * change in the future and be a per-client cache.
6 * 4 *
@@ -10,16 +8,8 @@
10 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
11 */ 9 */
12 10
13#include <linux/kernel.h> 11#include "nfsd.h"
14#include <linux/time.h> 12#include "cache.h"
15#include <linux/slab.h>
16#include <linux/string.h>
17#include <linux/spinlock.h>
18#include <linux/list.h>
19
20#include <linux/sunrpc/svc.h>
21#include <linux/nfsd/nfsd.h>
22#include <linux/nfsd/cache.h>
23 13
24/* Size of reply cache. Common values are: 14/* Size of reply cache. Common values are:
25 * 4.3BSD: 128 15 * 4.3BSD: 128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc148ce..2604c3e70ea 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,46 +1,20 @@
1/* 1/*
2 * linux/fs/nfsd/nfsctl.c
3 *
4 * Syscall interface to knfsd. 2 * Syscall interface to knfsd.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/module.h>
10
11#include <linux/linkage.h>
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/fs.h>
15#include <linux/namei.h> 7#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/net.h>
18#include <linux/in.h>
19#include <linux/syscalls.h>
20#include <linux/unistd.h>
21#include <linux/slab.h>
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/pagemap.h>
25#include <linux/init.h>
26#include <linux/inet.h>
27#include <linux/string.h>
28#include <linux/ctype.h> 8#include <linux/ctype.h>
29 9
30#include <linux/nfs.h>
31#include <linux/nfsd_idmap.h> 10#include <linux/nfsd_idmap.h>
32#include <linux/lockd/bind.h>
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/svcsock.h> 11#include <linux/sunrpc/svcsock.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/cache.h>
37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
41 15
42#include <asm/uaccess.h> 16#include "nfsd.h"
43#include <net/ipv6.h> 17#include "cache.h"
44 18
45/* 19/*
46 * We have a single directory with 9 nodes in it. 20 * We have a single directory with 9 nodes in it.
@@ -55,6 +29,7 @@ enum {
55 NFSD_Getfd, 29 NFSD_Getfd,
56 NFSD_Getfs, 30 NFSD_Getfs,
57 NFSD_List, 31 NFSD_List,
32 NFSD_Export_features,
58 NFSD_Fh, 33 NFSD_Fh,
59 NFSD_FO_UnlockIP, 34 NFSD_FO_UnlockIP,
60 NFSD_FO_UnlockFS, 35 NFSD_FO_UnlockFS,
@@ -173,6 +148,24 @@ static const struct file_operations exports_operations = {
173 .owner = THIS_MODULE, 148 .owner = THIS_MODULE,
174}; 149};
175 150
151static int export_features_show(struct seq_file *m, void *v)
152{
153 seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
154 return 0;
155}
156
157static int export_features_open(struct inode *inode, struct file *file)
158{
159 return single_open(file, export_features_show, NULL);
160}
161
162static struct file_operations export_features_operations = {
163 .open = export_features_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167};
168
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 169extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 170extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
178 171
@@ -1330,6 +1323,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1330 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1323 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1331 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1324 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1332 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1325 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1326 [NFSD_Export_features] = {"export_features",
1327 &export_features_operations, S_IRUGO},
1333 [NFSD_FO_UnlockIP] = {"unlock_ip", 1328 [NFSD_FO_UnlockIP] = {"unlock_ip",
1334 &transaction_ops, S_IWUSR|S_IRUSR}, 1329 &transaction_ops, S_IWUSR|S_IRUSR},
1335 [NFSD_FO_UnlockFS] = {"unlock_filesystem", 1330 [NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 00000000000..e942a1aaac9
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,338 @@
1/*
2 * Hodge-podge collection of knfsd-related stuff.
3 * I will sort this out later.
4 *
5 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef LINUX_NFSD_NFSD_H
9#define LINUX_NFSD_NFSD_H
10
11#include <linux/types.h>
12#include <linux/mount.h>
13
14#include <linux/nfsd/debug.h>
15#include <linux/nfsd/export.h>
16#include <linux/nfsd/stats.h>
17/*
18 * nfsd version
19 */
20#define NFSD_SUPPORTED_MINOR_VERSION 1
21
22struct readdir_cd {
23 __be32 err; /* 0, nfserr, or nfserr_eof */
24};
25
26
27extern struct svc_program nfsd_program;
28extern struct svc_version nfsd_version2, nfsd_version3,
29 nfsd_version4;
30extern u32 nfsd_supported_minorversion;
31extern struct mutex nfsd_mutex;
32extern struct svc_serv *nfsd_serv;
33extern spinlock_t nfsd_drc_lock;
34extern unsigned int nfsd_drc_max_mem;
35extern unsigned int nfsd_drc_mem_used;
36
37extern const struct seq_operations nfs_exports_op;
38
39/*
40 * Function prototypes.
41 */
42int nfsd_svc(unsigned short port, int nrservs);
43int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
44
45int nfsd_nrthreads(void);
46int nfsd_nrpools(void);
47int nfsd_get_nrthreads(int n, int *);
48int nfsd_set_nrthreads(int n, int *);
49
50#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
51#ifdef CONFIG_NFSD_V2_ACL
52extern struct svc_version nfsd_acl_version2;
53#else
54#define nfsd_acl_version2 NULL
55#endif
56#ifdef CONFIG_NFSD_V3_ACL
57extern struct svc_version nfsd_acl_version3;
58#else
59#define nfsd_acl_version3 NULL
60#endif
61#endif
62
63enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
64int nfsd_vers(int vers, enum vers_op change);
65int nfsd_minorversion(u32 minorversion, enum vers_op change);
66void nfsd_reset_versions(void);
67int nfsd_create_serv(void);
68
69extern int nfsd_max_blksize;
70
71static inline int nfsd_v4client(struct svc_rqst *rq)
72{
73 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
74}
75
76/*
77 * NFSv4 State
78 */
79#ifdef CONFIG_NFSD_V4
80extern unsigned int max_delegations;
81int nfs4_state_init(void);
82void nfsd4_free_slabs(void);
83int nfs4_state_start(void);
84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir);
88#else
89static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif
97
98/*
99 * lockd binding
100 */
101void nfsd_lockd_init(void);
102void nfsd_lockd_shutdown(void);
103
104
105/*
106 * These macros provide pre-xdr'ed values for faster operation.
107 */
108#define nfs_ok cpu_to_be32(NFS_OK)
109#define nfserr_perm cpu_to_be32(NFSERR_PERM)
110#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
111#define nfserr_io cpu_to_be32(NFSERR_IO)
112#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
113#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
114#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
115#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
116#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
117#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
118#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
119#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
120#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
121#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
122#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
123#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
124#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
125#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
126#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
127#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
128#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
129#define nfserr_stale cpu_to_be32(NFSERR_STALE)
130#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
131#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
132#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
133#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
134#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
135#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
136#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
137#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
138#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
139#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
140#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
141#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
142#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
143#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
144#define nfserr_same cpu_to_be32(NFSERR_SAME)
145#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
146#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
147#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
148#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
149#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
150#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
151#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
152#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
153#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
154#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
155#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
156#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
157#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
158#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
159#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
160#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
161#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
162#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
163#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
164#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
165#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
166#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
167#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
168#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
169#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
170#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
171#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
172#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
173#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
174#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
175#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
176#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
177#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
178#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
179#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
180#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
181#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
182#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
183#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
184#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
185#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
186#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
187#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
188#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
189#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
190#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
191#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
192#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
193#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
194#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
195#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
196#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
197#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
198#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
199#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
200#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
201#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
202#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
203#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
204#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
205#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
206#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
207#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
208#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
209
210/* error codes for internal use */
211/* if a request fails due to kmalloc failure, it gets dropped.
212 * Client should resend eventually
213 */
214#define nfserr_dropit cpu_to_be32(30000)
215/* end-of-file indicator in readdir */
216#define nfserr_eof cpu_to_be32(30001)
217/* replay detected */
218#define nfserr_replay_me cpu_to_be32(11001)
219/* nfs41 replay detected */
220#define nfserr_replay_cache cpu_to_be32(11002)
221
222/* Check for dir entries '.' and '..' */
223#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
224
225/*
226 * Time of server startup
227 */
228extern struct timeval nfssvc_boot;
229
230#ifdef CONFIG_NFSD_V4
231
232/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to
235 * tell the client that the operation succeeded.
236 *
237 * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
238 * needed to encode an "ordinary" _successful_ operation. (GETATTR,
239 * READ, READDIR, and READLINK have their own buffer checks.) if we
240 * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
241 *
242 * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
243 * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
244 * care is taken to ensure that we never fall below this level for any
245 * reason.
246 */
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252
253/*
254 * The following attributes are currently not supported by the NFSv4 server:
255 * ARCHIVE (deprecated anyway)
256 * HIDDEN (unlikely to be supported any time soon)
257 * MIMETYPE (unlikely to be supported any time soon)
258 * QUOTA_* (will be supported in a forthcoming patch)
259 * SYSTEM (unlikely to be supported any time soon)
260 * TIME_BACKUP (unlikely to be supported any time soon)
261 * TIME_CREATE (unlikely to be supported any time soon)
262 */
263#define NFSD4_SUPPORTED_ATTRS_WORD0 \
264(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
265 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
266 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
267 | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
268 | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
269 | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
270 | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
271 | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
272 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
273 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
274
275#define NFSD4_SUPPORTED_ATTRS_WORD1 \
276(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
277 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
278 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
279 | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
280 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
281 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
282
283#define NFSD4_SUPPORTED_ATTRS_WORD2 0
284
285#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
286 NFSD4_SUPPORTED_ATTRS_WORD0
287
288#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
289 NFSD4_SUPPORTED_ATTRS_WORD1
290
291#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
292 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
293
294static inline u32 nfsd_suppattrs0(u32 minorversion)
295{
296 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
297 : NFSD4_SUPPORTED_ATTRS_WORD0;
298}
299
300static inline u32 nfsd_suppattrs1(u32 minorversion)
301{
302 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
303 : NFSD4_SUPPORTED_ATTRS_WORD1;
304}
305
306static inline u32 nfsd_suppattrs2(u32 minorversion)
307{
308 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
309 : NFSD4_SUPPORTED_ATTRS_WORD2;
310}
311
312/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
313#define NFSD_WRITEONLY_ATTRS_WORD1 \
314(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
315
316/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
317#define NFSD_WRITEABLE_ATTRS_WORD0 \
318(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
319#define NFSD_WRITEABLE_ATTRS_WORD1 \
320(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
321 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
322#define NFSD_WRITEABLE_ATTRS_WORD2 0
323
324#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
325 NFSD_WRITEABLE_ATTRS_WORD0
326/*
327 * we currently store the exclusive create verifier in the v_{a,m}time
328 * attributes so the client can't set these at create time using EXCLUSIVE4_1
329 */
330#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
331 (NFSD_WRITEABLE_ATTRS_WORD1 & \
332 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
333#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
334 NFSD_WRITEABLE_ATTRS_WORD2
335
336#endif /* CONFIG_NFSD_V4 */
337
338#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a7..55c8e63af0b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfsfh.c
3 *
4 * NFS server file handle treatment. 2 * NFS server file handle treatment.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
@@ -9,19 +7,11 @@
9 * ... and again Southern-Winter 2001 to support export_operations 7 * ... and again Southern-Winter 2001 to support export_operations
10 */ 8 */
11 9
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/unistd.h>
15#include <linux/string.h>
16#include <linux/stat.h>
17#include <linux/dcache.h>
18#include <linux/exportfs.h> 10#include <linux/exportfs.h>
19#include <linux/mount.h>
20 11
21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/svc.h>
23#include <linux/sunrpc/svcauth_gss.h> 12#include <linux/sunrpc/svcauth_gss.h>
24#include <linux/nfsd/nfsd.h> 13#include "nfsd.h"
14#include "vfs.h"
25#include "auth.h" 15#include "auth.h"
26 16
27#define NFSDDBG_FACILITY NFSDDBG_FH 17#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -96,8 +86,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
96static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, 86static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
97 struct svc_export *exp) 87 struct svc_export *exp)
98{ 88{
89 int flags = nfsexp_flags(rqstp, exp);
90
99 /* Check if the request originated from a secure port. */ 91 /* Check if the request originated from a secure port. */
100 if (!rqstp->rq_secure && EX_SECURE(exp)) { 92 if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
101 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 93 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
102 dprintk(KERN_WARNING 94 dprintk(KERN_WARNING
103 "nfsd: request from insecure port %s!\n", 95 "nfsd: request from insecure port %s!\n",
@@ -109,6 +101,36 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
109 return nfserrno(nfsd_setuser(rqstp, exp)); 101 return nfserrno(nfsd_setuser(rqstp, exp));
110} 102}
111 103
104static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
105 struct dentry *dentry, struct svc_export *exp)
106{
107 if (!(exp->ex_flags & NFSEXP_V4ROOT))
108 return nfs_ok;
109 /*
110 * v2/v3 clients have no need for the V4ROOT export--they use
111 * the mount protocl instead; also, further V4ROOT checks may be
112 * in v4-specific code, in which case v2/v3 clients could bypass
113 * them.
114 */
115 if (!nfsd_v4client(rqstp))
116 return nfserr_stale;
117 /*
118 * We're exposing only the directories and symlinks that have to be
119 * traversed on the way to real exports:
120 */
121 if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
122 !S_ISLNK(dentry->d_inode->i_mode)))
123 return nfserr_stale;
124 /*
125 * A pseudoroot export gives permission to access only one
126 * single directory; the kernel has to make another upcall
127 * before granting access to anything else under it:
128 */
129 if (unlikely(dentry != exp->ex_path.dentry))
130 return nfserr_stale;
131 return nfs_ok;
132}
133
112/* 134/*
113 * Use the given filehandle to look up the corresponding export and 135 * Use the given filehandle to look up the corresponding export and
114 * dentry. On success, the results are used to set fh_export and 136 * dentry. On success, the results are used to set fh_export and
@@ -232,14 +254,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
232 goto out; 254 goto out;
233 } 255 }
234 256
235 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
236 error = nfsd_setuser_and_check_port(rqstp, exp);
237 if (error) {
238 dput(dentry);
239 goto out;
240 }
241 }
242
243 if (S_ISDIR(dentry->d_inode->i_mode) && 257 if (S_ISDIR(dentry->d_inode->i_mode) &&
244 (dentry->d_flags & DCACHE_DISCONNECTED)) { 258 (dentry->d_flags & DCACHE_DISCONNECTED)) {
245 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 259 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -294,28 +308,32 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
294 error = nfsd_set_fh_dentry(rqstp, fhp); 308 error = nfsd_set_fh_dentry(rqstp, fhp);
295 if (error) 309 if (error)
296 goto out; 310 goto out;
297 dentry = fhp->fh_dentry;
298 exp = fhp->fh_export;
299 } else {
300 /*
301 * just rechecking permissions
302 * (e.g. nfsproc_create calls fh_verify, then nfsd_create
303 * does as well)
304 */
305 dprintk("nfsd: fh_verify - just checking\n");
306 dentry = fhp->fh_dentry;
307 exp = fhp->fh_export;
308 /*
309 * Set user creds for this exportpoint; necessary even
310 * in the "just checking" case because this may be a
311 * filehandle that was created by fh_compose, and that
312 * is about to be used in another nfsv4 compound
313 * operation.
314 */
315 error = nfsd_setuser_and_check_port(rqstp, exp);
316 if (error)
317 goto out;
318 } 311 }
312 dentry = fhp->fh_dentry;
313 exp = fhp->fh_export;
314 /*
315 * We still have to do all these permission checks, even when
316 * fh_dentry is already set:
317 * - fh_verify may be called multiple times with different
318 * "access" arguments (e.g. nfsd_proc_create calls
319 * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
320 * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
321 * - in the NFSv4 case, the filehandle may have been filled
322 * in by fh_compose, and given a dentry, but further
323 * compound operations performed with that filehandle
324 * still need permissions checks. In the worst case, a
325 * mountpoint crossing may have changed the export
326 * options, and we may now need to use a different uid
327 * (for example, if different id-squashing options are in
328 * effect on the new filesystem).
329 */
330 error = check_pseudo_root(rqstp, dentry, exp);
331 if (error)
332 goto out;
333
334 error = nfsd_setuser_and_check_port(rqstp, exp);
335 if (error)
336 goto out;
319 337
320 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 338 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
321 if (error) 339 if (error)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 00000000000..cdfb8c6a420
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
1/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
2
3#ifndef _LINUX_NFSD_FH_INT_H
4#define _LINUX_NFSD_FH_INT_H
5
6#include <linux/nfsd/nfsfh.h>
7
8enum nfsd_fsid {
9 FSID_DEV = 0,
10 FSID_NUM,
11 FSID_MAJOR_MINOR,
12 FSID_ENCODE_DEV,
13 FSID_UUID4_INUM,
14 FSID_UUID8,
15 FSID_UUID16,
16 FSID_UUID16_INUM,
17};
18
19enum fsid_source {
20 FSIDSOURCE_DEV,
21 FSIDSOURCE_FSID,
22 FSIDSOURCE_UUID,
23};
24extern enum fsid_source fsid_source(struct svc_fh *fhp);
25
26
27/* This might look a little large to "inline" but in all calls except
28 * one, 'vers' is constant so moste of the function disappears.
29 */
30static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
31 u32 fsid, unsigned char *uuid)
32{
33 u32 *up;
34 switch(vers) {
35 case FSID_DEV:
36 fsidv[0] = htonl((MAJOR(dev)<<16) |
37 MINOR(dev));
38 fsidv[1] = ino_t_to_u32(ino);
39 break;
40 case FSID_NUM:
41 fsidv[0] = fsid;
42 break;
43 case FSID_MAJOR_MINOR:
44 fsidv[0] = htonl(MAJOR(dev));
45 fsidv[1] = htonl(MINOR(dev));
46 fsidv[2] = ino_t_to_u32(ino);
47 break;
48
49 case FSID_ENCODE_DEV:
50 fsidv[0] = new_encode_dev(dev);
51 fsidv[1] = ino_t_to_u32(ino);
52 break;
53
54 case FSID_UUID4_INUM:
55 /* 4 byte fsid and inode number */
56 up = (u32*)uuid;
57 fsidv[0] = ino_t_to_u32(ino);
58 fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
59 break;
60
61 case FSID_UUID8:
62 /* 8 byte fsid */
63 up = (u32*)uuid;
64 fsidv[0] = up[0] ^ up[2];
65 fsidv[1] = up[1] ^ up[3];
66 break;
67
68 case FSID_UUID16:
69 /* 16 byte fsid - NFSv3+ only */
70 memcpy(fsidv, uuid, 16);
71 break;
72
73 case FSID_UUID16_INUM:
74 /* 8 byte inode and 16 byte fsid */
75 *(u64*)fsidv = (u64)ino;
76 memcpy(fsidv+2, uuid, 16);
77 break;
78 default: BUG();
79 }
80}
81
82static inline int key_len(int type)
83{
84 switch(type) {
85 case FSID_DEV: return 8;
86 case FSID_NUM: return 4;
87 case FSID_MAJOR_MINOR: return 12;
88 case FSID_ENCODE_DEV: return 8;
89 case FSID_UUID4_INUM: return 8;
90 case FSID_UUID8: return 8;
91 case FSID_UUID16: return 16;
92 case FSID_UUID16_INUM: return 24;
93 default: return 0;
94 }
95}
96
97/*
98 * Shorthand for dprintk()'s
99 */
100extern char * SVCFH_fmt(struct svc_fh *fhp);
101
102/*
103 * Function prototypes
104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *);
109
110static __inline__ struct svc_fh *
111fh_copy(struct svc_fh *dst, struct svc_fh *src)
112{
113 WARN_ON(src->fh_dentry || src->fh_locked);
114
115 *dst = *src;
116 return dst;
117}
118
119static inline void
120fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
121{
122 dst->fh_size = src->fh_size;
123 memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
124}
125
126static __inline__ struct svc_fh *
127fh_init(struct svc_fh *fhp, int maxsize)
128{
129 memset(fhp, 0, sizeof(*fhp));
130 fhp->fh_maxsize = maxsize;
131 return fhp;
132}
133
134#ifdef CONFIG_NFSD_V3
135/*
136 * Fill in the pre_op attr for the wcc data
137 */
138static inline void
139fill_pre_wcc(struct svc_fh *fhp)
140{
141 struct inode *inode;
142
143 inode = fhp->fh_dentry->d_inode;
144 if (!fhp->fh_pre_saved) {
145 fhp->fh_pre_mtime = inode->i_mtime;
146 fhp->fh_pre_ctime = inode->i_ctime;
147 fhp->fh_pre_size = inode->i_size;
148 fhp->fh_pre_change = inode->i_version;
149 fhp->fh_pre_saved = 1;
150 }
151}
152
153extern void fill_post_wcc(struct svc_fh *);
154#else
155#define fill_pre_wcc(ignored)
156#define fill_post_wcc(notused)
157#endif /* CONFIG_NFSD_V3 */
158
159
160/*
161 * Lock a file handle/inode
162 * NOTE: both fh_lock and fh_unlock are done "by hand" in
163 * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
164 * so, any changes here should be reflected there.
165 */
166
167static inline void
168fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
169{
170 struct dentry *dentry = fhp->fh_dentry;
171 struct inode *inode;
172
173 BUG_ON(!dentry);
174
175 if (fhp->fh_locked) {
176 printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
177 dentry->d_parent->d_name.name, dentry->d_name.name);
178 return;
179 }
180
181 inode = dentry->d_inode;
182 mutex_lock_nested(&inode->i_mutex, subclass);
183 fill_pre_wcc(fhp);
184 fhp->fh_locked = 1;
185}
186
187static inline void
188fh_lock(struct svc_fh *fhp)
189{
190 fh_lock_nested(fhp, I_MUTEX_NORMAL);
191}
192
193/*
194 * Unlock a file handle/inode
195 */
196static inline void
197fh_unlock(struct svc_fh *fhp)
198{
199 BUG_ON(!fhp->fh_dentry);
200
201 if (fhp->fh_locked) {
202 fill_post_wcc(fhp);
203 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
204 fhp->fh_locked = 0;
205 }
206}
207
208#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c820b7a..a047ad6111e 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -1,29 +1,14 @@
1/* 1/*
2 * nfsproc2.c Process version 2 NFS requests.
3 * linux/fs/nfsd/nfs2proc.c
4 *
5 * Process version 2 NFS requests. 2 * Process version 2 NFS requests.
6 * 3 *
7 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
8 */ 5 */
9 6
10#include <linux/linkage.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/namei.h> 7#include <linux/namei.h>
19#include <linux/unistd.h>
20#include <linux/slab.h>
21 8
22#include <linux/sunrpc/clnt.h> 9#include "cache.h"
23#include <linux/sunrpc/svc.h> 10#include "xdr.h"
24#include <linux/nfsd/nfsd.h> 11#include "vfs.h"
25#include <linux/nfsd/cache.h>
26#include <linux/nfsd/xdr.h>
27 12
28typedef struct svc_rqst svc_rqst; 13typedef struct svc_rqst svc_rqst;
29typedef struct svc_buf svc_buf; 14typedef struct svc_buf svc_buf;
@@ -758,6 +743,7 @@ nfserrno (int errno)
758 { nfserr_io, -ETXTBSY }, 743 { nfserr_io, -ETXTBSY },
759 { nfserr_notsupp, -EOPNOTSUPP }, 744 { nfserr_notsupp, -EOPNOTSUPP },
760 { nfserr_toosmall, -ETOOSMALL }, 745 { nfserr_toosmall, -ETOOSMALL },
746 { nfserr_serverfault, -ESERVERFAULT },
761 }; 747 };
762 int i; 748 int i;
763 749
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd4..171699eb07c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfssvc.c
3 *
4 * Central processing for nfsd. 2 * Central processing for nfsd.
5 * 3 *
6 * Authors: Olaf Kirch (okir@monad.swb.de) 4 * Authors: Olaf Kirch (okir@monad.swb.de)
@@ -8,33 +6,19 @@
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/module.h>
12#include <linux/sched.h> 9#include <linux/sched.h>
13#include <linux/time.h>
14#include <linux/errno.h>
15#include <linux/nfs.h>
16#include <linux/in.h>
17#include <linux/uio.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/smp.h>
21#include <linux/freezer.h> 10#include <linux/freezer.h>
22#include <linux/fs_struct.h> 11#include <linux/fs_struct.h>
23#include <linux/kthread.h>
24#include <linux/swap.h> 12#include <linux/swap.h>
25 13
26#include <linux/sunrpc/types.h>
27#include <linux/sunrpc/stats.h> 14#include <linux/sunrpc/stats.h>
28#include <linux/sunrpc/svc.h>
29#include <linux/sunrpc/svcsock.h> 15#include <linux/sunrpc/svcsock.h>
30#include <linux/sunrpc/cache.h>
31#include <linux/nfsd/nfsd.h>
32#include <linux/nfsd/stats.h>
33#include <linux/nfsd/cache.h>
34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
37#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include "nfsd.h"
20#include "cache.h"
21#include "vfs.h"
38 22
39#define NFSDDBG_FACILITY NFSDDBG_SVC 23#define NFSDDBG_FACILITY NFSDDBG_SVC
40 24
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index afd08e2c90a..4ce005dbf3e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,20 +1,10 @@
1/* 1/*
2 * linux/fs/nfsd/nfsxdr.c
3 *
4 * XDR support for nfsd 2 * XDR support for nfsd
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/types.h> 7#include "xdr.h"
10#include <linux/time.h>
11#include <linux/nfs.h>
12#include <linux/vfs.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/svc.h>
15#include <linux/nfsd/nfsd.h>
16#include <linux/nfsd/xdr.h>
17#include <linux/mm.h>
18#include "auth.h" 8#include "auth.h"
19 9
20#define NFSDDBG_FACILITY NFSDDBG_XDR 10#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 00000000000..fefeae27f25
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,408 @@
1/*
2 * Copyright (c) 2001 The Regents of the University of Michigan.
3 * All rights reserved.
4 *
5 * Kendrick Smith <kmsmith@umich.edu>
6 * Andy Adamson <andros@umich.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
34
35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H
37
38#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h"
40
41typedef struct {
42 u32 cl_boot;
43 u32 cl_id;
44} clientid_t;
45
46typedef struct {
47 u32 so_boot;
48 u32 so_stateownerid;
49 u32 so_fileid;
50} stateid_opaque_t;
51
52typedef struct {
53 u32 si_generation;
54 stateid_opaque_t si_opaque;
55} stateid_t;
56#define si_boot si_opaque.so_boot
57#define si_stateownerid si_opaque.so_stateownerid
58#define si_fileid si_opaque.so_fileid
59
60#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
61#define STATEID_VAL(s) \
62 (s)->si_boot, \
63 (s)->si_stateownerid, \
64 (s)->si_fileid, \
65 (s)->si_generation
66
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_delegation {
74 struct list_head dl_perfile;
75 struct list_head dl_perclnt;
76 struct list_head dl_recall_lru; /* delegation recalled */
77 atomic_t dl_count; /* ref count */
78 struct nfs4_client *dl_client;
79 struct nfs4_file *dl_file;
80 struct file_lock *dl_flock;
81 struct file *dl_vfs_file;
82 u32 dl_type;
83 time_t dl_time;
84/* For recall: */
85 u32 dl_ident;
86 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh;
88 int dl_retries;
89};
90
91/* client delegation callback info */
92struct nfs4_cb_conn {
93 /* SETCLIENTID info */
94 struct sockaddr_storage cb_addr;
95 size_t cb_addrlen;
96 u32 cb_prog;
97 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102};
103
104/* Maximum number of slots per session. 160 is useful for long haul TCP */
105#define NFSD_MAX_SLOTS_PER_SESSION 160
106/* Maximum number of operations per session compound */
107#define NFSD_MAX_OPS_PER_COMPOUND 16
108/* Maximum session per slot cache size */
109#define NFSD_SLOT_CACHE_SIZE 1024
110/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
111#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
112#define NFSD_MAX_MEM_PER_SESSION \
113 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
114
115struct nfsd4_slot {
116 bool sl_inuse;
117 bool sl_cachethis;
118 u16 sl_opcnt;
119 u32 sl_seqid;
120 __be32 sl_status;
121 u32 sl_datalen;
122 char sl_data[];
123};
124
125struct nfsd4_channel_attrs {
126 u32 headerpadsz;
127 u32 maxreq_sz;
128 u32 maxresp_sz;
129 u32 maxresp_cached;
130 u32 maxops;
131 u32 maxreqs;
132 u32 nr_rdma_attrs;
133 u32 rdma_attrs;
134};
135
136struct nfsd4_create_session {
137 clientid_t clientid;
138 struct nfs4_sessionid sessionid;
139 u32 seqid;
140 u32 flags;
141 struct nfsd4_channel_attrs fore_channel;
142 struct nfsd4_channel_attrs back_channel;
143 u32 callback_prog;
144 u32 uid;
145 u32 gid;
146};
147
148/* The single slot clientid cache structure */
149struct nfsd4_clid_slot {
150 u32 sl_seqid;
151 __be32 sl_status;
152 struct nfsd4_create_session sl_cr_ses;
153};
154
155struct nfsd4_session {
156 struct kref se_ref;
157 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt;
159 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */
161 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel;
164 struct nfsd4_slot *se_slots[]; /* forward channel slots */
165};
166
167static inline void
168nfsd4_put_session(struct nfsd4_session *ses)
169{
170 extern void free_session(struct kref *kref);
171 kref_put(&ses->se_ref, free_session);
172}
173
174static inline void
175nfsd4_get_session(struct nfsd4_session *ses)
176{
177 kref_get(&ses->se_ref);
178}
179
180/* formatted contents of nfs4_sessionid */
181struct nfsd4_sessionid {
182 clientid_t clientid;
183 u32 sequence;
184 u32 reserved;
185};
186
187#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
188
189/*
190 * struct nfs4_client - one per client. Clientids live here.
191 * o Each nfs4_client is hashed by clientid.
192 *
193 * o Each nfs4_clients is also hashed by name
194 * (the opaque quantity initially sent by the client to identify itself).
195 *
196 * o cl_perclient list is used to ensure no dangling stateowner references
197 * when we expire the nfs4_client
198 */
199struct nfs4_client {
200 struct list_head cl_idhash; /* hash by cl_clientid.id */
201 struct list_head cl_strhash; /* hash by cl_name */
202 struct list_head cl_openowners;
203 struct list_head cl_delegations;
204 struct list_head cl_lru; /* tail queue */
205 struct xdr_netobj cl_name; /* id generated by client */
206 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
207 nfs4_verifier cl_verifier; /* generated by client */
208 time_t cl_time; /* time of last lease renewal */
209 struct sockaddr_storage cl_addr; /* client ipaddress */
210 u32 cl_flavor; /* setclientid pseudoflavor */
211 char *cl_principal; /* setclientid principal name */
212 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */
218
219 /* for nfs41 */
220 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid;
224
225 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */
232};
233
234/* struct nfs4_client_reset
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state
237 * from non-volitile storage) upon reboot.
238 */
239struct nfs4_client_reclaim {
240 struct list_head cr_strhash; /* hash by cr_name */
241 char cr_recdir[HEXDIR_LEN]; /* recover dir */
242};
243
244static inline void
245update_stateid(stateid_t *stateid)
246{
247 stateid->si_generation++;
248}
249
250/* A reasonable value for REPLAY_ISIZE was estimated as follows:
251 * The OPEN response, typically the largest, requires
252 * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
253 * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
254 * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
255 */
256
257#define NFSD4_REPLAY_ISIZE 112
258
259/*
260 * Replay buffer, where the result of the last seqid-mutating operation
261 * is cached.
262 */
263struct nfs4_replay {
264 __be32 rp_status;
265 unsigned int rp_buflen;
266 char *rp_buf;
267 unsigned intrp_allocated;
268 struct knfsd_fh rp_openfh;
269 char rp_ibuf[NFSD4_REPLAY_ISIZE];
270};
271
272/*
273* nfs4_stateowner can either be an open_owner, or a lock_owner
274*
275* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
276* for lock_owner
277* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
278* for lock_owner
279* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
280* struct is reaped.
281* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
282* and is used to ensure no dangling nfs4_stateid references when we
283* release a stateowner.
284* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
285* close is called to reap associated byte-range locks
286* so_close_lru: (open) stateowner is placed on this list instead of being
287* reaped (when so_perfilestate is empty) to hold the last close replay.
288* reaped by laundramat thread after lease period.
289*/
290struct nfs4_stateowner {
291 struct kref so_ref;
292 struct list_head so_idhash; /* hash by so_id */
293 struct list_head so_strhash; /* hash by op_name */
294 struct list_head so_perclient;
295 struct list_head so_stateids;
296 struct list_head so_perstateid; /* for lockowners only */
297 struct list_head so_close_lru; /* tail queue */
298 time_t so_time; /* time of placement on so_close_lru */
299 int so_is_open_owner; /* 1=openowner,0=lockowner */
300 u32 so_id;
301 struct nfs4_client * so_client;
302 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
303 * sequence id expected from the client: */
304 u32 so_seqid;
305 struct xdr_netobj so_owner; /* open owner name */
306 int so_confirmed; /* successful OPEN_CONFIRM? */
307 struct nfs4_replay so_replay;
308};
309
310/*
311* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
312* o fi_perfile list is used to search for conflicting
313* share_acces, share_deny on the file.
314*/
315struct nfs4_file {
316 atomic_t fi_ref;
317 struct list_head fi_hash; /* hash by "struct inode *" */
318 struct list_head fi_stateids;
319 struct list_head fi_delegations;
320 struct inode *fi_inode;
321 u32 fi_id; /* used with stateowner->so_id
322 * for stateid_hashtbl hash */
323 bool fi_had_conflict;
324};
325
326/*
327* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
328*
329* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
330*
331* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
332* st_perfile: file_hashtbl[] entry.
333* st_perfile_state: nfs4_stateowner->so_perfilestate
334* st_perlockowner: (open stateid) list of lock nfs4_stateowners
335* st_access_bmap: used only for open stateid
336* st_deny_bmap: used only for open stateid
337* st_openstp: open stateid lock stateid was derived from
338*
339* XXX: open stateids and lock stateids have diverged sufficiently that
340* we should consider defining separate structs for the two cases.
341*/
342
343struct nfs4_stateid {
344 struct list_head st_hash;
345 struct list_head st_perfile;
346 struct list_head st_perstateowner;
347 struct list_head st_lockowners;
348 struct nfs4_stateowner * st_stateowner;
349 struct nfs4_file * st_file;
350 stateid_t st_stateid;
351 struct file * st_vfs_file;
352 unsigned long st_access_bmap;
353 unsigned long st_deny_bmap;
354 struct nfs4_stateid * st_openstp;
355};
356
357/* flags for preprocess_seqid_op() */
358#define HAS_SESSION 0x00000001
359#define CONFIRM 0x00000002
360#define OPEN_STATE 0x00000004
361#define LOCK_STATE 0x00000008
362#define RD_STATE 0x00000010
363#define WR_STATE 0x00000020
364#define CLOSE_STATE 0x00000040
365
366#define seqid_mutating_err(err) \
367 (((err) != nfserr_stale_clientid) && \
368 ((err) != nfserr_bad_seqid) && \
369 ((err) != nfserr_stale_stateid) && \
370 ((err) != nfserr_bad_stateid))
371
372struct nfsd4_compound_state;
373
374extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
375 stateid_t *stateid, int flags, struct file **filp);
376extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name);
388extern int nfsd4_recdir_load(void);
389extern void nfsd4_shutdown_recdir(void);
390extern int nfs4_client_to_reclaim(const char *name);
391extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
395
396static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so)
398{
399 kref_put(&so->so_ref, nfs4_free_stateowner);
400}
401
402static inline void
403nfs4_get_stateowner(struct nfs4_stateowner *so)
404{
405 kref_get(&so->so_ref);
406}
407
408#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cddf68..5232d3e8fb2 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/stats.c
3 *
4 * procfs-based user access to knfsd statistics 2 * procfs-based user access to knfsd statistics
5 * 3 *
6 * /proc/net/rpc/nfsd 4 * /proc/net/rpc/nfsd
@@ -23,18 +21,13 @@
23 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 21 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
24 */ 22 */
25 23
26#include <linux/kernel.h>
27#include <linux/time.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h> 24#include <linux/seq_file.h>
30#include <linux/stat.h>
31#include <linux/module.h> 25#include <linux/module.h>
32
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
37 28
29#include "nfsd.h"
30
38struct nfsd_stats nfsdstats; 31struct nfsd_stats nfsdstats;
39struct svc_stat nfsd_svcstats = { 32struct svc_stat nfsd_svcstats = {
40 .program = &nfsd_program, 33 .program = &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a293f027326..c194793b642 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/vfs.c
4 *
5 * File operations used by nfsd. Some of these have been ripped from 3 * File operations used by nfsd. Some of these have been ripped from
6 * other parts of the kernel because they weren't exported, others 4 * other parts of the kernel because they weren't exported, others
7 * are partial duplicates with added or changed functionality. 5 * are partial duplicates with added or changed functionality.
@@ -16,48 +14,31 @@
16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 14 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17 */ 15 */
18 16
19#include <linux/string.h>
20#include <linux/time.h>
21#include <linux/errno.h>
22#include <linux/fs.h> 17#include <linux/fs.h>
23#include <linux/file.h> 18#include <linux/file.h>
24#include <linux/mount.h>
25#include <linux/major.h>
26#include <linux/splice.h> 19#include <linux/splice.h>
27#include <linux/proc_fs.h>
28#include <linux/stat.h>
29#include <linux/fcntl.h> 20#include <linux/fcntl.h>
30#include <linux/net.h>
31#include <linux/unistd.h>
32#include <linux/slab.h>
33#include <linux/pagemap.h>
34#include <linux/in.h>
35#include <linux/module.h>
36#include <linux/namei.h> 21#include <linux/namei.h>
37#include <linux/vfs.h>
38#include <linux/delay.h> 22#include <linux/delay.h>
39#include <linux/sunrpc/svc.h>
40#include <linux/nfsd/nfsd.h>
41#ifdef CONFIG_NFSD_V3
42#include <linux/nfs3.h>
43#include <linux/nfsd/xdr3.h>
44#endif /* CONFIG_NFSD_V3 */
45#include <linux/nfsd/nfsfh.h>
46#include <linux/quotaops.h> 23#include <linux/quotaops.h>
47#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
48#include <linux/posix_acl.h>
49#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
50#include <linux/xattr.h> 26#include <linux/xattr.h>
27#include <linux/jhash.h>
28#include <linux/ima.h>
29#include <asm/uaccess.h>
30
31#ifdef CONFIG_NFSD_V3
32#include "xdr3.h"
33#endif /* CONFIG_NFSD_V3 */
34
51#ifdef CONFIG_NFSD_V4 35#ifdef CONFIG_NFSD_V4
52#include <linux/nfs4.h>
53#include <linux/nfs4_acl.h> 36#include <linux/nfs4_acl.h>
54#include <linux/nfsd_idmap.h> 37#include <linux/nfsd_idmap.h>
55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 38#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
58#include <linux/ima.h>
59 39
60#include <asm/uaccess.h> 40#include "nfsd.h"
41#include "vfs.h"
61 42
62#define NFSDDBG_FACILITY NFSDDBG_FILEOP 43#define NFSDDBG_FACILITY NFSDDBG_FILEOP
63 44
@@ -89,12 +70,6 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 70#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 71static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 72
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
98/* 73/*
99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 74 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
100 * a mount point. 75 * a mount point.
@@ -116,8 +91,16 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 91
117 exp2 = rqst_exp_get_by_name(rqstp, &path); 92 exp2 = rqst_exp_get_by_name(rqstp, &path);
118 if (IS_ERR(exp2)) { 93 if (IS_ERR(exp2)) {
119 if (PTR_ERR(exp2) != -ENOENT) 94 err = PTR_ERR(exp2);
120 err = PTR_ERR(exp2); 95 /*
96 * We normally allow NFS clients to continue
97 * "underneath" a mountpoint that is not exported.
98 * The exception is V4ROOT, where no traversal is ever
99 * allowed without an explicit export of the new
100 * directory.
101 */
102 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
103 err = 0;
121 path_put(&path); 104 path_put(&path);
122 goto out; 105 goto out;
123 } 106 }
@@ -141,6 +124,53 @@ out:
141 return err; 124 return err;
142} 125}
143 126
127static void follow_to_parent(struct path *path)
128{
129 struct dentry *dp;
130
131 while (path->dentry == path->mnt->mnt_root && follow_up(path))
132 ;
133 dp = dget_parent(path->dentry);
134 dput(path->dentry);
135 path->dentry = dp;
136}
137
138static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
139{
140 struct svc_export *exp2;
141 struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
142 .dentry = dget(dparent)};
143
144 follow_to_parent(&path);
145
146 exp2 = rqst_exp_parent(rqstp, &path);
147 if (PTR_ERR(exp2) == -ENOENT) {
148 *dentryp = dget(dparent);
149 } else if (IS_ERR(exp2)) {
150 path_put(&path);
151 return PTR_ERR(exp2);
152 } else {
153 *dentryp = dget(path.dentry);
154 exp_put(*exp);
155 *exp = exp2;
156 }
157 path_put(&path);
158 return 0;
159}
160
161/*
162 * For nfsd purposes, we treat V4ROOT exports as though there was an
163 * export at *every* directory.
164 */
165int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
166{
167 if (d_mountpoint(dentry))
168 return 1;
169 if (!(exp->ex_flags & NFSEXP_V4ROOT))
170 return 0;
171 return dentry->d_inode != NULL;
172}
173
144__be32 174__be32
145nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 175nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
146 const char *name, unsigned int len, 176 const char *name, unsigned int len,
@@ -169,35 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
169 dentry = dget(dparent); 199 dentry = dget(dparent);
170 else if (dparent != exp->ex_path.dentry) 200 else if (dparent != exp->ex_path.dentry)
171 dentry = dget_parent(dparent); 201 dentry = dget_parent(dparent);
172 else if (!EX_NOHIDE(exp)) 202 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
173 dentry = dget(dparent); /* .. == . just like at / */ 203 dentry = dget(dparent); /* .. == . just like at / */
174 else { 204 else {
175 /* checking mountpoint crossing is very different when stepping up */ 205 /* checking mountpoint crossing is very different when stepping up */
176 struct svc_export *exp2 = NULL; 206 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
177 struct dentry *dp; 207 if (host_err)
178 struct path path = {.mnt = mntget(exp->ex_path.mnt),
179 .dentry = dget(dparent)};
180
181 while (path.dentry == path.mnt->mnt_root &&
182 follow_up(&path))
183 ;
184 dp = dget_parent(path.dentry);
185 dput(path.dentry);
186 path.dentry = dp;
187
188 exp2 = rqst_exp_parent(rqstp, &path);
189 if (PTR_ERR(exp2) == -ENOENT) {
190 dentry = dget(dparent);
191 } else if (IS_ERR(exp2)) {
192 host_err = PTR_ERR(exp2);
193 path_put(&path);
194 goto out_nfserr; 208 goto out_nfserr;
195 } else {
196 dentry = dget(path.dentry);
197 exp_put(exp);
198 exp = exp2;
199 }
200 path_put(&path);
201 } 209 }
202 } else { 210 } else {
203 fh_lock(fhp); 211 fh_lock(fhp);
@@ -208,7 +216,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
208 /* 216 /*
209 * check if we have crossed a mount point ... 217 * check if we have crossed a mount point ...
210 */ 218 */
211 if (d_mountpoint(dentry)) { 219 if (nfsd_mountpoint(dentry, exp)) {
212 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 220 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
213 dput(dentry); 221 dput(dentry);
214 goto out_nfserr; 222 goto out_nfserr;
@@ -744,8 +752,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
744 flags, current_cred()); 752 flags, current_cred());
745 if (IS_ERR(*filp)) 753 if (IS_ERR(*filp))
746 host_err = PTR_ERR(*filp); 754 host_err = PTR_ERR(*filp);
747 else
748 ima_counts_get(*filp);
749out_nfserr: 755out_nfserr:
750 err = nfserrno(host_err); 756 err = nfserrno(host_err);
751out: 757out:
@@ -774,12 +780,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
774 int (*fsync) (struct file *, struct dentry *, int); 780 int (*fsync) (struct file *, struct dentry *, int);
775 int err; 781 int err;
776 782
777 err = filemap_fdatawrite(inode->i_mapping); 783 err = filemap_write_and_wait(inode->i_mapping);
778 if (err == 0 && fop && (fsync = fop->fsync)) 784 if (err == 0 && fop && (fsync = fop->fsync))
779 err = fsync(filp, dp, 0); 785 err = fsync(filp, dp, 0);
780 if (err == 0)
781 err = filemap_fdatawait(inode->i_mapping);
782
783 return err; 786 return err;
784} 787}
785 788
@@ -2124,8 +2127,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2124 */ 2127 */
2125 path.mnt = exp->ex_path.mnt; 2128 path.mnt = exp->ex_path.mnt;
2126 path.dentry = dentry; 2129 path.dentry = dentry;
2127 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), 2130 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
2128 IMA_COUNT_LEAVE);
2129nfsd_out: 2131nfsd_out:
2130 return err? nfserrno(err) : 0; 2132 return err? nfserrno(err) : 0;
2131} 2133}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 00000000000..4b1de0a9ea7
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
3 */
4
5#ifndef LINUX_NFSD_VFS_H
6#define LINUX_NFSD_VFS_H
7
8#include "nfsfh.h"
9
10/*
11 * Flags for nfsd_permission
12 */
13#define NFSD_MAY_NOP 0
14#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
15#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
16#define NFSD_MAY_READ 4 /* == MAY_READ */
17#define NFSD_MAY_SATTR 8
18#define NFSD_MAY_TRUNC 16
19#define NFSD_MAY_LOCK 32
20#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23
24#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
25#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
26
27/*
28 * Callback function for readdir
29 */
30typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
31
32/* nfsd/vfs.c */
33int fh_lock_parent(struct svc_fh *, struct dentry *);
34int nfsd_racache_init(int);
35void nfsd_racache_shutdown(void);
36int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
37 struct svc_export **expp);
38__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
39 const char *, unsigned int, struct svc_fh *);
40__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
41 const char *, unsigned int,
42 struct svc_export **, struct dentry **);
43__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
44 struct iattr *, int, time_t);
45int nfsd_mountpoint(struct dentry *, struct svc_export *);
46#ifdef CONFIG_NFSD_V4
47__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
48 struct nfs4_acl *);
49int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
50#endif /* CONFIG_NFSD_V4 */
51__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
52 char *name, int len, struct iattr *attrs,
53 int type, dev_t rdev, struct svc_fh *res);
54#ifdef CONFIG_NFSD_V3
55__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
56__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
57 char *name, int len, struct iattr *attrs,
58 struct svc_fh *res, int createmode,
59 u32 *verifier, int *truncp, int *created);
60__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
61 loff_t, unsigned long);
62#endif /* CONFIG_NFSD_V3 */
63__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
64 int, struct file **);
65void nfsd_close(struct file *);
66__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
67 loff_t, struct kvec *, int, unsigned long *);
68__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
69 loff_t, struct kvec *,int, unsigned long *, int *);
70__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
71 char *, int *);
72__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
73 char *name, int len, char *path, int plen,
74 struct svc_fh *res, struct iattr *);
75__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
76 char *, int, struct svc_fh *);
77__be32 nfsd_rename(struct svc_rqst *,
78 struct svc_fh *, char *, int,
79 struct svc_fh *, char *, int);
80__be32 nfsd_remove(struct svc_rqst *,
81 struct svc_fh *, char *, int);
82__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
83 char *name, int len);
84int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
85 unsigned long size);
86__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
87 loff_t *, struct readdir_cd *, filldir_t);
88__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
89 struct kstatfs *, int access);
90
91int nfsd_notify_change(struct inode *, struct iattr *);
92__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
93 struct dentry *, int);
94int nfsd_sync_dir(struct dentry *dp);
95
96#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
97struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
98int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
99#endif
100
101#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 00000000000..53b1863dd8f
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,173 @@
1/* XDR types for nfsd. This is mainly a typing exercise. */
2
3#ifndef LINUX_NFSD_H
4#define LINUX_NFSD_H
5
6#include <linux/vfs.h>
7#include "nfsd.h"
8#include "nfsfh.h"
9
10struct nfsd_fhandle {
11 struct svc_fh fh;
12};
13
14struct nfsd_sattrargs {
15 struct svc_fh fh;
16 struct iattr attrs;
17};
18
19struct nfsd_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd_readargs {
26 struct svc_fh fh;
27 __u32 offset;
28 __u32 count;
29 int vlen;
30};
31
32struct nfsd_writeargs {
33 svc_fh fh;
34 __u32 offset;
35 int len;
36 int vlen;
37};
38
39struct nfsd_createargs {
40 struct svc_fh fh;
41 char * name;
42 unsigned int len;
43 struct iattr attrs;
44};
45
46struct nfsd_renameargs {
47 struct svc_fh ffh;
48 char * fname;
49 unsigned int flen;
50 struct svc_fh tfh;
51 char * tname;
52 unsigned int tlen;
53};
54
55struct nfsd_readlinkargs {
56 struct svc_fh fh;
57 char * buffer;
58};
59
60struct nfsd_linkargs {
61 struct svc_fh ffh;
62 struct svc_fh tfh;
63 char * tname;
64 unsigned int tlen;
65};
66
67struct nfsd_symlinkargs {
68 struct svc_fh ffh;
69 char * fname;
70 unsigned int flen;
71 char * tname;
72 unsigned int tlen;
73 struct iattr attrs;
74};
75
76struct nfsd_readdirargs {
77 struct svc_fh fh;
78 __u32 cookie;
79 __u32 count;
80 __be32 * buffer;
81};
82
83struct nfsd_attrstat {
84 struct svc_fh fh;
85 struct kstat stat;
86};
87
88struct nfsd_diropres {
89 struct svc_fh fh;
90 struct kstat stat;
91};
92
93struct nfsd_readlinkres {
94 int len;
95};
96
97struct nfsd_readres {
98 struct svc_fh fh;
99 unsigned long count;
100 struct kstat stat;
101};
102
103struct nfsd_readdirres {
104 int count;
105
106 struct readdir_cd common;
107 __be32 * buffer;
108 int buflen;
109 __be32 * offset;
110};
111
112struct nfsd_statfsres {
113 struct kstatfs stats;
114};
115
116/*
117 * Storage requirements for XDR arguments and results.
118 */
119union nfsd_xdrstore {
120 struct nfsd_sattrargs sattr;
121 struct nfsd_diropargs dirop;
122 struct nfsd_readargs read;
123 struct nfsd_writeargs write;
124 struct nfsd_createargs create;
125 struct nfsd_renameargs rename;
126 struct nfsd_linkargs link;
127 struct nfsd_symlinkargs symlink;
128 struct nfsd_readdirargs readdir;
129};
130
131#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
132
133
134int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
135int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
136int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
137 struct nfsd_sattrargs *);
138int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
139 struct nfsd_diropargs *);
140int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
141 struct nfsd_readargs *);
142int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
143 struct nfsd_writeargs *);
144int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
145 struct nfsd_createargs *);
146int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
147 struct nfsd_renameargs *);
148int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
149 struct nfsd_readlinkargs *);
150int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
151 struct nfsd_linkargs *);
152int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
153 struct nfsd_symlinkargs *);
154int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
155 struct nfsd_readdirargs *);
156int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
157int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
158int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
159int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
160int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
161int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
162int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
163
164int nfssvc_encode_entry(void *, const char *name,
165 int namlen, loff_t offset, u64 ino, unsigned int);
166
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168
169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172
173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 00000000000..7df980eb056
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,344 @@
1/*
2 * XDR types for NFSv3 in nfsd.
3 *
4 * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
5 */
6
7#ifndef _LINUX_NFSD_XDR3_H
8#define _LINUX_NFSD_XDR3_H
9
10#include "xdr.h"
11
12struct nfsd3_sattrargs {
13 struct svc_fh fh;
14 struct iattr attrs;
15 int check_guard;
16 time_t guardtime;
17};
18
19struct nfsd3_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd3_accessargs {
26 struct svc_fh fh;
27 unsigned int access;
28};
29
30struct nfsd3_readargs {
31 struct svc_fh fh;
32 __u64 offset;
33 __u32 count;
34 int vlen;
35};
36
37struct nfsd3_writeargs {
38 svc_fh fh;
39 __u64 offset;
40 __u32 count;
41 int stable;
42 __u32 len;
43 int vlen;
44};
45
46struct nfsd3_createargs {
47 struct svc_fh fh;
48 char * name;
49 unsigned int len;
50 int createmode;
51 struct iattr attrs;
52 __be32 * verf;
53};
54
55struct nfsd3_mknodargs {
56 struct svc_fh fh;
57 char * name;
58 unsigned int len;
59 __u32 ftype;
60 __u32 major, minor;
61 struct iattr attrs;
62};
63
64struct nfsd3_renameargs {
65 struct svc_fh ffh;
66 char * fname;
67 unsigned int flen;
68 struct svc_fh tfh;
69 char * tname;
70 unsigned int tlen;
71};
72
73struct nfsd3_readlinkargs {
74 struct svc_fh fh;
75 char * buffer;
76};
77
78struct nfsd3_linkargs {
79 struct svc_fh ffh;
80 struct svc_fh tfh;
81 char * tname;
82 unsigned int tlen;
83};
84
85struct nfsd3_symlinkargs {
86 struct svc_fh ffh;
87 char * fname;
88 unsigned int flen;
89 char * tname;
90 unsigned int tlen;
91 struct iattr attrs;
92};
93
94struct nfsd3_readdirargs {
95 struct svc_fh fh;
96 __u64 cookie;
97 __u32 dircount;
98 __u32 count;
99 __be32 * verf;
100 __be32 * buffer;
101};
102
103struct nfsd3_commitargs {
104 struct svc_fh fh;
105 __u64 offset;
106 __u32 count;
107};
108
109struct nfsd3_getaclargs {
110 struct svc_fh fh;
111 int mask;
112};
113
114struct posix_acl;
115struct nfsd3_setaclargs {
116 struct svc_fh fh;
117 int mask;
118 struct posix_acl *acl_access;
119 struct posix_acl *acl_default;
120};
121
122struct nfsd3_attrstat {
123 __be32 status;
124 struct svc_fh fh;
125 struct kstat stat;
126};
127
128/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
129struct nfsd3_diropres {
130 __be32 status;
131 struct svc_fh dirfh;
132 struct svc_fh fh;
133};
134
135struct nfsd3_accessres {
136 __be32 status;
137 struct svc_fh fh;
138 __u32 access;
139};
140
141struct nfsd3_readlinkres {
142 __be32 status;
143 struct svc_fh fh;
144 __u32 len;
145};
146
147struct nfsd3_readres {
148 __be32 status;
149 struct svc_fh fh;
150 unsigned long count;
151 int eof;
152};
153
154struct nfsd3_writeres {
155 __be32 status;
156 struct svc_fh fh;
157 unsigned long count;
158 int committed;
159};
160
161struct nfsd3_renameres {
162 __be32 status;
163 struct svc_fh ffh;
164 struct svc_fh tfh;
165};
166
167struct nfsd3_linkres {
168 __be32 status;
169 struct svc_fh tfh;
170 struct svc_fh fh;
171};
172
173struct nfsd3_readdirres {
174 __be32 status;
175 struct svc_fh fh;
176 int count;
177 __be32 verf[2];
178
179 struct readdir_cd common;
180 __be32 * buffer;
181 int buflen;
182 __be32 * offset;
183 __be32 * offset1;
184 struct svc_rqst * rqstp;
185
186};
187
188struct nfsd3_fsstatres {
189 __be32 status;
190 struct kstatfs stats;
191 __u32 invarsec;
192};
193
194struct nfsd3_fsinfores {
195 __be32 status;
196 __u32 f_rtmax;
197 __u32 f_rtpref;
198 __u32 f_rtmult;
199 __u32 f_wtmax;
200 __u32 f_wtpref;
201 __u32 f_wtmult;
202 __u32 f_dtpref;
203 __u64 f_maxfilesize;
204 __u32 f_properties;
205};
206
207struct nfsd3_pathconfres {
208 __be32 status;
209 __u32 p_link_max;
210 __u32 p_name_max;
211 __u32 p_no_trunc;
212 __u32 p_chown_restricted;
213 __u32 p_case_insensitive;
214 __u32 p_case_preserving;
215};
216
217struct nfsd3_commitres {
218 __be32 status;
219 struct svc_fh fh;
220};
221
222struct nfsd3_getaclres {
223 __be32 status;
224 struct svc_fh fh;
225 int mask;
226 struct posix_acl *acl_access;
227 struct posix_acl *acl_default;
228};
229
230/* dummy type for release */
231struct nfsd3_fhandle_pair {
232 __u32 dummy;
233 struct svc_fh fh1;
234 struct svc_fh fh2;
235};
236
237/*
238 * Storage requirements for XDR arguments and results.
239 */
240union nfsd3_xdrstore {
241 struct nfsd3_sattrargs sattrargs;
242 struct nfsd3_diropargs diropargs;
243 struct nfsd3_readargs readargs;
244 struct nfsd3_writeargs writeargs;
245 struct nfsd3_createargs createargs;
246 struct nfsd3_renameargs renameargs;
247 struct nfsd3_linkargs linkargs;
248 struct nfsd3_symlinkargs symlinkargs;
249 struct nfsd3_readdirargs readdirargs;
250 struct nfsd3_diropres diropres;
251 struct nfsd3_accessres accessres;
252 struct nfsd3_readlinkres readlinkres;
253 struct nfsd3_readres readres;
254 struct nfsd3_writeres writeres;
255 struct nfsd3_renameres renameres;
256 struct nfsd3_linkres linkres;
257 struct nfsd3_readdirres readdirres;
258 struct nfsd3_fsstatres fsstatres;
259 struct nfsd3_fsinfores fsinfores;
260 struct nfsd3_pathconfres pathconfres;
261 struct nfsd3_commitres commitres;
262 struct nfsd3_getaclres getaclres;
263};
264
265#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
266
267int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
268int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
269 struct nfsd3_sattrargs *);
270int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
271 struct nfsd3_diropargs *);
272int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
273 struct nfsd3_accessargs *);
274int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
275 struct nfsd3_readargs *);
276int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
277 struct nfsd3_writeargs *);
278int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
279 struct nfsd3_createargs *);
280int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
281 struct nfsd3_createargs *);
282int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
283 struct nfsd3_mknodargs *);
284int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
285 struct nfsd3_renameargs *);
286int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
287 struct nfsd3_readlinkargs *);
288int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
289 struct nfsd3_linkargs *);
290int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
291 struct nfsd3_symlinkargs *);
292int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
293 struct nfsd3_readdirargs *);
294int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
295 struct nfsd3_readdirargs *);
296int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
297 struct nfsd3_commitargs *);
298int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
299int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
300 struct nfsd3_attrstat *);
301int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
302 struct nfsd3_attrstat *);
303int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
304 struct nfsd3_diropres *);
305int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
306 struct nfsd3_accessres *);
307int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
308 struct nfsd3_readlinkres *);
309int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
310int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
311int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
312 struct nfsd3_diropres *);
313int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
314 struct nfsd3_renameres *);
315int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
316 struct nfsd3_linkres *);
317int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
318 struct nfsd3_readdirres *);
319int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
320 struct nfsd3_fsstatres *);
321int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
322 struct nfsd3_fsinfores *);
323int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
324 struct nfsd3_pathconfres *);
325int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
326 struct nfsd3_commitres *);
327
328int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
329 struct nfsd3_attrstat *);
330int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
331 struct nfsd3_fhandle_pair *);
332int nfs3svc_encode_entry(void *, const char *name,
333 int namlen, loff_t offset, u64 ino,
334 unsigned int);
335int nfs3svc_encode_entry_plus(void *, const char *name,
336 int namlen, loff_t offset, u64 ino,
337 unsigned int);
338/* Helper functions for NFSv3 ACL code */
339__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
340 struct svc_fh *fhp);
341__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
342
343
344#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 00000000000..efa33773953
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,562 @@
1/*
2 * Server-side types for NFSv4.
3 *
4 * Copyright (c) 2002 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Kendrick Smith <kmsmith@umich.edu>
8 * Andy Adamson <andros@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#ifndef _LINUX_NFSD_XDR4_H
38#define _LINUX_NFSD_XDR4_H
39
40#include "state.h"
41#include "nfsd.h"
42
43#define NFSD4_MAX_TAGLEN 128
44#define XDR_LEN(n) (((n) + 3) & ~3)
45
46struct nfsd4_compound_state {
47 struct svc_fh current_fh;
48 struct svc_fh save_fh;
49 struct nfs4_stateowner *replay_owner;
50 /* For sessions DRC */
51 struct nfsd4_session *session;
52 struct nfsd4_slot *slot;
53 __be32 *datap;
54 size_t iovlen;
55 u32 minorversion;
56 u32 status;
57};
58
59static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
60{
61 return cs->slot != NULL;
62}
63
64struct nfsd4_change_info {
65 u32 atomic;
66 bool change_supported;
67 u32 before_ctime_sec;
68 u32 before_ctime_nsec;
69 u64 before_change;
70 u32 after_ctime_sec;
71 u32 after_ctime_nsec;
72 u64 after_change;
73};
74
75struct nfsd4_access {
76 u32 ac_req_access; /* request */
77 u32 ac_supported; /* response */
78 u32 ac_resp_access; /* response */
79};
80
81struct nfsd4_close {
82 u32 cl_seqid; /* request */
83 stateid_t cl_stateid; /* request+response */
84 struct nfs4_stateowner * cl_stateowner; /* response */
85};
86
87struct nfsd4_commit {
88 u64 co_offset; /* request */
89 u32 co_count; /* request */
90 nfs4_verifier co_verf; /* response */
91};
92
93struct nfsd4_create {
94 u32 cr_namelen; /* request */
95 char * cr_name; /* request */
96 u32 cr_type; /* request */
97 union { /* request */
98 struct {
99 u32 namelen;
100 char *name;
101 } link; /* NF4LNK */
102 struct {
103 u32 specdata1;
104 u32 specdata2;
105 } dev; /* NF4BLK, NF4CHR */
106 } u;
107 u32 cr_bmval[3]; /* request */
108 struct iattr cr_iattr; /* request */
109 struct nfsd4_change_info cr_cinfo; /* response */
110 struct nfs4_acl *cr_acl;
111};
112#define cr_linklen u.link.namelen
113#define cr_linkname u.link.name
114#define cr_specdata1 u.dev.specdata1
115#define cr_specdata2 u.dev.specdata2
116
117struct nfsd4_delegreturn {
118 stateid_t dr_stateid;
119};
120
121struct nfsd4_getattr {
122 u32 ga_bmval[3]; /* request */
123 struct svc_fh *ga_fhp; /* response */
124};
125
126struct nfsd4_link {
127 u32 li_namelen; /* request */
128 char * li_name; /* request */
129 struct nfsd4_change_info li_cinfo; /* response */
130};
131
132struct nfsd4_lock_denied {
133 clientid_t ld_clientid;
134 struct nfs4_stateowner *ld_sop;
135 u64 ld_start;
136 u64 ld_length;
137 u32 ld_type;
138};
139
140struct nfsd4_lock {
141 /* request */
142 u32 lk_type;
143 u32 lk_reclaim; /* boolean */
144 u64 lk_offset;
145 u64 lk_length;
146 u32 lk_is_new;
147 union {
148 struct {
149 u32 open_seqid;
150 stateid_t open_stateid;
151 u32 lock_seqid;
152 clientid_t clientid;
153 struct xdr_netobj owner;
154 } new;
155 struct {
156 stateid_t lock_stateid;
157 u32 lock_seqid;
158 } old;
159 } v;
160
161 /* response */
162 union {
163 struct {
164 stateid_t stateid;
165 } ok;
166 struct nfsd4_lock_denied denied;
167 } u;
168 /* The lk_replay_owner is the open owner in the open_to_lock_owner
169 * case and the lock owner otherwise: */
170 struct nfs4_stateowner *lk_replay_owner;
171};
172#define lk_new_open_seqid v.new.open_seqid
173#define lk_new_open_stateid v.new.open_stateid
174#define lk_new_lock_seqid v.new.lock_seqid
175#define lk_new_clientid v.new.clientid
176#define lk_new_owner v.new.owner
177#define lk_old_lock_stateid v.old.lock_stateid
178#define lk_old_lock_seqid v.old.lock_seqid
179
180#define lk_rflags u.ok.rflags
181#define lk_resp_stateid u.ok.stateid
182#define lk_denied u.denied
183
184
185struct nfsd4_lockt {
186 u32 lt_type;
187 clientid_t lt_clientid;
188 struct xdr_netobj lt_owner;
189 u64 lt_offset;
190 u64 lt_length;
191 struct nfs4_stateowner * lt_stateowner;
192 struct nfsd4_lock_denied lt_denied;
193};
194
195
196struct nfsd4_locku {
197 u32 lu_type;
198 u32 lu_seqid;
199 stateid_t lu_stateid;
200 u64 lu_offset;
201 u64 lu_length;
202 struct nfs4_stateowner *lu_stateowner;
203};
204
205
206struct nfsd4_lookup {
207 u32 lo_len; /* request */
208 char * lo_name; /* request */
209};
210
211struct nfsd4_putfh {
212 u32 pf_fhlen; /* request */
213 char *pf_fhval; /* request */
214};
215
216struct nfsd4_open {
217 u32 op_claim_type; /* request */
218 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
219 u32 op_delegate_type; /* request - CLAIM_PREV only */
220 stateid_t op_delegate_stateid; /* request - response */
221 u32 op_create; /* request */
222 u32 op_createmode; /* request */
223 u32 op_bmval[3]; /* request */
224 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
225 nfs4_verifier verf; /* EXCLUSIVE4 */
226 clientid_t op_clientid; /* request */
227 struct xdr_netobj op_owner; /* request */
228 u32 op_seqid; /* request */
229 u32 op_share_access; /* request */
230 u32 op_share_deny; /* request */
231 stateid_t op_stateid; /* response */
232 u32 op_recall; /* recall */
233 struct nfsd4_change_info op_cinfo; /* response */
234 u32 op_rflags; /* response */
235 int op_truncate; /* used during processing */
236 struct nfs4_stateowner *op_stateowner; /* used during processing */
237 struct nfs4_acl *op_acl;
238};
239#define op_iattr iattr
240#define op_verf verf
241
242struct nfsd4_open_confirm {
243 stateid_t oc_req_stateid /* request */;
244 u32 oc_seqid /* request */;
245 stateid_t oc_resp_stateid /* response */;
246 struct nfs4_stateowner * oc_stateowner; /* response */
247};
248
249struct nfsd4_open_downgrade {
250 stateid_t od_stateid;
251 u32 od_seqid;
252 u32 od_share_access;
253 u32 od_share_deny;
254 struct nfs4_stateowner *od_stateowner;
255};
256
257
258struct nfsd4_read {
259 stateid_t rd_stateid; /* request */
260 u64 rd_offset; /* request */
261 u32 rd_length; /* request */
262 int rd_vlen;
263 struct file *rd_filp;
264
265 struct svc_rqst *rd_rqstp; /* response */
266 struct svc_fh * rd_fhp; /* response */
267};
268
269struct nfsd4_readdir {
270 u64 rd_cookie; /* request */
271 nfs4_verifier rd_verf; /* request */
272 u32 rd_dircount; /* request */
273 u32 rd_maxcount; /* request */
274 u32 rd_bmval[3]; /* request */
275 struct svc_rqst *rd_rqstp; /* response */
276 struct svc_fh * rd_fhp; /* response */
277
278 struct readdir_cd common;
279 __be32 * buffer;
280 int buflen;
281 __be32 * offset;
282};
283
284struct nfsd4_release_lockowner {
285 clientid_t rl_clientid;
286 struct xdr_netobj rl_owner;
287};
288struct nfsd4_readlink {
289 struct svc_rqst *rl_rqstp; /* request */
290 struct svc_fh * rl_fhp; /* request */
291};
292
293struct nfsd4_remove {
294 u32 rm_namelen; /* request */
295 char * rm_name; /* request */
296 struct nfsd4_change_info rm_cinfo; /* response */
297};
298
299struct nfsd4_rename {
300 u32 rn_snamelen; /* request */
301 char * rn_sname; /* request */
302 u32 rn_tnamelen; /* request */
303 char * rn_tname; /* request */
304 struct nfsd4_change_info rn_sinfo; /* response */
305 struct nfsd4_change_info rn_tinfo; /* response */
306};
307
308struct nfsd4_secinfo {
309 u32 si_namelen; /* request */
310 char *si_name; /* request */
311 struct svc_export *si_exp; /* response */
312};
313
314struct nfsd4_setattr {
315 stateid_t sa_stateid; /* request */
316 u32 sa_bmval[3]; /* request */
317 struct iattr sa_iattr; /* request */
318 struct nfs4_acl *sa_acl;
319};
320
321struct nfsd4_setclientid {
322 nfs4_verifier se_verf; /* request */
323 u32 se_namelen; /* request */
324 char * se_name; /* request */
325 u32 se_callback_prog; /* request */
326 u32 se_callback_netid_len; /* request */
327 char * se_callback_netid_val; /* request */
328 u32 se_callback_addr_len; /* request */
329 char * se_callback_addr_val; /* request */
330 u32 se_callback_ident; /* request */
331 clientid_t se_clientid; /* response */
332 nfs4_verifier se_confirm; /* response */
333};
334
335struct nfsd4_setclientid_confirm {
336 clientid_t sc_clientid;
337 nfs4_verifier sc_confirm;
338};
339
340/* also used for NVERIFY */
341struct nfsd4_verify {
342 u32 ve_bmval[3]; /* request */
343 u32 ve_attrlen; /* request */
344 char * ve_attrval; /* request */
345};
346
347struct nfsd4_write {
348 stateid_t wr_stateid; /* request */
349 u64 wr_offset; /* request */
350 u32 wr_stable_how; /* request */
351 u32 wr_buflen; /* request */
352 int wr_vlen;
353
354 u32 wr_bytes_written; /* response */
355 u32 wr_how_written; /* response */
356 nfs4_verifier wr_verifier; /* response */
357};
358
359struct nfsd4_exchange_id {
360 nfs4_verifier verifier;
361 struct xdr_netobj clname;
362 u32 flags;
363 clientid_t clientid;
364 u32 seqid;
365 int spa_how;
366};
367
368struct nfsd4_sequence {
369 struct nfs4_sessionid sessionid; /* request/response */
370 u32 seqid; /* request/response */
371 u32 slotid; /* request/response */
372 u32 maxslots; /* request/response */
373 u32 cachethis; /* request */
374#if 0
375 u32 target_maxslots; /* response */
376 u32 status_flags; /* response */
377#endif /* not yet */
378};
379
380struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid;
382};
383
384struct nfsd4_op {
385 int opnum;
386 __be32 status;
387 union {
388 struct nfsd4_access access;
389 struct nfsd4_close close;
390 struct nfsd4_commit commit;
391 struct nfsd4_create create;
392 struct nfsd4_delegreturn delegreturn;
393 struct nfsd4_getattr getattr;
394 struct svc_fh * getfh;
395 struct nfsd4_link link;
396 struct nfsd4_lock lock;
397 struct nfsd4_lockt lockt;
398 struct nfsd4_locku locku;
399 struct nfsd4_lookup lookup;
400 struct nfsd4_verify nverify;
401 struct nfsd4_open open;
402 struct nfsd4_open_confirm open_confirm;
403 struct nfsd4_open_downgrade open_downgrade;
404 struct nfsd4_putfh putfh;
405 struct nfsd4_read read;
406 struct nfsd4_readdir readdir;
407 struct nfsd4_readlink readlink;
408 struct nfsd4_remove remove;
409 struct nfsd4_rename rename;
410 clientid_t renew;
411 struct nfsd4_secinfo secinfo;
412 struct nfsd4_setattr setattr;
413 struct nfsd4_setclientid setclientid;
414 struct nfsd4_setclientid_confirm setclientid_confirm;
415 struct nfsd4_verify verify;
416 struct nfsd4_write write;
417 struct nfsd4_release_lockowner release_lockowner;
418
419 /* NFSv4.1 */
420 struct nfsd4_exchange_id exchange_id;
421 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence;
424 } u;
425 struct nfs4_replay * replay;
426};
427
428struct nfsd4_compoundargs {
429 /* scratch variables for XDR decode */
430 __be32 * p;
431 __be32 * end;
432 struct page ** pagelist;
433 int pagelen;
434 __be32 tmp[8];
435 __be32 * tmpp;
436 struct tmpbuf {
437 struct tmpbuf *next;
438 void (*release)(const void *);
439 void *buf;
440 } *to_free;
441
442 struct svc_rqst *rqstp;
443
444 u32 taglen;
445 char * tag;
446 u32 minorversion;
447 u32 opcnt;
448 struct nfsd4_op *ops;
449 struct nfsd4_op iops[8];
450};
451
452struct nfsd4_compoundres {
453 /* scratch variables for XDR encode */
454 __be32 * p;
455 __be32 * end;
456 struct xdr_buf * xbuf;
457 struct svc_rqst * rqstp;
458
459 u32 taglen;
460 char * tag;
461 u32 opcnt;
462 __be32 * tagp; /* tag, opcount encode location */
463 struct nfsd4_compound_state cstate;
464};
465
466static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
467{
468 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
469 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
470}
471
472static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
473{
474 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
475}
476
477#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
478
479static inline void
480set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
481{
482 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
483 cinfo->atomic = 1;
484 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
485 if (cinfo->change_supported) {
486 cinfo->before_change = fhp->fh_pre_change;
487 cinfo->after_change = fhp->fh_post_change;
488 } else {
489 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
490 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
491 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
492 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
493 }
494}
495
496int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
497int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
498 struct nfsd4_compoundargs *);
499int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
500 struct nfsd4_compoundres *);
501void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
502void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
503__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
504 struct dentry *dentry, __be32 *buffer, int *countp,
505 u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
506extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
507 struct nfsd4_compound_state *,
508 struct nfsd4_setclientid *setclid);
509extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
510 struct nfsd4_compound_state *,
511 struct nfsd4_setclientid_confirm *setclientid_confirm);
512extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *,
517struct nfsd4_exchange_id *);
518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *,
522 struct nfsd4_compound_state *,
523 struct nfsd4_sequence *);
524extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
530 struct svc_fh *current_fh, struct nfsd4_open *open);
531extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
532 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
533extern __be32 nfsd4_close(struct svc_rqst *rqstp,
534 struct nfsd4_compound_state *,
535 struct nfsd4_close *close);
536extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
537 struct nfsd4_compound_state *,
538 struct nfsd4_open_downgrade *od);
539extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
540 struct nfsd4_lock *lock);
541extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
542 struct nfsd4_compound_state *,
543 struct nfsd4_lockt *lockt);
544extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
545 struct nfsd4_compound_state *,
546 struct nfsd4_locku *locku);
547extern __be32
548nfsd4_release_lockowner(struct svc_rqst *rqstp,
549 struct nfsd4_compound_state *,
550 struct nfsd4_release_lockowner *rlockowner);
551extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
552extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
553 struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
554extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
555 struct nfsd4_compound_state *, clientid_t *clid);
556#endif
557
558/*
559 * Local variables:
560 * c-basic-offset: 8
561 * End:
562 */
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d69e6ae5925..3f959f1879d 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -142,29 +142,75 @@ static void nilfs_palloc_desc_block_init(struct inode *inode,
142 } 142 }
143} 143}
144 144
145static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
146 int create,
147 void (*init_block)(struct inode *,
148 struct buffer_head *,
149 void *),
150 struct buffer_head **bhp,
151 struct nilfs_bh_assoc *prev,
152 spinlock_t *lock)
153{
154 int ret;
155
156 spin_lock(lock);
157 if (prev->bh && blkoff == prev->blkoff) {
158 get_bh(prev->bh);
159 *bhp = prev->bh;
160 spin_unlock(lock);
161 return 0;
162 }
163 spin_unlock(lock);
164
165 ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
166 if (!ret) {
167 spin_lock(lock);
168 /*
169 * The following code must be safe for change of the
170 * cache contents during the get block call.
171 */
172 brelse(prev->bh);
173 get_bh(*bhp);
174 prev->bh = *bhp;
175 prev->blkoff = blkoff;
176 spin_unlock(lock);
177 }
178 return ret;
179}
180
145static int nilfs_palloc_get_desc_block(struct inode *inode, 181static int nilfs_palloc_get_desc_block(struct inode *inode,
146 unsigned long group, 182 unsigned long group,
147 int create, struct buffer_head **bhp) 183 int create, struct buffer_head **bhp)
148{ 184{
149 return nilfs_mdt_get_block(inode, 185 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
150 nilfs_palloc_desc_blkoff(inode, group), 186
151 create, nilfs_palloc_desc_block_init, bhp); 187 return nilfs_palloc_get_block(inode,
188 nilfs_palloc_desc_blkoff(inode, group),
189 create, nilfs_palloc_desc_block_init,
190 bhp, &cache->prev_desc, &cache->lock);
152} 191}
153 192
154static int nilfs_palloc_get_bitmap_block(struct inode *inode, 193static int nilfs_palloc_get_bitmap_block(struct inode *inode,
155 unsigned long group, 194 unsigned long group,
156 int create, struct buffer_head **bhp) 195 int create, struct buffer_head **bhp)
157{ 196{
158 return nilfs_mdt_get_block(inode, 197 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
159 nilfs_palloc_bitmap_blkoff(inode, group), 198
160 create, NULL, bhp); 199 return nilfs_palloc_get_block(inode,
200 nilfs_palloc_bitmap_blkoff(inode, group),
201 create, NULL, bhp,
202 &cache->prev_bitmap, &cache->lock);
161} 203}
162 204
163int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, 205int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
164 int create, struct buffer_head **bhp) 206 int create, struct buffer_head **bhp)
165{ 207{
166 return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), 208 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
167 create, NULL, bhp); 209
210 return nilfs_palloc_get_block(inode,
211 nilfs_palloc_entry_blkoff(inode, nr),
212 create, NULL, bhp,
213 &cache->prev_entry, &cache->lock);
168} 214}
169 215
170static struct nilfs_palloc_group_desc * 216static struct nilfs_palloc_group_desc *
@@ -176,13 +222,6 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode,
176 group % nilfs_palloc_groups_per_desc_block(inode); 222 group % nilfs_palloc_groups_per_desc_block(inode);
177} 223}
178 224
179static unsigned char *
180nilfs_palloc_block_get_bitmap(const struct inode *inode,
181 const struct buffer_head *bh, void *kaddr)
182{
183 return (unsigned char *)(kaddr + bh_offset(bh));
184}
185
186void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, 225void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
187 const struct buffer_head *bh, void *kaddr) 226 const struct buffer_head *bh, void *kaddr)
188{ 227{
@@ -289,8 +328,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
289 if (ret < 0) 328 if (ret < 0)
290 goto out_desc; 329 goto out_desc;
291 bitmap_kaddr = kmap(bitmap_bh->b_page); 330 bitmap_kaddr = kmap(bitmap_bh->b_page);
292 bitmap = nilfs_palloc_block_get_bitmap( 331 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
293 inode, bitmap_bh, bitmap_kaddr);
294 pos = nilfs_palloc_find_available_slot( 332 pos = nilfs_palloc_find_available_slot(
295 inode, group, group_offset, bitmap, 333 inode, group, group_offset, bitmap,
296 entries_per_group); 334 entries_per_group);
@@ -351,8 +389,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
351 desc = nilfs_palloc_block_get_group_desc(inode, group, 389 desc = nilfs_palloc_block_get_group_desc(inode, group,
352 req->pr_desc_bh, desc_kaddr); 390 req->pr_desc_bh, desc_kaddr);
353 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 391 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
354 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, 392 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
355 bitmap_kaddr);
356 393
357 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 394 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
358 group_offset, bitmap)) 395 group_offset, bitmap))
@@ -385,8 +422,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
385 desc = nilfs_palloc_block_get_group_desc(inode, group, 422 desc = nilfs_palloc_block_get_group_desc(inode, group,
386 req->pr_desc_bh, desc_kaddr); 423 req->pr_desc_bh, desc_kaddr);
387 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 424 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
388 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, 425 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
389 bitmap_kaddr);
390 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 426 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
391 group_offset, bitmap)) 427 group_offset, bitmap))
392 printk(KERN_WARNING "%s: entry numer %llu already freed\n", 428 printk(KERN_WARNING "%s: entry numer %llu already freed\n",
@@ -472,8 +508,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
472 desc = nilfs_palloc_block_get_group_desc( 508 desc = nilfs_palloc_block_get_group_desc(
473 inode, group, desc_bh, desc_kaddr); 509 inode, group, desc_bh, desc_kaddr);
474 bitmap_kaddr = kmap(bitmap_bh->b_page); 510 bitmap_kaddr = kmap(bitmap_bh->b_page);
475 bitmap = nilfs_palloc_block_get_bitmap( 511 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
476 inode, bitmap_bh, bitmap_kaddr);
477 for (j = i, n = 0; 512 for (j = i, n = 0;
478 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 513 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
479 entry_nrs[j]); 514 entry_nrs[j]);
@@ -502,3 +537,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
502 } 537 }
503 return 0; 538 return 0;
504} 539}
540
541void nilfs_palloc_setup_cache(struct inode *inode,
542 struct nilfs_palloc_cache *cache)
543{
544 NILFS_MDT(inode)->mi_palloc_cache = cache;
545 spin_lock_init(&cache->lock);
546}
547
548void nilfs_palloc_clear_cache(struct inode *inode)
549{
550 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
551
552 spin_lock(&cache->lock);
553 brelse(cache->prev_desc.bh);
554 brelse(cache->prev_bitmap.bh);
555 brelse(cache->prev_entry.bh);
556 cache->prev_desc.bh = NULL;
557 cache->prev_bitmap.bh = NULL;
558 cache->prev_entry.bh = NULL;
559 spin_unlock(&cache->lock);
560}
561
562void nilfs_palloc_destroy_cache(struct inode *inode)
563{
564 nilfs_palloc_clear_cache(inode);
565 NILFS_MDT(inode)->mi_palloc_cache = NULL;
566}
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4ace5475c2c..f4543ac4f56 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -69,4 +69,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
69#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 69#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
70#define nilfs_find_next_zero_bit ext2_find_next_zero_bit 70#define nilfs_find_next_zero_bit ext2_find_next_zero_bit
71 71
72/*
73 * persistent object allocator cache
74 */
75
76struct nilfs_bh_assoc {
77 unsigned long blkoff;
78 struct buffer_head *bh;
79};
80
81struct nilfs_palloc_cache {
82 spinlock_t lock;
83 struct nilfs_bh_assoc prev_desc;
84 struct nilfs_bh_assoc prev_bitmap;
85 struct nilfs_bh_assoc prev_entry;
86};
87
88void nilfs_palloc_setup_cache(struct inode *inode,
89 struct nilfs_palloc_cache *cache);
90void nilfs_palloc_clear_cache(struct inode *inode);
91void nilfs_palloc_destroy_cache(struct inode *inode);
92
72#endif /* _NILFS_ALLOC_H */ 93#endif /* _NILFS_ALLOC_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 08834df6ec6..effdbdbe6c1 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -402,19 +402,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
402void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) 402void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
403{ 403{
404 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); 404 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
405 if (NILFS_MDT(bmap->b_inode))
406 nilfs_mdt_mark_dirty(bmap->b_inode);
407 else
408 mark_inode_dirty(bmap->b_inode);
409} 405}
410 406
411void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) 407void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
412{ 408{
413 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); 409 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
414 if (NILFS_MDT(bmap->b_inode))
415 nilfs_mdt_mark_dirty(bmap->b_inode);
416 else
417 mark_inode_dirty(bmap->b_inode);
418} 410}
419 411
420__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 412__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
@@ -425,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
425 417
426 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - 418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
427 bmap->b_inode->i_blkbits); 419 bmap->b_inode->i_blkbits);
428 for (pbh = page_buffers(bh->b_page); pbh != bh; 420 for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
429 pbh = pbh->b_this_page, key++); 421 key++;
430 422
431 return key; 423 return key;
432} 424}
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 84c25382f8e..471e269536a 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -68,9 +68,34 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
68 truncate_inode_pages(btnc, 0); 68 truncate_inode_pages(btnc, 0);
69} 69}
70 70
71struct buffer_head *
72nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
73{
74 struct inode *inode = NILFS_BTNC_I(btnc);
75 struct buffer_head *bh;
76
77 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
78 if (unlikely(!bh))
79 return NULL;
80
81 if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
82 buffer_dirty(bh))) {
83 brelse(bh);
84 BUG();
85 }
86 memset(bh->b_data, 0, 1 << inode->i_blkbits);
87 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
88 bh->b_blocknr = blocknr;
89 set_buffer_mapped(bh);
90 set_buffer_uptodate(bh);
91
92 unlock_page(bh->b_page);
93 page_cache_release(bh->b_page);
94 return bh;
95}
96
71int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, 97int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
72 sector_t pblocknr, struct buffer_head **pbh, 98 sector_t pblocknr, struct buffer_head **pbh)
73 int newblk)
74{ 99{
75 struct buffer_head *bh; 100 struct buffer_head *bh;
76 struct inode *inode = NILFS_BTNC_I(btnc); 101 struct inode *inode = NILFS_BTNC_I(btnc);
@@ -81,19 +106,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
81 return -ENOMEM; 106 return -ENOMEM;
82 107
83 err = -EEXIST; /* internal code */ 108 err = -EEXIST; /* internal code */
84 if (newblk) {
85 if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
86 buffer_dirty(bh))) {
87 brelse(bh);
88 BUG();
89 }
90 memset(bh->b_data, 0, 1 << inode->i_blkbits);
91 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
92 bh->b_blocknr = blocknr;
93 set_buffer_mapped(bh);
94 set_buffer_uptodate(bh);
95 goto found;
96 }
97 109
98 if (buffer_uptodate(bh) || buffer_dirty(bh)) 110 if (buffer_uptodate(bh) || buffer_dirty(bh))
99 goto found; 111 goto found;
@@ -135,27 +147,6 @@ out_locked:
135 return err; 147 return err;
136} 148}
137 149
138int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
139 sector_t pblocknr, struct buffer_head **pbh, int newblk)
140{
141 struct buffer_head *bh;
142 int err;
143
144 err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
145 if (err == -EEXIST) /* internal code (cache hit) */
146 return 0;
147 if (unlikely(err))
148 return err;
149
150 bh = *pbh;
151 wait_on_buffer(bh);
152 if (!buffer_uptodate(bh)) {
153 brelse(bh);
154 return -EIO;
155 }
156 return 0;
157}
158
159/** 150/**
160 * nilfs_btnode_delete - delete B-tree node buffer 151 * nilfs_btnode_delete - delete B-tree node buffer
161 * @bh: buffer to be deleted 152 * @bh: buffer to be deleted
@@ -244,12 +235,13 @@ retry:
244 unlock_page(obh->b_page); 235 unlock_page(obh->b_page);
245 } 236 }
246 237
247 err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); 238 nbh = nilfs_btnode_create_block(btnc, newkey);
248 if (likely(!err)) { 239 if (!nbh)
249 BUG_ON(nbh == obh); 240 return -ENOMEM;
250 ctxt->newbh = nbh; 241
251 } 242 BUG_ON(nbh == obh);
252 return err; 243 ctxt->newbh = nbh;
244 return 0;
253 245
254 failed_unlock: 246 failed_unlock:
255 unlock_page(obh->b_page); 247 unlock_page(obh->b_page);
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 3e2275172ed..07da83f0771 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -40,10 +40,10 @@ struct nilfs_btnode_chkey_ctxt {
40void nilfs_btnode_cache_init_once(struct address_space *); 40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 42void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
44 __u64 blocknr);
43int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, 45int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
44 struct buffer_head **, int); 46 struct buffer_head **);
45int nilfs_btnode_get(struct address_space *, __u64, sector_t,
46 struct buffer_head **, int);
47void nilfs_btnode_delete(struct buffer_head *); 47void nilfs_btnode_delete(struct buffer_head *);
48int nilfs_btnode_prepare_change_key(struct address_space *, 48int nilfs_btnode_prepare_change_key(struct address_space *,
49 struct nilfs_btnode_chkey_ctxt *); 49 struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index e25b507a474..7cdd98b8d51 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -114,7 +114,18 @@ static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
114{ 114{
115 struct address_space *btnc = 115 struct address_space *btnc =
116 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; 116 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117 return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); 117 int err;
118
119 err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
120 if (err)
121 return err == -EEXIST ? 0 : err;
122
123 wait_on_buffer(*bhp);
124 if (!buffer_uptodate(*bhp)) {
125 brelse(*bhp);
126 return -EIO;
127 }
128 return 0;
118} 129}
119 130
120static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, 131static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
@@ -122,12 +133,15 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
122{ 133{
123 struct address_space *btnc = 134 struct address_space *btnc =
124 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; 135 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125 int ret; 136 struct buffer_head *bh;
126 137
127 ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); 138 bh = nilfs_btnode_create_block(btnc, ptr);
128 if (!ret) 139 if (!bh)
129 set_buffer_nilfs_volatile(*bhp); 140 return -ENOMEM;
130 return ret; 141
142 set_buffer_nilfs_volatile(bh);
143 *bhp = bh;
144 return 0;
131} 145}
132 146
133static inline int 147static inline int
@@ -444,6 +458,18 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
444 nilfs_btree_get_nonroot_node(path, level); 458 nilfs_btree_get_nonroot_node(path, level);
445} 459}
446 460
461static inline int
462nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
463{
464 if (unlikely(nilfs_btree_node_get_level(node) != level)) {
465 dump_stack();
466 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
467 nilfs_btree_node_get_level(node), level);
468 return 1;
469 }
470 return 0;
471}
472
447static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, 473static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
448 struct nilfs_btree_path *path, 474 struct nilfs_btree_path *path,
449 __u64 key, __u64 *ptrp, int minlevel) 475 __u64 key, __u64 *ptrp, int minlevel)
@@ -467,7 +493,8 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
467 if (ret < 0) 493 if (ret < 0)
468 return ret; 494 return ret;
469 node = nilfs_btree_get_nonroot_node(path, level); 495 node = nilfs_btree_get_nonroot_node(path, level);
470 BUG_ON(level != nilfs_btree_node_get_level(node)); 496 if (nilfs_btree_bad_node(node, level))
497 return -EINVAL;
471 if (!found) 498 if (!found)
472 found = nilfs_btree_node_lookup(node, key, &index); 499 found = nilfs_btree_node_lookup(node, key, &index);
473 else 500 else
@@ -512,7 +539,8 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
512 if (ret < 0) 539 if (ret < 0)
513 return ret; 540 return ret;
514 node = nilfs_btree_get_nonroot_node(path, level); 541 node = nilfs_btree_get_nonroot_node(path, level);
515 BUG_ON(level != nilfs_btree_node_get_level(node)); 542 if (nilfs_btree_bad_node(node, level))
543 return -EINVAL;
516 index = nilfs_btree_node_get_nchildren(node) - 1; 544 index = nilfs_btree_node_get_nchildren(node) - 1;
517 ptr = nilfs_btree_node_get_ptr(btree, node, index); 545 ptr = nilfs_btree_node_get_ptr(btree, node, index);
518 path[level].bp_index = index; 546 path[level].bp_index = index;
@@ -638,13 +666,11 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
638{ 666{
639 if (level < nilfs_btree_height(btree) - 1) { 667 if (level < nilfs_btree_height(btree) - 1) {
640 do { 668 do {
641 lock_buffer(path[level].bp_bh);
642 nilfs_btree_node_set_key( 669 nilfs_btree_node_set_key(
643 nilfs_btree_get_nonroot_node(path, level), 670 nilfs_btree_get_nonroot_node(path, level),
644 path[level].bp_index, key); 671 path[level].bp_index, key);
645 if (!buffer_dirty(path[level].bp_bh)) 672 if (!buffer_dirty(path[level].bp_bh))
646 nilfs_btnode_mark_dirty(path[level].bp_bh); 673 nilfs_btnode_mark_dirty(path[level].bp_bh);
647 unlock_buffer(path[level].bp_bh);
648 } while ((path[level].bp_index == 0) && 674 } while ((path[level].bp_index == 0) &&
649 (++level < nilfs_btree_height(btree) - 1)); 675 (++level < nilfs_btree_height(btree) - 1));
650 } 676 }
@@ -663,13 +689,11 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
663 struct nilfs_btree_node *node; 689 struct nilfs_btree_node *node;
664 690
665 if (level < nilfs_btree_height(btree) - 1) { 691 if (level < nilfs_btree_height(btree) - 1) {
666 lock_buffer(path[level].bp_bh);
667 node = nilfs_btree_get_nonroot_node(path, level); 692 node = nilfs_btree_get_nonroot_node(path, level);
668 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 693 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
669 path[level].bp_index); 694 path[level].bp_index);
670 if (!buffer_dirty(path[level].bp_bh)) 695 if (!buffer_dirty(path[level].bp_bh))
671 nilfs_btnode_mark_dirty(path[level].bp_bh); 696 nilfs_btnode_mark_dirty(path[level].bp_bh);
672 unlock_buffer(path[level].bp_bh);
673 697
674 if (path[level].bp_index == 0) 698 if (path[level].bp_index == 0)
675 nilfs_btree_promote_key(btree, path, level + 1, 699 nilfs_btree_promote_key(btree, path, level + 1,
@@ -689,9 +713,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
689 struct nilfs_btree_node *node, *left; 713 struct nilfs_btree_node *node, *left;
690 int nchildren, lnchildren, n, move; 714 int nchildren, lnchildren, n, move;
691 715
692 lock_buffer(path[level].bp_bh);
693 lock_buffer(path[level].bp_sib_bh);
694
695 node = nilfs_btree_get_nonroot_node(path, level); 716 node = nilfs_btree_get_nonroot_node(path, level);
696 left = nilfs_btree_get_sib_node(path, level); 717 left = nilfs_btree_get_sib_node(path, level);
697 nchildren = nilfs_btree_node_get_nchildren(node); 718 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -712,9 +733,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
712 if (!buffer_dirty(path[level].bp_sib_bh)) 733 if (!buffer_dirty(path[level].bp_sib_bh))
713 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 734 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714 735
715 unlock_buffer(path[level].bp_bh);
716 unlock_buffer(path[level].bp_sib_bh);
717
718 nilfs_btree_promote_key(btree, path, level + 1, 736 nilfs_btree_promote_key(btree, path, level + 1,
719 nilfs_btree_node_get_key(node, 0)); 737 nilfs_btree_node_get_key(node, 0));
720 738
@@ -740,9 +758,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
740 struct nilfs_btree_node *node, *right; 758 struct nilfs_btree_node *node, *right;
741 int nchildren, rnchildren, n, move; 759 int nchildren, rnchildren, n, move;
742 760
743 lock_buffer(path[level].bp_bh);
744 lock_buffer(path[level].bp_sib_bh);
745
746 node = nilfs_btree_get_nonroot_node(path, level); 761 node = nilfs_btree_get_nonroot_node(path, level);
747 right = nilfs_btree_get_sib_node(path, level); 762 right = nilfs_btree_get_sib_node(path, level);
748 nchildren = nilfs_btree_node_get_nchildren(node); 763 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -763,9 +778,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
763 if (!buffer_dirty(path[level].bp_sib_bh)) 778 if (!buffer_dirty(path[level].bp_sib_bh))
764 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 779 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
765 780
766 unlock_buffer(path[level].bp_bh);
767 unlock_buffer(path[level].bp_sib_bh);
768
769 path[level + 1].bp_index++; 781 path[level + 1].bp_index++;
770 nilfs_btree_promote_key(btree, path, level + 1, 782 nilfs_btree_promote_key(btree, path, level + 1,
771 nilfs_btree_node_get_key(right, 0)); 783 nilfs_btree_node_get_key(right, 0));
@@ -794,9 +806,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
794 __u64 newptr; 806 __u64 newptr;
795 int nchildren, n, move; 807 int nchildren, n, move;
796 808
797 lock_buffer(path[level].bp_bh);
798 lock_buffer(path[level].bp_sib_bh);
799
800 node = nilfs_btree_get_nonroot_node(path, level); 809 node = nilfs_btree_get_nonroot_node(path, level);
801 right = nilfs_btree_get_sib_node(path, level); 810 right = nilfs_btree_get_sib_node(path, level);
802 nchildren = nilfs_btree_node_get_nchildren(node); 811 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -815,9 +824,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
815 if (!buffer_dirty(path[level].bp_sib_bh)) 824 if (!buffer_dirty(path[level].bp_sib_bh))
816 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 825 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
817 826
818 unlock_buffer(path[level].bp_bh);
819 unlock_buffer(path[level].bp_sib_bh);
820
821 newkey = nilfs_btree_node_get_key(right, 0); 827 newkey = nilfs_btree_node_get_key(right, 0);
822 newptr = path[level].bp_newreq.bpr_ptr; 828 newptr = path[level].bp_newreq.bpr_ptr;
823 829
@@ -852,8 +858,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
852 struct nilfs_btree_node *root, *child; 858 struct nilfs_btree_node *root, *child;
853 int n; 859 int n;
854 860
855 lock_buffer(path[level].bp_sib_bh);
856
857 root = nilfs_btree_get_root(btree); 861 root = nilfs_btree_get_root(btree);
858 child = nilfs_btree_get_sib_node(path, level); 862 child = nilfs_btree_get_sib_node(path, level);
859 863
@@ -865,8 +869,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
865 if (!buffer_dirty(path[level].bp_sib_bh)) 869 if (!buffer_dirty(path[level].bp_sib_bh))
866 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 870 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867 871
868 unlock_buffer(path[level].bp_sib_bh);
869
870 path[level].bp_bh = path[level].bp_sib_bh; 872 path[level].bp_bh = path[level].bp_sib_bh;
871 path[level].bp_sib_bh = NULL; 873 path[level].bp_sib_bh = NULL;
872 874
@@ -1023,11 +1025,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1023 1025
1024 stats->bs_nblocks++; 1026 stats->bs_nblocks++;
1025 1027
1026 lock_buffer(bh);
1027 nilfs_btree_node_init(btree, 1028 nilfs_btree_node_init(btree,
1028 (struct nilfs_btree_node *)bh->b_data, 1029 (struct nilfs_btree_node *)bh->b_data,
1029 0, level, 0, NULL, NULL); 1030 0, level, 0, NULL, NULL);
1030 unlock_buffer(bh);
1031 path[level].bp_sib_bh = bh; 1031 path[level].bp_sib_bh = bh;
1032 path[level].bp_op = nilfs_btree_split; 1032 path[level].bp_op = nilfs_btree_split;
1033 } 1033 }
@@ -1052,10 +1052,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1052 if (ret < 0) 1052 if (ret < 0)
1053 goto err_out_curr_node; 1053 goto err_out_curr_node;
1054 1054
1055 lock_buffer(bh);
1056 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 1055 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1057 0, level, 0, NULL, NULL); 1056 0, level, 0, NULL, NULL);
1058 unlock_buffer(bh);
1059 path[level].bp_sib_bh = bh; 1057 path[level].bp_sib_bh = bh;
1060 path[level].bp_op = nilfs_btree_grow; 1058 path[level].bp_op = nilfs_btree_grow;
1061 1059
@@ -1154,13 +1152,11 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1154 struct nilfs_btree_node *node; 1152 struct nilfs_btree_node *node;
1155 1153
1156 if (level < nilfs_btree_height(btree) - 1) { 1154 if (level < nilfs_btree_height(btree) - 1) {
1157 lock_buffer(path[level].bp_bh);
1158 node = nilfs_btree_get_nonroot_node(path, level); 1155 node = nilfs_btree_get_nonroot_node(path, level);
1159 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1156 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160 path[level].bp_index); 1157 path[level].bp_index);
1161 if (!buffer_dirty(path[level].bp_bh)) 1158 if (!buffer_dirty(path[level].bp_bh))
1162 nilfs_btnode_mark_dirty(path[level].bp_bh); 1159 nilfs_btnode_mark_dirty(path[level].bp_bh);
1163 unlock_buffer(path[level].bp_bh);
1164 if (path[level].bp_index == 0) 1160 if (path[level].bp_index == 0)
1165 nilfs_btree_promote_key(btree, path, level + 1, 1161 nilfs_btree_promote_key(btree, path, level + 1,
1166 nilfs_btree_node_get_key(node, 0)); 1162 nilfs_btree_node_get_key(node, 0));
@@ -1180,9 +1176,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1180 1176
1181 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1177 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1182 1178
1183 lock_buffer(path[level].bp_bh);
1184 lock_buffer(path[level].bp_sib_bh);
1185
1186 node = nilfs_btree_get_nonroot_node(path, level); 1179 node = nilfs_btree_get_nonroot_node(path, level);
1187 left = nilfs_btree_get_sib_node(path, level); 1180 left = nilfs_btree_get_sib_node(path, level);
1188 nchildren = nilfs_btree_node_get_nchildren(node); 1181 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1197,9 +1190,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1197 if (!buffer_dirty(path[level].bp_sib_bh)) 1190 if (!buffer_dirty(path[level].bp_sib_bh))
1198 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1191 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1199 1192
1200 unlock_buffer(path[level].bp_bh);
1201 unlock_buffer(path[level].bp_sib_bh);
1202
1203 nilfs_btree_promote_key(btree, path, level + 1, 1193 nilfs_btree_promote_key(btree, path, level + 1,
1204 nilfs_btree_node_get_key(node, 0)); 1194 nilfs_btree_node_get_key(node, 0));
1205 1195
@@ -1217,9 +1207,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1217 1207
1218 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1208 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219 1209
1220 lock_buffer(path[level].bp_bh);
1221 lock_buffer(path[level].bp_sib_bh);
1222
1223 node = nilfs_btree_get_nonroot_node(path, level); 1210 node = nilfs_btree_get_nonroot_node(path, level);
1224 right = nilfs_btree_get_sib_node(path, level); 1211 right = nilfs_btree_get_sib_node(path, level);
1225 nchildren = nilfs_btree_node_get_nchildren(node); 1212 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1234,9 +1221,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1234 if (!buffer_dirty(path[level].bp_sib_bh)) 1221 if (!buffer_dirty(path[level].bp_sib_bh))
1235 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1222 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1236 1223
1237 unlock_buffer(path[level].bp_bh);
1238 unlock_buffer(path[level].bp_sib_bh);
1239
1240 path[level + 1].bp_index++; 1224 path[level + 1].bp_index++;
1241 nilfs_btree_promote_key(btree, path, level + 1, 1225 nilfs_btree_promote_key(btree, path, level + 1,
1242 nilfs_btree_node_get_key(right, 0)); 1226 nilfs_btree_node_get_key(right, 0));
@@ -1255,9 +1239,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1255 1239
1256 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1240 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1257 1241
1258 lock_buffer(path[level].bp_bh);
1259 lock_buffer(path[level].bp_sib_bh);
1260
1261 node = nilfs_btree_get_nonroot_node(path, level); 1242 node = nilfs_btree_get_nonroot_node(path, level);
1262 left = nilfs_btree_get_sib_node(path, level); 1243 left = nilfs_btree_get_sib_node(path, level);
1263 1244
@@ -1268,9 +1249,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1268 if (!buffer_dirty(path[level].bp_sib_bh)) 1249 if (!buffer_dirty(path[level].bp_sib_bh))
1269 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1250 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1270 1251
1271 unlock_buffer(path[level].bp_bh);
1272 unlock_buffer(path[level].bp_sib_bh);
1273
1274 nilfs_btnode_delete(path[level].bp_bh); 1252 nilfs_btnode_delete(path[level].bp_bh);
1275 path[level].bp_bh = path[level].bp_sib_bh; 1253 path[level].bp_bh = path[level].bp_sib_bh;
1276 path[level].bp_sib_bh = NULL; 1254 path[level].bp_sib_bh = NULL;
@@ -1286,9 +1264,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1286 1264
1287 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1265 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1288 1266
1289 lock_buffer(path[level].bp_bh);
1290 lock_buffer(path[level].bp_sib_bh);
1291
1292 node = nilfs_btree_get_nonroot_node(path, level); 1267 node = nilfs_btree_get_nonroot_node(path, level);
1293 right = nilfs_btree_get_sib_node(path, level); 1268 right = nilfs_btree_get_sib_node(path, level);
1294 1269
@@ -1299,9 +1274,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1299 if (!buffer_dirty(path[level].bp_bh)) 1274 if (!buffer_dirty(path[level].bp_bh))
1300 nilfs_btnode_mark_dirty(path[level].bp_bh); 1275 nilfs_btnode_mark_dirty(path[level].bp_bh);
1301 1276
1302 unlock_buffer(path[level].bp_bh);
1303 unlock_buffer(path[level].bp_sib_bh);
1304
1305 nilfs_btnode_delete(path[level].bp_sib_bh); 1277 nilfs_btnode_delete(path[level].bp_sib_bh);
1306 path[level].bp_sib_bh = NULL; 1278 path[level].bp_sib_bh = NULL;
1307 path[level + 1].bp_index++; 1279 path[level + 1].bp_index++;
@@ -1316,7 +1288,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1316 1288
1317 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1289 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1318 1290
1319 lock_buffer(path[level].bp_bh);
1320 root = nilfs_btree_get_root(btree); 1291 root = nilfs_btree_get_root(btree);
1321 child = nilfs_btree_get_nonroot_node(path, level); 1292 child = nilfs_btree_get_nonroot_node(path, level);
1322 1293
@@ -1324,7 +1295,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1324 nilfs_btree_node_set_level(root, level); 1295 nilfs_btree_node_set_level(root, level);
1325 n = nilfs_btree_node_get_nchildren(child); 1296 n = nilfs_btree_node_get_nchildren(child);
1326 nilfs_btree_node_move_left(btree, root, child, n); 1297 nilfs_btree_node_move_left(btree, root, child, n);
1327 unlock_buffer(path[level].bp_bh);
1328 1298
1329 nilfs_btnode_delete(path[level].bp_bh); 1299 nilfs_btnode_delete(path[level].bp_bh);
1330 path[level].bp_bh = NULL; 1300 path[level].bp_bh = NULL;
@@ -1699,7 +1669,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1699 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); 1669 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1700 1670
1701 /* create child node at level 1 */ 1671 /* create child node at level 1 */
1702 lock_buffer(bh);
1703 node = (struct nilfs_btree_node *)bh->b_data; 1672 node = (struct nilfs_btree_node *)bh->b_data;
1704 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); 1673 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1705 nilfs_btree_node_insert(btree, node, 1674 nilfs_btree_node_insert(btree, node,
@@ -1709,7 +1678,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1709 if (!nilfs_bmap_dirty(bmap)) 1678 if (!nilfs_bmap_dirty(bmap))
1710 nilfs_bmap_set_dirty(bmap); 1679 nilfs_bmap_set_dirty(bmap);
1711 1680
1712 unlock_buffer(bh);
1713 brelse(bh); 1681 brelse(bh);
1714 1682
1715 /* create root node at level 2 */ 1683 /* create root node at level 2 */
@@ -2050,7 +2018,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2050 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2018 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2051 level < NILFS_BTREE_LEVEL_MAX; 2019 level < NILFS_BTREE_LEVEL_MAX;
2052 level++) 2020 level++)
2053 list_splice(&lists[level], listp->prev); 2021 list_splice_tail(&lists[level], listp);
2054} 2022}
2055 2023
2056static int nilfs_btree_assign_p(struct nilfs_btree *btree, 2024static int nilfs_btree_assign_p(struct nilfs_btree *btree,
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 0e72bbbc6b6..4b82d84ade7 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -34,28 +34,6 @@ struct nilfs_btree;
34struct nilfs_btree_path; 34struct nilfs_btree_path;
35 35
36/** 36/**
37 * struct nilfs_btree_node - B-tree node
38 * @bn_flags: flags
39 * @bn_level: level
40 * @bn_nchildren: number of children
41 * @bn_pad: padding
42 */
43struct nilfs_btree_node {
44 __u8 bn_flags;
45 __u8 bn_level;
46 __le16 bn_nchildren;
47 __le32 bn_pad;
48};
49
50/* flags */
51#define NILFS_BTREE_NODE_ROOT 0x01
52
53/* level */
54#define NILFS_BTREE_LEVEL_DATA 0
55#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1)
56#define NILFS_BTREE_LEVEL_MAX 14
57
58/**
59 * struct nilfs_btree - B-tree structure 37 * struct nilfs_btree - B-tree structure
60 * @bt_bmap: bmap base structure 38 * @bt_bmap: bmap base structure
61 */ 39 */
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 3f5d5d06f53..18737818db6 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 nilfs_mdt_mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno) && 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 (count = nilfs_cpfile_block_sub_valid_checkpoints( 332 count =
333 cpfile, cp_bh, kaddr, nicps)) == 0) { 333 nilfs_cpfile_block_sub_valid_checkpoints(
334 /* make hole */ 334 cpfile, cp_bh, kaddr, nicps);
335 kunmap_atomic(kaddr, KM_USER0); 335 if (count == 0) {
336 brelse(cp_bh); 336 /* make hole */
337 ret = nilfs_cpfile_delete_checkpoint_block( 337 kunmap_atomic(kaddr, KM_USER0);
338 cpfile, cno); 338 brelse(cp_bh);
339 if (ret == 0) 339 ret =
340 continue; 340 nilfs_cpfile_delete_checkpoint_block(
341 printk(KERN_ERR "%s: cannot delete block\n", 341 cpfile, cno);
342 __func__); 342 if (ret == 0)
343 break; 343 continue;
344 printk(KERN_ERR
345 "%s: cannot delete block\n",
346 __func__);
347 break;
348 }
344 } 349 }
345 } 350 }
346 351
@@ -926,3 +931,29 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
926 up_read(&NILFS_MDT(cpfile)->mi_sem); 931 up_read(&NILFS_MDT(cpfile)->mi_sem);
927 return ret; 932 return ret;
928} 933}
934
935/**
936 * nilfs_cpfile_read - read cpfile inode
937 * @cpfile: cpfile inode
938 * @raw_inode: on-disk cpfile inode
939 */
940int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode)
941{
942 return nilfs_read_inode_common(cpfile, raw_inode);
943}
944
945/**
946 * nilfs_cpfile_new - create cpfile
947 * @nilfs: nilfs object
948 * @cpsize: size of a checkpoint entry
949 */
950struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize)
951{
952 struct inode *cpfile;
953
954 cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0);
955 if (cpfile)
956 nilfs_mdt_set_entry_size(cpfile, cpsize,
957 sizeof(struct nilfs_cpfile_header));
958 return cpfile;
959}
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index debea896e70..bc0809e0ab4 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -40,4 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
40ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, 40ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
41 size_t); 41 size_t);
42 42
43int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode);
44struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize);
45
43#endif /* _NILFS_CPFILE_H */ 46#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 1ff8e15bd36..187dd07ba86 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -33,6 +33,16 @@
33#define NILFS_CNO_MIN ((__u64)1) 33#define NILFS_CNO_MIN ((__u64)1)
34#define NILFS_CNO_MAX (~(__u64)0) 34#define NILFS_CNO_MAX (~(__u64)0)
35 35
36struct nilfs_dat_info {
37 struct nilfs_mdt_info mi;
38 struct nilfs_palloc_cache palloc_cache;
39};
40
41static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
42{
43 return (struct nilfs_dat_info *)NILFS_MDT(dat);
44}
45
36static int nilfs_dat_prepare_entry(struct inode *dat, 46static int nilfs_dat_prepare_entry(struct inode *dat,
37 struct nilfs_palloc_req *req, int create) 47 struct nilfs_palloc_req *req, int create)
38{ 48{
@@ -425,3 +435,40 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
425 435
426 return nvi; 436 return nvi;
427} 437}
438
439/**
440 * nilfs_dat_read - read dat inode
441 * @dat: dat inode
442 * @raw_inode: on-disk dat inode
443 */
444int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode)
445{
446 return nilfs_read_inode_common(dat, raw_inode);
447}
448
449/**
450 * nilfs_dat_new - create dat file
451 * @nilfs: nilfs object
452 * @entry_size: size of a dat entry
453 */
454struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size)
455{
456 static struct lock_class_key dat_lock_key;
457 struct inode *dat;
458 struct nilfs_dat_info *di;
459 int err;
460
461 dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di));
462 if (dat) {
463 err = nilfs_palloc_init_blockgroup(dat, entry_size);
464 if (unlikely(err)) {
465 nilfs_mdt_destroy(dat);
466 return NULL;
467 }
468
469 di = NILFS_DAT_I(dat);
470 lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
471 nilfs_palloc_setup_cache(dat, &di->palloc_cache);
472 }
473 return dat;
474}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index 406070d3ff4..d31c3aab0ef 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -53,4 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t);
53int nilfs_dat_move(struct inode *, __u64, sector_t); 53int nilfs_dat_move(struct inode *, __u64, sector_t);
54ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); 54ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
55 55
56int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode);
57struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size);
58
56#endif /* _NILFS_DAT_H */ 59#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index e097099bfc8..76d803e060a 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -99,9 +99,9 @@ static int nilfs_prepare_chunk(struct page *page,
99 NULL, nilfs_get_block); 99 NULL, nilfs_get_block);
100} 100}
101 101
102static int nilfs_commit_chunk(struct page *page, 102static void nilfs_commit_chunk(struct page *page,
103 struct address_space *mapping, 103 struct address_space *mapping,
104 unsigned from, unsigned to) 104 unsigned from, unsigned to)
105{ 105{
106 struct inode *dir = mapping->host; 106 struct inode *dir = mapping->host;
107 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); 107 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
@@ -112,15 +112,13 @@ static int nilfs_commit_chunk(struct page *page,
112 112
113 nr_dirty = nilfs_page_count_clean_buffers(page, from, to); 113 nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
114 copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); 114 copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
115 if (pos + copied > dir->i_size) { 115 if (pos + copied > dir->i_size)
116 i_size_write(dir, pos + copied); 116 i_size_write(dir, pos + copied);
117 mark_inode_dirty(dir);
118 }
119 if (IS_DIRSYNC(dir)) 117 if (IS_DIRSYNC(dir))
120 nilfs_set_transaction_flag(NILFS_TI_SYNC); 118 nilfs_set_transaction_flag(NILFS_TI_SYNC);
121 err = nilfs_set_file_dirty(sbi, dir, nr_dirty); 119 err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
120 WARN_ON(err); /* do not happen */
122 unlock_page(page); 121 unlock_page(page);
123 return err;
124} 122}
125 123
126static void nilfs_check_page(struct page *page) 124static void nilfs_check_page(struct page *page)
@@ -455,11 +453,10 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
455 BUG_ON(err); 453 BUG_ON(err);
456 de->inode = cpu_to_le64(inode->i_ino); 454 de->inode = cpu_to_le64(inode->i_ino);
457 nilfs_set_de_type(de, inode); 455 nilfs_set_de_type(de, inode);
458 err = nilfs_commit_chunk(page, mapping, from, to); 456 nilfs_commit_chunk(page, mapping, from, to);
459 nilfs_put_page(page); 457 nilfs_put_page(page);
460 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 458 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
461/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ 459/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
462 mark_inode_dirty(dir);
463} 460}
464 461
465/* 462/*
@@ -548,10 +545,10 @@ got_it:
548 memcpy(de->name, name, namelen); 545 memcpy(de->name, name, namelen);
549 de->inode = cpu_to_le64(inode->i_ino); 546 de->inode = cpu_to_le64(inode->i_ino);
550 nilfs_set_de_type(de, inode); 547 nilfs_set_de_type(de, inode);
551 err = nilfs_commit_chunk(page, page->mapping, from, to); 548 nilfs_commit_chunk(page, page->mapping, from, to);
552 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 549 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
553/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ 550/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
554 mark_inode_dirty(dir); 551 nilfs_mark_inode_dirty(dir);
555 /* OFFSET_CACHE */ 552 /* OFFSET_CACHE */
556out_put: 553out_put:
557 nilfs_put_page(page); 554 nilfs_put_page(page);
@@ -595,10 +592,9 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
595 if (pde) 592 if (pde)
596 pde->rec_len = cpu_to_le16(to - from); 593 pde->rec_len = cpu_to_le16(to - from);
597 dir->inode = 0; 594 dir->inode = 0;
598 err = nilfs_commit_chunk(page, mapping, from, to); 595 nilfs_commit_chunk(page, mapping, from, to);
599 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 596 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
600/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ 597/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
601 mark_inode_dirty(inode);
602out: 598out:
603 nilfs_put_page(page); 599 nilfs_put_page(page);
604 return err; 600 return err;
@@ -640,7 +636,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
640 memcpy(de->name, "..\0", 4); 636 memcpy(de->name, "..\0", 4);
641 nilfs_set_de_type(de, inode); 637 nilfs_set_de_type(de, inode);
642 kunmap_atomic(kaddr, KM_USER0); 638 kunmap_atomic(kaddr, KM_USER0);
643 err = nilfs_commit_chunk(page, mapping, 0, chunk_size); 639 nilfs_commit_chunk(page, mapping, 0, chunk_size);
644fail: 640fail:
645 page_cache_release(page); 641 page_cache_release(page);
646 return err; 642 return err;
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac71827..236753df5cd 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
51 struct nilfs_direct *direct; 51 struct nilfs_direct *direct;
52 __u64 ptr; 52 __u64 ptr;
53 53
54 direct = (struct nilfs_direct *)bmap; 54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if ((key > NILFS_DIRECT_KEY_MAX) || 55 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 (level != 1) || /* XXX: use macro for level 1 */ 56 return -ENOENT;
57 ((ptr = nilfs_direct_get_ptr(direct, key)) == 57 ptr = nilfs_direct_get_ptr(direct, key);
58 NILFS_BMAP_INVALID_PTR)) 58 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 59 return -ENOENT;
60 60
61 if (ptrp != NULL) 61 if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
73 sector_t blocknr; 73 sector_t blocknr;
74 int ret, cnt; 74 int ret, cnt;
75 75
76 if (key > NILFS_DIRECT_KEY_MAX || 76 if (key > NILFS_DIRECT_KEY_MAX)
77 (ptr = nilfs_direct_get_ptr(direct, key)) == 77 return -ENOENT;
78 NILFS_BMAP_INVALID_PTR) 78 ptr = nilfs_direct_get_ptr(direct, key);
79 if (ptr == NILFS_BMAP_INVALID_PTR)
79 return -ENOENT; 80 return -ENOENT;
80 81
81 if (NILFS_BMAP_USE_VBN(bmap)) { 82 if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index 93383c5cee9..dd5f7e0a95f 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -61,6 +61,8 @@ void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
61 61
62 nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); 62 nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);
63 63
64 nilfs_palloc_clear_cache(dat);
65 nilfs_palloc_clear_cache(gcdat);
64 nilfs_clear_dirty_pages(mapping); 66 nilfs_clear_dirty_pages(mapping);
65 nilfs_copy_back_pages(mapping, gmapping); 67 nilfs_copy_back_pages(mapping, gmapping);
66 /* note: mdt dirty flags should be cleared by segctor. */ 68 /* note: mdt dirty flags should be cleared by segctor. */
@@ -79,6 +81,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
79 gcdat->i_state = I_CLEAR; 81 gcdat->i_state = I_CLEAR;
80 gii->i_flags = 0; 82 gii->i_flags = 0;
81 83
84 nilfs_palloc_clear_cache(gcdat);
82 truncate_inode_pages(gcdat->i_mapping, 0); 85 truncate_inode_pages(gcdat->i_mapping, 0);
83 truncate_inode_pages(&gii->i_btnode_cache, 0); 86 truncate_inode_pages(&gii->i_btnode_cache, 0);
84} 87}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index e6de0a27ab5..e16a6664dfa 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -149,7 +149,7 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
149 __u64 vbn, struct buffer_head **out_bh) 149 __u64 vbn, struct buffer_head **out_bh)
150{ 150{
151 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, 151 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
152 vbn ? : pbn, pbn, out_bh, 0); 152 vbn ? : pbn, pbn, out_bh);
153 if (ret == -EEXIST) /* internal code (cache hit) */ 153 if (ret == -EEXIST) /* internal code (cache hit) */
154 ret = 0; 154 ret = 0;
155 return ret; 155 return ret;
@@ -212,9 +212,10 @@ void nilfs_destroy_gccache(struct the_nilfs *nilfs)
212static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, 212static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino,
213 __u64 cno) 213 __u64 cno)
214{ 214{
215 struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); 215 struct inode *inode;
216 struct nilfs_inode_info *ii; 216 struct nilfs_inode_info *ii;
217 217
218 inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0);
218 if (!inode) 219 if (!inode)
219 return NULL; 220 return NULL;
220 221
@@ -265,7 +266,6 @@ struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
265 */ 266 */
266void nilfs_clear_gcinode(struct inode *inode) 267void nilfs_clear_gcinode(struct inode *inode)
267{ 268{
268 nilfs_mdt_clear(inode);
269 nilfs_mdt_destroy(inode); 269 nilfs_mdt_destroy(inode);
270} 270}
271 271
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index de86401f209..922d9dd42c8 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -29,6 +29,17 @@
29#include "alloc.h" 29#include "alloc.h"
30#include "ifile.h" 30#include "ifile.h"
31 31
32
33struct nilfs_ifile_info {
34 struct nilfs_mdt_info mi;
35 struct nilfs_palloc_cache palloc_cache;
36};
37
38static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
39{
40 return (struct nilfs_ifile_info *)NILFS_MDT(ifile);
41}
42
32/** 43/**
33 * nilfs_ifile_create_inode - create a new disk inode 44 * nilfs_ifile_create_inode - create a new disk inode
34 * @ifile: ifile inode 45 * @ifile: ifile inode
@@ -148,3 +159,27 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
148 } 159 }
149 return err; 160 return err;
150} 161}
162
163/**
164 * nilfs_ifile_new - create inode file
165 * @sbi: nilfs_sb_info struct
166 * @inode_size: size of an inode
167 */
168struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size)
169{
170 struct inode *ifile;
171 int err;
172
173 ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO,
174 sizeof(struct nilfs_ifile_info));
175 if (ifile) {
176 err = nilfs_palloc_init_blockgroup(ifile, inode_size);
177 if (unlikely(err)) {
178 nilfs_mdt_destroy(ifile);
179 return NULL;
180 }
181 nilfs_palloc_setup_cache(ifile,
182 &NILFS_IFILE_I(ifile)->palloc_cache);
183 }
184 return ifile;
185}
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index ecc3ba76db4..cbca32e498f 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -49,4 +49,6 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
49int nilfs_ifile_delete_inode(struct inode *, ino_t); 49int nilfs_ifile_delete_inode(struct inode *, ino_t);
50int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); 50int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
51 51
52struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size);
53
52#endif /* _NILFS_IFILE_H */ 54#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2a0a5a3ac13..7868cc122ac 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -97,6 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
97 nilfs_transaction_abort(inode->i_sb); 97 nilfs_transaction_abort(inode->i_sb);
98 goto out; 98 goto out;
99 } 99 }
100 nilfs_mark_inode_dirty(inode);
100 nilfs_transaction_commit(inode->i_sb); /* never fails */ 101 nilfs_transaction_commit(inode->i_sb); /* never fails */
101 /* Error handling should be detailed */ 102 /* Error handling should be detailed */
102 set_buffer_new(bh_result); 103 set_buffer_new(bh_result);
@@ -322,7 +323,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
322 nilfs_init_acl(), proper cancellation of 323 nilfs_init_acl(), proper cancellation of
323 above jobs should be considered */ 324 above jobs should be considered */
324 325
325 mark_inode_dirty(inode);
326 return inode; 326 return inode;
327 327
328 failed_acl: 328 failed_acl:
@@ -525,7 +525,6 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
525 525
526 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); 526 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh);
527 527
528 /* The buffer is guarded with lock_buffer() by the caller */
529 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) 528 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
530 memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); 529 memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size);
531 set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); 530 set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
@@ -599,6 +598,7 @@ void nilfs_truncate(struct inode *inode)
599 if (IS_SYNC(inode)) 598 if (IS_SYNC(inode))
600 nilfs_set_transaction_flag(NILFS_TI_SYNC); 599 nilfs_set_transaction_flag(NILFS_TI_SYNC);
601 600
601 nilfs_mark_inode_dirty(inode);
602 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); 602 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
603 nilfs_transaction_commit(sb); 603 nilfs_transaction_commit(sb);
604 /* May construct a logical segment and may fail in sync mode. 604 /* May construct a logical segment and may fail in sync mode.
@@ -623,6 +623,7 @@ void nilfs_delete_inode(struct inode *inode)
623 truncate_inode_pages(&inode->i_data, 0); 623 truncate_inode_pages(&inode->i_data, 0);
624 624
625 nilfs_truncate_bmap(ii, 0); 625 nilfs_truncate_bmap(ii, 0);
626 nilfs_mark_inode_dirty(inode);
626 nilfs_free_inode(inode); 627 nilfs_free_inode(inode);
627 /* nilfs_free_inode() marks inode buffer dirty */ 628 /* nilfs_free_inode() marks inode buffer dirty */
628 if (IS_SYNC(inode)) 629 if (IS_SYNC(inode))
@@ -745,9 +746,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
745 "failed to reget inode block.\n"); 746 "failed to reget inode block.\n");
746 return err; 747 return err;
747 } 748 }
748 lock_buffer(ibh);
749 nilfs_update_inode(inode, ibh); 749 nilfs_update_inode(inode, ibh);
750 unlock_buffer(ibh);
751 nilfs_mdt_mark_buffer_dirty(ibh); 750 nilfs_mdt_mark_buffer_dirty(ibh);
752 nilfs_mdt_mark_dirty(sbi->s_ifile); 751 nilfs_mdt_mark_dirty(sbi->s_ifile);
753 brelse(ibh); 752 brelse(ibh);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d8..d6b2b83de36 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
480 unsigned int cmd, void __user *argp) 480 unsigned int cmd, void __user *argp)
481{ 481{
482 struct nilfs_argv argv[5]; 482 struct nilfs_argv argv[5];
483 const static size_t argsz[5] = { 483 static const size_t argsz[5] = {
484 sizeof(struct nilfs_vdesc), 484 sizeof(struct nilfs_vdesc),
485 sizeof(struct nilfs_period), 485 sizeof(struct nilfs_period),
486 sizeof(__u64), 486 sizeof(__u64),
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index f6326112d64..06713ffcc7f 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -186,7 +186,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
186} 186}
187 187
188static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, 188static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
189 struct buffer_head **out_bh) 189 int readahead, struct buffer_head **out_bh)
190{ 190{
191 struct buffer_head *first_bh, *bh; 191 struct buffer_head *first_bh, *bh;
192 unsigned long blkoff; 192 unsigned long blkoff;
@@ -200,16 +200,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
200 if (unlikely(err)) 200 if (unlikely(err))
201 goto failed; 201 goto failed;
202 202
203 blkoff = block + 1; 203 if (readahead) {
204 for (i = 0; i < nr_ra_blocks; i++, blkoff++) { 204 blkoff = block + 1;
205 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); 205 for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
206 if (likely(!err || err == -EEXIST)) 206 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
207 brelse(bh); 207 if (likely(!err || err == -EEXIST))
208 else if (err != -EBUSY) 208 brelse(bh);
209 break; /* abort readahead if bmap lookup failed */ 209 else if (err != -EBUSY)
210 210 break;
211 if (!buffer_locked(first_bh)) 211 /* abort readahead if bmap lookup failed */
212 goto out_no_wait; 212 if (!buffer_locked(first_bh))
213 goto out_no_wait;
214 }
213 } 215 }
214 216
215 wait_on_buffer(first_bh); 217 wait_on_buffer(first_bh);
@@ -263,7 +265,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
263 265
264 /* Should be rewritten with merging nilfs_mdt_read_block() */ 266 /* Should be rewritten with merging nilfs_mdt_read_block() */
265 retry: 267 retry:
266 ret = nilfs_mdt_read_block(inode, blkoff, out_bh); 268 ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
267 if (!create || ret != -ENOENT) 269 if (!create || ret != -ENOENT)
268 return ret; 270 return ret;
269 271
@@ -371,7 +373,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
371 struct buffer_head *bh; 373 struct buffer_head *bh;
372 int err; 374 int err;
373 375
374 err = nilfs_mdt_read_block(inode, block, &bh); 376 err = nilfs_mdt_read_block(inode, block, 0, &bh);
375 if (unlikely(err)) 377 if (unlikely(err))
376 return err; 378 return err;
377 nilfs_mark_buffer_dirty(bh); 379 nilfs_mark_buffer_dirty(bh);
@@ -445,9 +447,17 @@ static const struct file_operations def_mdt_fops;
445 * longer than those of the super block structs; they may continue for 447 * longer than those of the super block structs; they may continue for
446 * several consecutive mounts/umounts. This would need discussions. 448 * several consecutive mounts/umounts. This would need discussions.
447 */ 449 */
450/**
451 * nilfs_mdt_new_common - allocate a pseudo inode for metadata file
452 * @nilfs: nilfs object
453 * @sb: super block instance the metadata file belongs to
454 * @ino: inode number
455 * @gfp_mask: gfp mask for data pages
456 * @objsz: size of the private object attached to inode->i_private
457 */
448struct inode * 458struct inode *
449nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, 459nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
450 ino_t ino, gfp_t gfp_mask) 460 ino_t ino, gfp_t gfp_mask, size_t objsz)
451{ 461{
452 struct inode *inode = nilfs_alloc_inode_common(nilfs); 462 struct inode *inode = nilfs_alloc_inode_common(nilfs);
453 463
@@ -455,8 +465,9 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
455 return NULL; 465 return NULL;
456 else { 466 else {
457 struct address_space * const mapping = &inode->i_data; 467 struct address_space * const mapping = &inode->i_data;
458 struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); 468 struct nilfs_mdt_info *mi;
459 469
470 mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
460 if (!mi) { 471 if (!mi) {
461 nilfs_destroy_inode(inode); 472 nilfs_destroy_inode(inode);
462 return NULL; 473 return NULL;
@@ -513,11 +524,11 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
513} 524}
514 525
515struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 526struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
516 ino_t ino) 527 ino_t ino, size_t objsz)
517{ 528{
518 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, 529 struct inode *inode;
519 NILFS_MDT_GFP);
520 530
531 inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz);
521 if (!inode) 532 if (!inode)
522 return NULL; 533 return NULL;
523 534
@@ -544,14 +555,15 @@ void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow)
544 &NILFS_I(orig)->i_btnode_cache; 555 &NILFS_I(orig)->i_btnode_cache;
545} 556}
546 557
547void nilfs_mdt_clear(struct inode *inode) 558static void nilfs_mdt_clear(struct inode *inode)
548{ 559{
549 struct nilfs_inode_info *ii = NILFS_I(inode); 560 struct nilfs_inode_info *ii = NILFS_I(inode);
550 561
551 invalidate_mapping_pages(inode->i_mapping, 0, -1); 562 invalidate_mapping_pages(inode->i_mapping, 0, -1);
552 truncate_inode_pages(inode->i_mapping, 0); 563 truncate_inode_pages(inode->i_mapping, 0);
553 564
554 nilfs_bmap_clear(ii->i_bmap); 565 if (test_bit(NILFS_I_BMAP, &ii->i_state))
566 nilfs_bmap_clear(ii->i_bmap);
555 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 567 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
556} 568}
557 569
@@ -559,6 +571,10 @@ void nilfs_mdt_destroy(struct inode *inode)
559{ 571{
560 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 572 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
561 573
574 if (mdi->mi_palloc_cache)
575 nilfs_palloc_destroy_cache(inode);
576 nilfs_mdt_clear(inode);
577
562 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 578 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
563 kfree(mdi); 579 kfree(mdi);
564 nilfs_destroy_inode(inode); 580 nilfs_destroy_inode(inode);
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index 431599733c9..6c4bbb0470f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -36,6 +36,7 @@
36 * @mi_entry_size: size of an entry 36 * @mi_entry_size: size of an entry
37 * @mi_first_entry_offset: offset to the first entry 37 * @mi_first_entry_offset: offset to the first entry
38 * @mi_entries_per_block: number of entries in a block 38 * @mi_entries_per_block: number of entries in a block
39 * @mi_palloc_cache: persistent object allocator cache
39 * @mi_blocks_per_group: number of blocks in a group 40 * @mi_blocks_per_group: number of blocks in a group
40 * @mi_blocks_per_desc_block: number of blocks per descriptor block 41 * @mi_blocks_per_desc_block: number of blocks per descriptor block
41 */ 42 */
@@ -46,6 +47,7 @@ struct nilfs_mdt_info {
46 unsigned mi_entry_size; 47 unsigned mi_entry_size;
47 unsigned mi_first_entry_offset; 48 unsigned mi_first_entry_offset;
48 unsigned long mi_entries_per_block; 49 unsigned long mi_entries_per_block;
50 struct nilfs_palloc_cache *mi_palloc_cache;
49 unsigned long mi_blocks_per_group; 51 unsigned long mi_blocks_per_group;
50 unsigned long mi_blocks_per_desc_block; 52 unsigned long mi_blocks_per_desc_block;
51}; 53};
@@ -74,11 +76,11 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 76int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
75int nilfs_mdt_fetch_dirty(struct inode *); 77int nilfs_mdt_fetch_dirty(struct inode *);
76 78
77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t); 79struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
80 size_t);
78struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, 81struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
79 ino_t, gfp_t); 82 ino_t, gfp_t, size_t);
80void nilfs_mdt_destroy(struct inode *); 83void nilfs_mdt_destroy(struct inode *);
81void nilfs_mdt_clear(struct inode *);
82void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); 84void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
83void nilfs_mdt_set_shadow(struct inode *, struct inode *); 85void nilfs_mdt_set_shadow(struct inode *, struct inode *);
84 86
@@ -104,21 +106,4 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
104#define nilfs_mdt_bgl_lock(inode, bg) \ 106#define nilfs_mdt_bgl_lock(inode, bg) \
105 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) 107 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
106 108
107
108static inline int
109nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
110 unsigned n)
111{
112 return nilfs_read_inode_common(
113 inode, (struct nilfs_inode *)(bh->b_data + n));
114}
115
116static inline void
117nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
118 unsigned n)
119{
120 nilfs_write_inode_common(
121 inode, (struct nilfs_inode *)(bh->b_data + n), 1);
122}
123
124#endif /* _NILFS_MDT_H */ 109#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index ed02e886fa7..07ba838ef08 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -120,7 +120,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
120 inode->i_op = &nilfs_file_inode_operations; 120 inode->i_op = &nilfs_file_inode_operations;
121 inode->i_fop = &nilfs_file_operations; 121 inode->i_fop = &nilfs_file_operations;
122 inode->i_mapping->a_ops = &nilfs_aops; 122 inode->i_mapping->a_ops = &nilfs_aops;
123 mark_inode_dirty(inode); 123 nilfs_mark_inode_dirty(inode);
124 err = nilfs_add_nondir(dentry, inode); 124 err = nilfs_add_nondir(dentry, inode);
125 } 125 }
126 if (!err) 126 if (!err)
@@ -148,7 +148,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
148 err = PTR_ERR(inode); 148 err = PTR_ERR(inode);
149 if (!IS_ERR(inode)) { 149 if (!IS_ERR(inode)) {
150 init_special_inode(inode, inode->i_mode, rdev); 150 init_special_inode(inode, inode->i_mode, rdev);
151 mark_inode_dirty(inode); 151 nilfs_mark_inode_dirty(inode);
152 err = nilfs_add_nondir(dentry, inode); 152 err = nilfs_add_nondir(dentry, inode);
153 } 153 }
154 if (!err) 154 if (!err)
@@ -188,7 +188,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
188 goto out_fail; 188 goto out_fail;
189 189
190 /* mark_inode_dirty(inode); */ 190 /* mark_inode_dirty(inode); */
191 /* nilfs_new_inode() and page_symlink() do this */ 191 /* page_symlink() do this */
192 192
193 err = nilfs_add_nondir(dentry, inode); 193 err = nilfs_add_nondir(dentry, inode);
194out: 194out:
@@ -200,7 +200,8 @@ out:
200 return err; 200 return err;
201 201
202out_fail: 202out_fail:
203 inode_dec_link_count(inode); 203 drop_nlink(inode);
204 nilfs_mark_inode_dirty(inode);
204 iput(inode); 205 iput(inode);
205 goto out; 206 goto out;
206} 207}
@@ -245,7 +246,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
245 if (err) 246 if (err)
246 return err; 247 return err;
247 248
248 inode_inc_link_count(dir); 249 inc_nlink(dir);
249 250
250 inode = nilfs_new_inode(dir, S_IFDIR | mode); 251 inode = nilfs_new_inode(dir, S_IFDIR | mode);
251 err = PTR_ERR(inode); 252 err = PTR_ERR(inode);
@@ -256,7 +257,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
256 inode->i_fop = &nilfs_dir_operations; 257 inode->i_fop = &nilfs_dir_operations;
257 inode->i_mapping->a_ops = &nilfs_aops; 258 inode->i_mapping->a_ops = &nilfs_aops;
258 259
259 inode_inc_link_count(inode); 260 inc_nlink(inode);
260 261
261 err = nilfs_make_empty(inode, dir); 262 err = nilfs_make_empty(inode, dir);
262 if (err) 263 if (err)
@@ -266,6 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
266 if (err) 267 if (err)
267 goto out_fail; 268 goto out_fail;
268 269
270 nilfs_mark_inode_dirty(inode);
269 d_instantiate(dentry, inode); 271 d_instantiate(dentry, inode);
270out: 272out:
271 if (!err) 273 if (!err)
@@ -276,26 +278,23 @@ out:
276 return err; 278 return err;
277 279
278out_fail: 280out_fail:
279 inode_dec_link_count(inode); 281 drop_nlink(inode);
280 inode_dec_link_count(inode); 282 drop_nlink(inode);
283 nilfs_mark_inode_dirty(inode);
281 iput(inode); 284 iput(inode);
282out_dir: 285out_dir:
283 inode_dec_link_count(dir); 286 drop_nlink(dir);
287 nilfs_mark_inode_dirty(dir);
284 goto out; 288 goto out;
285} 289}
286 290
287static int nilfs_unlink(struct inode *dir, struct dentry *dentry) 291static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
288{ 292{
289 struct inode *inode; 293 struct inode *inode;
290 struct nilfs_dir_entry *de; 294 struct nilfs_dir_entry *de;
291 struct page *page; 295 struct page *page;
292 struct nilfs_transaction_info ti;
293 int err; 296 int err;
294 297
295 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
296 if (err)
297 return err;
298
299 err = -ENOENT; 298 err = -ENOENT;
300 de = nilfs_find_entry(dir, dentry, &page); 299 de = nilfs_find_entry(dir, dentry, &page);
301 if (!de) 300 if (!de)
@@ -317,12 +316,28 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
317 goto out; 316 goto out;
318 317
319 inode->i_ctime = dir->i_ctime; 318 inode->i_ctime = dir->i_ctime;
320 inode_dec_link_count(inode); 319 drop_nlink(inode);
321 err = 0; 320 err = 0;
322out: 321out:
323 if (!err) 322 return err;
323}
324
325static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
326{
327 struct nilfs_transaction_info ti;
328 int err;
329
330 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
331 if (err)
332 return err;
333
334 err = nilfs_do_unlink(dir, dentry);
335
336 if (!err) {
337 nilfs_mark_inode_dirty(dir);
338 nilfs_mark_inode_dirty(dentry->d_inode);
324 err = nilfs_transaction_commit(dir->i_sb); 339 err = nilfs_transaction_commit(dir->i_sb);
325 else 340 } else
326 nilfs_transaction_abort(dir->i_sb); 341 nilfs_transaction_abort(dir->i_sb);
327 342
328 return err; 343 return err;
@@ -340,11 +355,13 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
340 355
341 err = -ENOTEMPTY; 356 err = -ENOTEMPTY;
342 if (nilfs_empty_dir(inode)) { 357 if (nilfs_empty_dir(inode)) {
343 err = nilfs_unlink(dir, dentry); 358 err = nilfs_do_unlink(dir, dentry);
344 if (!err) { 359 if (!err) {
345 inode->i_size = 0; 360 inode->i_size = 0;
346 inode_dec_link_count(inode); 361 drop_nlink(inode);
347 inode_dec_link_count(dir); 362 nilfs_mark_inode_dirty(inode);
363 drop_nlink(dir);
364 nilfs_mark_inode_dirty(dir);
348 } 365 }
349 } 366 }
350 if (!err) 367 if (!err)
@@ -395,42 +412,48 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
395 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); 412 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
396 if (!new_de) 413 if (!new_de)
397 goto out_dir; 414 goto out_dir;
398 inode_inc_link_count(old_inode); 415 inc_nlink(old_inode);
399 nilfs_set_link(new_dir, new_de, new_page, old_inode); 416 nilfs_set_link(new_dir, new_de, new_page, old_inode);
417 nilfs_mark_inode_dirty(new_dir);
400 new_inode->i_ctime = CURRENT_TIME; 418 new_inode->i_ctime = CURRENT_TIME;
401 if (dir_de) 419 if (dir_de)
402 drop_nlink(new_inode); 420 drop_nlink(new_inode);
403 inode_dec_link_count(new_inode); 421 drop_nlink(new_inode);
422 nilfs_mark_inode_dirty(new_inode);
404 } else { 423 } else {
405 if (dir_de) { 424 if (dir_de) {
406 err = -EMLINK; 425 err = -EMLINK;
407 if (new_dir->i_nlink >= NILFS_LINK_MAX) 426 if (new_dir->i_nlink >= NILFS_LINK_MAX)
408 goto out_dir; 427 goto out_dir;
409 } 428 }
410 inode_inc_link_count(old_inode); 429 inc_nlink(old_inode);
411 err = nilfs_add_link(new_dentry, old_inode); 430 err = nilfs_add_link(new_dentry, old_inode);
412 if (err) { 431 if (err) {
413 inode_dec_link_count(old_inode); 432 drop_nlink(old_inode);
433 nilfs_mark_inode_dirty(old_inode);
414 goto out_dir; 434 goto out_dir;
415 } 435 }
416 if (dir_de) 436 if (dir_de) {
417 inode_inc_link_count(new_dir); 437 inc_nlink(new_dir);
438 nilfs_mark_inode_dirty(new_dir);
439 }
418 } 440 }
419 441
420 /* 442 /*
421 * Like most other Unix systems, set the ctime for inodes on a 443 * Like most other Unix systems, set the ctime for inodes on a
422 * rename. 444 * rename.
423 * inode_dec_link_count() will mark the inode dirty.
424 */ 445 */
425 old_inode->i_ctime = CURRENT_TIME; 446 old_inode->i_ctime = CURRENT_TIME;
426 447
427 nilfs_delete_entry(old_de, old_page); 448 nilfs_delete_entry(old_de, old_page);
428 inode_dec_link_count(old_inode); 449 drop_nlink(old_inode);
429 450
430 if (dir_de) { 451 if (dir_de) {
431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 452 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
432 inode_dec_link_count(old_dir); 453 drop_nlink(old_dir);
433 } 454 }
455 nilfs_mark_inode_dirty(old_dir);
456 nilfs_mark_inode_dirty(old_inode);
434 457
435 err = nilfs_transaction_commit(old_dir->i_sb); 458 err = nilfs_transaction_commit(old_dir->i_sb);
436 return err; 459 return err;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6dc83591d11..c9c96c7825d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -770,14 +770,8 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
770 nilfs_finish_roll_forward(nilfs, sbi, ri); 770 nilfs_finish_roll_forward(nilfs, sbi, ri);
771 } 771 }
772 772
773 nilfs_detach_checkpoint(sbi);
774 return 0;
775
776 failed: 773 failed:
777 nilfs_detach_checkpoint(sbi); 774 nilfs_detach_checkpoint(sbi);
778 nilfs_mdt_clear(nilfs->ns_cpfile);
779 nilfs_mdt_clear(nilfs->ns_sufile);
780 nilfs_mdt_clear(nilfs->ns_dat);
781 return err; 775 return err;
782} 776}
783 777
@@ -804,6 +798,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
804 struct nilfs_segsum_info ssi; 798 struct nilfs_segsum_info ssi;
805 sector_t pseg_start, pseg_end, sr_pseg_start = 0; 799 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
806 sector_t seg_start, seg_end; /* range of full segment (block number) */ 800 sector_t seg_start, seg_end; /* range of full segment (block number) */
801 sector_t b, end;
807 u64 seg_seq; 802 u64 seg_seq;
808 __u64 segnum, nextnum = 0; 803 __u64 segnum, nextnum = 0;
809 __u64 cno; 804 __u64 cno;
@@ -819,6 +814,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
819 /* Calculate range of segment */ 814 /* Calculate range of segment */
820 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); 815 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
821 816
817 /* Read ahead segment */
818 b = seg_start;
819 while (b <= seg_end)
820 sb_breadahead(sbi->s_super, b++);
821
822 for (;;) { 822 for (;;) {
823 /* Load segment summary */ 823 /* Load segment summary */
824 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); 824 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
@@ -841,14 +841,20 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
841 ri->ri_nextnum = nextnum; 841 ri->ri_nextnum = nextnum;
842 empty_seg = 0; 842 empty_seg = 0;
843 843
844 if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
845 /* This will never happen because a superblock
846 (last_segment) always points to a pseg
847 having a super root. */
848 ret = NILFS_SEG_FAIL_CONSISTENCY;
849 goto failed;
850 }
851
852 if (pseg_start == seg_start) {
853 nilfs_get_segment_range(nilfs, nextnum, &b, &end);
854 while (b <= end)
855 sb_breadahead(sbi->s_super, b++);
856 }
844 if (!NILFS_SEG_HAS_SR(&ssi)) { 857 if (!NILFS_SEG_HAS_SR(&ssi)) {
845 if (!scan_newer) {
846 /* This will never happen because a superblock
847 (last_segment) always points to a pseg
848 having a super root. */
849 ret = NILFS_SEG_FAIL_CONSISTENCY;
850 goto failed;
851 }
852 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { 858 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
853 ri->ri_lsegs_start = pseg_start; 859 ri->ri_lsegs_start = pseg_start;
854 ri->ri_lsegs_start_seq = seg_seq; 860 ri->ri_lsegs_start_seq = seg_seq;
@@ -919,7 +925,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
919 925
920 super_root_found: 926 super_root_found:
921 /* Updating pointers relating to the latest checkpoint */ 927 /* Updating pointers relating to the latest checkpoint */
922 list_splice(&segments, ri->ri_used_segments.prev); 928 list_splice_tail(&segments, &ri->ri_used_segments);
923 nilfs->ns_last_pseg = sr_pseg_start; 929 nilfs->ns_last_pseg = sr_pseg_start;
924 nilfs->ns_last_seq = nilfs->ns_seg_seq; 930 nilfs->ns_last_seq = nilfs->ns_seg_seq;
925 nilfs->ns_last_cno = ri->ri_cno; 931 nilfs->ns_last_cno = ri->ri_cno;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index e6d9e37fa24..645c78656aa 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -24,10 +24,22 @@
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/crc32.h> 26#include <linux/crc32.h>
27#include <linux/backing-dev.h>
27#include "page.h" 28#include "page.h"
28#include "segbuf.h" 29#include "segbuf.h"
29 30
30 31
32struct nilfs_write_info {
33 struct the_nilfs *nilfs;
34 struct bio *bio;
35 int start, end; /* The region to be submitted */
36 int rest_blocks;
37 int max_pages;
38 int nr_vecs;
39 sector_t blocknr;
40};
41
42
31static struct kmem_cache *nilfs_segbuf_cachep; 43static struct kmem_cache *nilfs_segbuf_cachep;
32 44
33static void nilfs_segbuf_init_once(void *obj) 45static void nilfs_segbuf_init_once(void *obj)
@@ -63,6 +75,11 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
63 INIT_LIST_HEAD(&segbuf->sb_list); 75 INIT_LIST_HEAD(&segbuf->sb_list);
64 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); 76 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
65 INIT_LIST_HEAD(&segbuf->sb_payload_buffers); 77 INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
78
79 init_completion(&segbuf->sb_bio_event);
80 atomic_set(&segbuf->sb_err, 0);
81 segbuf->sb_nbio = 0;
82
66 return segbuf; 83 return segbuf;
67} 84}
68 85
@@ -83,6 +100,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
83 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; 100 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
84} 101}
85 102
103/**
104 * nilfs_segbuf_map_cont - map a new log behind a given log
105 * @segbuf: new segment buffer
106 * @prev: segment buffer containing a log to be continued
107 */
108void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
109 struct nilfs_segment_buffer *prev)
110{
111 segbuf->sb_segnum = prev->sb_segnum;
112 segbuf->sb_fseg_start = prev->sb_fseg_start;
113 segbuf->sb_fseg_end = prev->sb_fseg_end;
114 segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks;
115 segbuf->sb_rest_blocks =
116 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
117}
118
86void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, 119void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
87 __u64 nextnum, struct the_nilfs *nilfs) 120 __u64 nextnum, struct the_nilfs *nilfs)
88{ 121{
@@ -132,8 +165,6 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
132 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); 165 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
133 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; 166 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
134 segbuf->sb_sum.ctime = ctime; 167 segbuf->sb_sum.ctime = ctime;
135
136 segbuf->sb_io_error = 0;
137 return 0; 168 return 0;
138} 169}
139 170
@@ -219,7 +250,7 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
219 raw_sum->ss_datasum = cpu_to_le32(crc); 250 raw_sum->ss_datasum = cpu_to_le32(crc);
220} 251}
221 252
222void nilfs_release_buffers(struct list_head *list) 253static void nilfs_release_buffers(struct list_head *list)
223{ 254{
224 struct buffer_head *bh, *n; 255 struct buffer_head *bh, *n;
225 256
@@ -241,13 +272,56 @@ void nilfs_release_buffers(struct list_head *list)
241 } 272 }
242} 273}
243 274
275static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
276{
277 nilfs_release_buffers(&segbuf->sb_segsum_buffers);
278 nilfs_release_buffers(&segbuf->sb_payload_buffers);
279}
280
281/*
282 * Iterators for segment buffers
283 */
284void nilfs_clear_logs(struct list_head *logs)
285{
286 struct nilfs_segment_buffer *segbuf;
287
288 list_for_each_entry(segbuf, logs, sb_list)
289 nilfs_segbuf_clear(segbuf);
290}
291
292void nilfs_truncate_logs(struct list_head *logs,
293 struct nilfs_segment_buffer *last)
294{
295 struct nilfs_segment_buffer *n, *segbuf;
296
297 segbuf = list_prepare_entry(last, logs, sb_list);
298 list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) {
299 list_del_init(&segbuf->sb_list);
300 nilfs_segbuf_clear(segbuf);
301 nilfs_segbuf_free(segbuf);
302 }
303}
304
305int nilfs_wait_on_logs(struct list_head *logs)
306{
307 struct nilfs_segment_buffer *segbuf;
308 int err;
309
310 list_for_each_entry(segbuf, logs, sb_list) {
311 err = nilfs_segbuf_wait(segbuf);
312 if (err)
313 return err;
314 }
315 return 0;
316}
317
244/* 318/*
245 * BIO operations 319 * BIO operations
246 */ 320 */
247static void nilfs_end_bio_write(struct bio *bio, int err) 321static void nilfs_end_bio_write(struct bio *bio, int err)
248{ 322{
249 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 323 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
250 struct nilfs_write_info *wi = bio->bi_private; 324 struct nilfs_segment_buffer *segbuf = bio->bi_private;
251 325
252 if (err == -EOPNOTSUPP) { 326 if (err == -EOPNOTSUPP) {
253 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 327 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
@@ -256,21 +330,22 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
256 } 330 }
257 331
258 if (!uptodate) 332 if (!uptodate)
259 atomic_inc(&wi->err); 333 atomic_inc(&segbuf->sb_err);
260 334
261 bio_put(bio); 335 bio_put(bio);
262 complete(&wi->bio_event); 336 complete(&segbuf->sb_bio_event);
263} 337}
264 338
265static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) 339static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
340 struct nilfs_write_info *wi, int mode)
266{ 341{
267 struct bio *bio = wi->bio; 342 struct bio *bio = wi->bio;
268 int err; 343 int err;
269 344
270 if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { 345 if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) {
271 wait_for_completion(&wi->bio_event); 346 wait_for_completion(&segbuf->sb_bio_event);
272 wi->nbio--; 347 segbuf->sb_nbio--;
273 if (unlikely(atomic_read(&wi->err))) { 348 if (unlikely(atomic_read(&segbuf->sb_err))) {
274 bio_put(bio); 349 bio_put(bio);
275 err = -EIO; 350 err = -EIO;
276 goto failed; 351 goto failed;
@@ -278,7 +353,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
278 } 353 }
279 354
280 bio->bi_end_io = nilfs_end_bio_write; 355 bio->bi_end_io = nilfs_end_bio_write;
281 bio->bi_private = wi; 356 bio->bi_private = segbuf;
282 bio_get(bio); 357 bio_get(bio);
283 submit_bio(mode, bio); 358 submit_bio(mode, bio);
284 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 359 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
@@ -286,7 +361,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
286 err = -EOPNOTSUPP; 361 err = -EOPNOTSUPP;
287 goto failed; 362 goto failed;
288 } 363 }
289 wi->nbio++; 364 segbuf->sb_nbio++;
290 bio_put(bio); 365 bio_put(bio);
291 366
292 wi->bio = NULL; 367 wi->bio = NULL;
@@ -301,17 +376,15 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
301} 376}
302 377
303/** 378/**
304 * nilfs_alloc_seg_bio - allocate a bio for writing segment. 379 * nilfs_alloc_seg_bio - allocate a new bio for writing log
305 * @sb: super block 380 * @nilfs: nilfs object
306 * @start: beginning disk block number of this BIO. 381 * @start: start block number of the bio
307 * @nr_vecs: request size of page vector. 382 * @nr_vecs: request size of page vector.
308 * 383 *
309 * alloc_seg_bio() allocates a new BIO structure and initialize it.
310 *
311 * Return Value: On success, pointer to the struct bio is returned. 384 * Return Value: On success, pointer to the struct bio is returned.
312 * On error, NULL is returned. 385 * On error, NULL is returned.
313 */ 386 */
314static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, 387static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
315 int nr_vecs) 388 int nr_vecs)
316{ 389{
317 struct bio *bio; 390 struct bio *bio;
@@ -322,36 +395,33 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
322 bio = bio_alloc(GFP_NOIO, nr_vecs); 395 bio = bio_alloc(GFP_NOIO, nr_vecs);
323 } 396 }
324 if (likely(bio)) { 397 if (likely(bio)) {
325 bio->bi_bdev = sb->s_bdev; 398 bio->bi_bdev = nilfs->ns_bdev;
326 bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); 399 bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9);
327 } 400 }
328 return bio; 401 return bio;
329} 402}
330 403
331void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, 404static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
332 struct nilfs_write_info *wi) 405 struct nilfs_write_info *wi)
333{ 406{
334 wi->bio = NULL; 407 wi->bio = NULL;
335 wi->rest_blocks = segbuf->sb_sum.nblocks; 408 wi->rest_blocks = segbuf->sb_sum.nblocks;
336 wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); 409 wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
337 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); 410 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
338 wi->start = wi->end = 0; 411 wi->start = wi->end = 0;
339 wi->nbio = 0;
340 wi->blocknr = segbuf->sb_pseg_start; 412 wi->blocknr = segbuf->sb_pseg_start;
341
342 atomic_set(&wi->err, 0);
343 init_completion(&wi->bio_event);
344} 413}
345 414
346static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, 415static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
347 int mode) 416 struct nilfs_write_info *wi,
417 struct buffer_head *bh, int mode)
348{ 418{
349 int len, err; 419 int len, err;
350 420
351 BUG_ON(wi->nr_vecs <= 0); 421 BUG_ON(wi->nr_vecs <= 0);
352 repeat: 422 repeat:
353 if (!wi->bio) { 423 if (!wi->bio) {
354 wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, 424 wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
355 wi->nr_vecs); 425 wi->nr_vecs);
356 if (unlikely(!wi->bio)) 426 if (unlikely(!wi->bio))
357 return -ENOMEM; 427 return -ENOMEM;
@@ -363,76 +433,83 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
363 return 0; 433 return 0;
364 } 434 }
365 /* bio is FULL */ 435 /* bio is FULL */
366 err = nilfs_submit_seg_bio(wi, mode); 436 err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
367 /* never submit current bh */ 437 /* never submit current bh */
368 if (likely(!err)) 438 if (likely(!err))
369 goto repeat; 439 goto repeat;
370 return err; 440 return err;
371} 441}
372 442
443/**
444 * nilfs_segbuf_write - submit write requests of a log
445 * @segbuf: buffer storing a log to be written
446 * @nilfs: nilfs object
447 *
448 * Return Value: On Success, 0 is returned. On Error, one of the following
449 * negative error code is returned.
450 *
451 * %-EIO - I/O error
452 *
453 * %-ENOMEM - Insufficient memory available.
454 */
373int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, 455int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
374 struct nilfs_write_info *wi) 456 struct the_nilfs *nilfs)
375{ 457{
458 struct nilfs_write_info wi;
376 struct buffer_head *bh; 459 struct buffer_head *bh;
377 int res, rw = WRITE; 460 int res = 0, rw = WRITE;
461
462 wi.nilfs = nilfs;
463 nilfs_segbuf_prepare_write(segbuf, &wi);
378 464
379 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { 465 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
380 res = nilfs_submit_bh(wi, bh, rw); 466 res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
381 if (unlikely(res)) 467 if (unlikely(res))
382 goto failed_bio; 468 goto failed_bio;
383 } 469 }
384 470
385 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 471 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
386 res = nilfs_submit_bh(wi, bh, rw); 472 res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
387 if (unlikely(res)) 473 if (unlikely(res))
388 goto failed_bio; 474 goto failed_bio;
389 } 475 }
390 476
391 if (wi->bio) { 477 if (wi.bio) {
392 /* 478 /*
393 * Last BIO is always sent through the following 479 * Last BIO is always sent through the following
394 * submission. 480 * submission.
395 */ 481 */
396 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); 482 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
397 res = nilfs_submit_seg_bio(wi, rw); 483 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
398 if (unlikely(res))
399 goto failed_bio;
400 } 484 }
401 485
402 res = 0;
403 out:
404 return res;
405
406 failed_bio: 486 failed_bio:
407 atomic_inc(&wi->err); 487 return res;
408 goto out;
409} 488}
410 489
411/** 490/**
412 * nilfs_segbuf_wait - wait for completion of requested BIOs 491 * nilfs_segbuf_wait - wait for completion of requested BIOs
413 * @wi: nilfs_write_info 492 * @segbuf: segment buffer
414 * 493 *
415 * Return Value: On Success, 0 is returned. On Error, one of the following 494 * Return Value: On Success, 0 is returned. On Error, one of the following
416 * negative error code is returned. 495 * negative error code is returned.
417 * 496 *
418 * %-EIO - I/O error 497 * %-EIO - I/O error
419 */ 498 */
420int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, 499int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
421 struct nilfs_write_info *wi)
422{ 500{
423 int err = 0; 501 int err = 0;
424 502
425 if (!wi->nbio) 503 if (!segbuf->sb_nbio)
426 return 0; 504 return 0;
427 505
428 do { 506 do {
429 wait_for_completion(&wi->bio_event); 507 wait_for_completion(&segbuf->sb_bio_event);
430 } while (--wi->nbio > 0); 508 } while (--segbuf->sb_nbio > 0);
431 509
432 if (unlikely(atomic_read(&wi->err) > 0)) { 510 if (unlikely(atomic_read(&segbuf->sb_err) > 0)) {
433 printk(KERN_ERR "NILFS: IO error writing segment\n"); 511 printk(KERN_ERR "NILFS: IO error writing segment\n");
434 err = -EIO; 512 err = -EIO;
435 segbuf->sb_io_error = 1;
436 } 513 }
437 return err; 514 return err;
438} 515}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 0c3076f4e59..6af1630fb40 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -27,7 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/bio.h> 28#include <linux/bio.h>
29#include <linux/completion.h> 29#include <linux/completion.h>
30#include <linux/backing-dev.h>
31 30
32/** 31/**
33 * struct nilfs_segsum_info - On-memory segment summary 32 * struct nilfs_segsum_info - On-memory segment summary
@@ -77,7 +76,9 @@ struct nilfs_segsum_info {
77 * @sb_rest_blocks: Number of residual blocks in the current segment 76 * @sb_rest_blocks: Number of residual blocks in the current segment
78 * @sb_segsum_buffers: List of buffers for segment summaries 77 * @sb_segsum_buffers: List of buffers for segment summaries
79 * @sb_payload_buffers: List of buffers for segment payload 78 * @sb_payload_buffers: List of buffers for segment payload
80 * @sb_io_error: I/O error status 79 * @sb_nbio: Number of flying bio requests
80 * @sb_err: I/O error status
81 * @sb_bio_event: Completion event of log writing
81 */ 82 */
82struct nilfs_segment_buffer { 83struct nilfs_segment_buffer {
83 struct super_block *sb_super; 84 struct super_block *sb_super;
@@ -96,7 +97,9 @@ struct nilfs_segment_buffer {
96 struct list_head sb_payload_buffers; /* including super root */ 97 struct list_head sb_payload_buffers; /* including super root */
97 98
98 /* io status */ 99 /* io status */
99 int sb_io_error; 100 int sb_nbio;
101 atomic_t sb_err;
102 struct completion sb_bio_event;
100}; 103};
101 104
102#define NILFS_LIST_SEGBUF(head) \ 105#define NILFS_LIST_SEGBUF(head) \
@@ -125,6 +128,8 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
125void nilfs_segbuf_free(struct nilfs_segment_buffer *); 128void nilfs_segbuf_free(struct nilfs_segment_buffer *);
126void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, 129void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
127 struct the_nilfs *); 130 struct the_nilfs *);
131void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
132 struct nilfs_segment_buffer *prev);
128void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, 133void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
129 struct the_nilfs *); 134 struct the_nilfs *);
130int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); 135int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t);
@@ -161,41 +166,18 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
161 segbuf->sb_sum.nfileblk++; 166 segbuf->sb_sum.nfileblk++;
162} 167}
163 168
164void nilfs_release_buffers(struct list_head *); 169int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
170 struct the_nilfs *nilfs);
171int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
165 172
166static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) 173void nilfs_clear_logs(struct list_head *logs);
174void nilfs_truncate_logs(struct list_head *logs,
175 struct nilfs_segment_buffer *last);
176int nilfs_wait_on_logs(struct list_head *logs);
177
178static inline void nilfs_destroy_logs(struct list_head *logs)
167{ 179{
168 nilfs_release_buffers(&segbuf->sb_segsum_buffers); 180 nilfs_truncate_logs(logs, NULL);
169 nilfs_release_buffers(&segbuf->sb_payload_buffers);
170} 181}
171 182
172struct nilfs_write_info {
173 struct bio *bio;
174 int start, end; /* The region to be submitted */
175 int rest_blocks;
176 int max_pages;
177 int nr_vecs;
178 sector_t blocknr;
179
180 int nbio;
181 atomic_t err;
182 struct completion bio_event;
183 /* completion event of segment write */
184
185 /*
186 * The following fields must be set explicitly
187 */
188 struct super_block *sb;
189 struct backing_dev_info *bdi; /* backing dev info */
190 struct buffer_head *bh_sr;
191};
192
193
194void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *,
195 struct nilfs_write_info *);
196int nilfs_segbuf_write(struct nilfs_segment_buffer *,
197 struct nilfs_write_info *);
198int nilfs_segbuf_wait(struct nilfs_segment_buffer *,
199 struct nilfs_write_info *);
200
201#endif /* _NILFS_SEGBUF_H */ 183#endif /* _NILFS_SEGBUF_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6eff66a070d..17584c52448 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -974,12 +974,12 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
974 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 974 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
975 raw_sr->sr_flags = 0; 975 raw_sr->sr_flags = 0;
976 976
977 nilfs_mdt_write_inode_direct( 977 nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr +
978 nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); 978 NILFS_SR_DAT_OFFSET(isz), 1);
979 nilfs_mdt_write_inode_direct( 979 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
980 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); 980 NILFS_SR_CPFILE_OFFSET(isz), 1);
981 nilfs_mdt_write_inode_direct( 981 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
982 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); 982 NILFS_SR_SUFILE_OFFSET(isz), 1);
983} 983}
984 984
985static void nilfs_redirty_inodes(struct list_head *head) 985static void nilfs_redirty_inodes(struct list_head *head)
@@ -1273,73 +1273,75 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1273 return err; 1273 return err;
1274} 1274}
1275 1275
1276static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) 1276/**
1277{ 1277 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
1278 struct buffer_head *bh_su; 1278 * @sci: nilfs_sc_info
1279 struct nilfs_segment_usage *raw_su; 1279 * @nilfs: nilfs object
1280 int err; 1280 */
1281
1282 err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su);
1283 if (unlikely(err))
1284 return err;
1285 nilfs_mdt_mark_buffer_dirty(bh_su);
1286 nilfs_mdt_mark_dirty(sufile);
1287 nilfs_sufile_put_segment_usage(sufile, segnum, bh_su);
1288 return 0;
1289}
1290
1291static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, 1281static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1292 struct the_nilfs *nilfs) 1282 struct the_nilfs *nilfs)
1293{ 1283{
1294 struct nilfs_segment_buffer *segbuf, *n; 1284 struct nilfs_segment_buffer *segbuf, *prev;
1295 __u64 nextnum; 1285 __u64 nextnum;
1296 int err; 1286 int err, alloc = 0;
1297 1287
1298 if (list_empty(&sci->sc_segbufs)) { 1288 segbuf = nilfs_segbuf_new(sci->sc_super);
1299 segbuf = nilfs_segbuf_new(sci->sc_super); 1289 if (unlikely(!segbuf))
1300 if (unlikely(!segbuf)) 1290 return -ENOMEM;
1301 return -ENOMEM; 1291
1302 list_add(&segbuf->sb_list, &sci->sc_segbufs); 1292 if (list_empty(&sci->sc_write_logs)) {
1303 } else 1293 nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
1304 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1294 nilfs->ns_pseg_offset, nilfs);
1295 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1296 nilfs_shift_to_next_segment(nilfs);
1297 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1298 }
1305 1299
1306 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, 1300 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1307 nilfs); 1301 nextnum = nilfs->ns_nextnum;
1308 1302
1309 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1303 if (nilfs->ns_segnum == nilfs->ns_nextnum)
1310 nilfs_shift_to_next_segment(nilfs); 1304 /* Start from the head of a new full segment */
1311 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); 1305 alloc++;
1306 } else {
1307 /* Continue logs */
1308 prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1309 nilfs_segbuf_map_cont(segbuf, prev);
1310 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
1311 nextnum = prev->sb_nextnum;
1312
1313 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1314 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1315 segbuf->sb_sum.seg_seq++;
1316 alloc++;
1317 }
1312 } 1318 }
1313 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1314 1319
1315 err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); 1320 err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
1316 if (unlikely(err)) 1321 if (err)
1317 return err; 1322 goto failed;
1318 1323
1319 if (nilfs->ns_segnum == nilfs->ns_nextnum) { 1324 if (alloc) {
1320 /* Start from the head of a new full segment */
1321 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); 1325 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
1322 if (unlikely(err)) 1326 if (err)
1323 return err; 1327 goto failed;
1324 } else 1328 }
1325 nextnum = nilfs->ns_nextnum;
1326
1327 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1328 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); 1329 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1329 1330
1330 /* truncating segment buffers */ 1331 BUG_ON(!list_empty(&sci->sc_segbufs));
1331 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, 1332 list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
1332 sb_list) { 1333 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1333 list_del_init(&segbuf->sb_list);
1334 nilfs_segbuf_free(segbuf);
1335 }
1336 return 0; 1334 return 0;
1335
1336 failed:
1337 nilfs_segbuf_free(segbuf);
1338 return err;
1337} 1339}
1338 1340
1339static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, 1341static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1340 struct the_nilfs *nilfs, int nadd) 1342 struct the_nilfs *nilfs, int nadd)
1341{ 1343{
1342 struct nilfs_segment_buffer *segbuf, *prev, *n; 1344 struct nilfs_segment_buffer *segbuf, *prev;
1343 struct inode *sufile = nilfs->ns_sufile; 1345 struct inode *sufile = nilfs->ns_sufile;
1344 __u64 nextnextnum; 1346 __u64 nextnextnum;
1345 LIST_HEAD(list); 1347 LIST_HEAD(list);
@@ -1352,7 +1354,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1352 * not be dirty. The following call ensures that the buffer is dirty 1354 * not be dirty. The following call ensures that the buffer is dirty
1353 * and will pin the buffer on memory until the sufile is written. 1355 * and will pin the buffer on memory until the sufile is written.
1354 */ 1356 */
1355 err = nilfs_touch_segusage(sufile, prev->sb_nextnum); 1357 err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
1356 if (unlikely(err)) 1358 if (unlikely(err))
1357 return err; 1359 return err;
1358 1360
@@ -1378,33 +1380,33 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1378 list_add_tail(&segbuf->sb_list, &list); 1380 list_add_tail(&segbuf->sb_list, &list);
1379 prev = segbuf; 1381 prev = segbuf;
1380 } 1382 }
1381 list_splice(&list, sci->sc_segbufs.prev); 1383 list_splice_tail(&list, &sci->sc_segbufs);
1382 return 0; 1384 return 0;
1383 1385
1384 failed_segbuf: 1386 failed_segbuf:
1385 nilfs_segbuf_free(segbuf); 1387 nilfs_segbuf_free(segbuf);
1386 failed: 1388 failed:
1387 list_for_each_entry_safe(segbuf, n, &list, sb_list) { 1389 list_for_each_entry(segbuf, &list, sb_list) {
1388 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1390 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1389 WARN_ON(ret); /* never fails */ 1391 WARN_ON(ret); /* never fails */
1390 list_del_init(&segbuf->sb_list);
1391 nilfs_segbuf_free(segbuf);
1392 } 1392 }
1393 nilfs_destroy_logs(&list);
1393 return err; 1394 return err;
1394} 1395}
1395 1396
1396static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, 1397static void nilfs_free_incomplete_logs(struct list_head *logs,
1397 struct the_nilfs *nilfs) 1398 struct the_nilfs *nilfs)
1398{ 1399{
1399 struct nilfs_segment_buffer *segbuf; 1400 struct nilfs_segment_buffer *segbuf, *prev;
1400 int ret, done = 0; 1401 struct inode *sufile = nilfs->ns_sufile;
1402 int ret;
1401 1403
1402 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1404 segbuf = NILFS_FIRST_SEGBUF(logs);
1403 if (nilfs->ns_nextnum != segbuf->sb_nextnum) { 1405 if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1404 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); 1406 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1405 WARN_ON(ret); /* never fails */ 1407 WARN_ON(ret); /* never fails */
1406 } 1408 }
1407 if (segbuf->sb_io_error) { 1409 if (atomic_read(&segbuf->sb_err)) {
1408 /* Case 1: The first segment failed */ 1410 /* Case 1: The first segment failed */
1409 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) 1411 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1410 /* Case 1a: Partial segment appended into an existing 1412 /* Case 1a: Partial segment appended into an existing
@@ -1413,106 +1415,54 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
1413 segbuf->sb_fseg_end); 1415 segbuf->sb_fseg_end);
1414 else /* Case 1b: New full segment */ 1416 else /* Case 1b: New full segment */
1415 set_nilfs_discontinued(nilfs); 1417 set_nilfs_discontinued(nilfs);
1416 done++;
1417 } 1418 }
1418 1419
1419 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { 1420 prev = segbuf;
1420 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); 1421 list_for_each_entry_continue(segbuf, logs, sb_list) {
1421 WARN_ON(ret); /* never fails */ 1422 if (prev->sb_nextnum != segbuf->sb_nextnum) {
1422 if (!done && segbuf->sb_io_error) { 1423 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1423 if (segbuf->sb_segnum != nilfs->ns_nextnum) 1424 WARN_ON(ret); /* never fails */
1424 /* Case 2: extended segment (!= next) failed */
1425 nilfs_sufile_set_error(nilfs->ns_sufile,
1426 segbuf->sb_segnum);
1427 done++;
1428 }
1429 }
1430}
1431
1432static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
1433{
1434 struct nilfs_segment_buffer *segbuf;
1435
1436 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
1437 nilfs_segbuf_clear(segbuf);
1438 sci->sc_super_root = NULL;
1439}
1440
1441static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci)
1442{
1443 struct nilfs_segment_buffer *segbuf;
1444
1445 while (!list_empty(&sci->sc_segbufs)) {
1446 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1447 list_del_init(&segbuf->sb_list);
1448 nilfs_segbuf_free(segbuf);
1449 }
1450 /* sci->sc_curseg = NULL; */
1451}
1452
1453static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
1454 struct the_nilfs *nilfs, int err)
1455{
1456 if (unlikely(err)) {
1457 nilfs_segctor_free_incomplete_segments(sci, nilfs);
1458 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1459 int ret;
1460
1461 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1462 sci->sc_freesegs,
1463 sci->sc_nfreesegs,
1464 NULL);
1465 WARN_ON(ret); /* do not happen */
1466 } 1425 }
1426 if (atomic_read(&segbuf->sb_err) &&
1427 segbuf->sb_segnum != nilfs->ns_nextnum)
1428 /* Case 2: extended segment (!= next) failed */
1429 nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
1430 prev = segbuf;
1467 } 1431 }
1468 nilfs_segctor_clear_segment_buffers(sci);
1469} 1432}
1470 1433
1471static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, 1434static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1472 struct inode *sufile) 1435 struct inode *sufile)
1473{ 1436{
1474 struct nilfs_segment_buffer *segbuf; 1437 struct nilfs_segment_buffer *segbuf;
1475 struct buffer_head *bh_su;
1476 struct nilfs_segment_usage *raw_su;
1477 unsigned long live_blocks; 1438 unsigned long live_blocks;
1478 int ret; 1439 int ret;
1479 1440
1480 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1441 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1481 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1482 &raw_su, &bh_su);
1483 WARN_ON(ret); /* always succeed because bh_su is dirty */
1484 live_blocks = segbuf->sb_sum.nblocks + 1442 live_blocks = segbuf->sb_sum.nblocks +
1485 (segbuf->sb_pseg_start - segbuf->sb_fseg_start); 1443 (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1486 raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); 1444 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1487 raw_su->su_nblocks = cpu_to_le32(live_blocks); 1445 live_blocks,
1488 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, 1446 sci->sc_seg_ctime);
1489 bh_su); 1447 WARN_ON(ret); /* always succeed because the segusage is dirty */
1490 } 1448 }
1491} 1449}
1492 1450
1493static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, 1451static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
1494 struct inode *sufile)
1495{ 1452{
1496 struct nilfs_segment_buffer *segbuf; 1453 struct nilfs_segment_buffer *segbuf;
1497 struct buffer_head *bh_su;
1498 struct nilfs_segment_usage *raw_su;
1499 int ret; 1454 int ret;
1500 1455
1501 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1456 segbuf = NILFS_FIRST_SEGBUF(logs);
1502 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, 1457 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1503 &raw_su, &bh_su); 1458 segbuf->sb_pseg_start -
1504 WARN_ON(ret); /* always succeed because bh_su is dirty */ 1459 segbuf->sb_fseg_start, 0);
1505 raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - 1460 WARN_ON(ret); /* always succeed because the segusage is dirty */
1506 segbuf->sb_fseg_start);
1507 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);
1508 1461
1509 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { 1462 list_for_each_entry_continue(segbuf, logs, sb_list) {
1510 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, 1463 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1511 &raw_su, &bh_su); 1464 0, 0);
1512 WARN_ON(ret); /* always succeed */ 1465 WARN_ON(ret); /* always succeed */
1513 raw_su->su_nblocks = 0;
1514 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1515 bh_su);
1516 } 1466 }
1517} 1467}
1518 1468
@@ -1520,17 +1470,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1520 struct nilfs_segment_buffer *last, 1470 struct nilfs_segment_buffer *last,
1521 struct inode *sufile) 1471 struct inode *sufile)
1522{ 1472{
1523 struct nilfs_segment_buffer *segbuf = last, *n; 1473 struct nilfs_segment_buffer *segbuf = last;
1524 int ret; 1474 int ret;
1525 1475
1526 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, 1476 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1527 sb_list) {
1528 list_del_init(&segbuf->sb_list);
1529 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; 1477 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1530 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1478 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1531 WARN_ON(ret); 1479 WARN_ON(ret);
1532 nilfs_segbuf_free(segbuf);
1533 } 1480 }
1481 nilfs_truncate_logs(&sci->sc_segbufs, last);
1534} 1482}
1535 1483
1536 1484
@@ -1569,7 +1517,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1569 NULL); 1517 NULL);
1570 WARN_ON(err); /* do not happen */ 1518 WARN_ON(err); /* do not happen */
1571 } 1519 }
1572 nilfs_segctor_clear_segment_buffers(sci); 1520 nilfs_clear_logs(&sci->sc_segbufs);
1573 1521
1574 err = nilfs_segctor_extend_segments(sci, nilfs, nadd); 1522 err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1575 if (unlikely(err)) 1523 if (unlikely(err))
@@ -1814,26 +1762,18 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1814} 1762}
1815 1763
1816static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1764static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1817 struct backing_dev_info *bdi) 1765 struct the_nilfs *nilfs)
1818{ 1766{
1819 struct nilfs_segment_buffer *segbuf; 1767 struct nilfs_segment_buffer *segbuf;
1820 struct nilfs_write_info wi; 1768 int ret = 0;
1821 int err, res;
1822
1823 wi.sb = sci->sc_super;
1824 wi.bh_sr = sci->sc_super_root;
1825 wi.bdi = bdi;
1826 1769
1827 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1770 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1828 nilfs_segbuf_prepare_write(segbuf, &wi); 1771 ret = nilfs_segbuf_write(segbuf, nilfs);
1829 err = nilfs_segbuf_write(segbuf, &wi); 1772 if (ret)
1830 1773 break;
1831 res = nilfs_segbuf_wait(segbuf, &wi);
1832 err = err ? : res;
1833 if (err)
1834 return err;
1835 } 1774 }
1836 return 0; 1775 list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
1776 return ret;
1837} 1777}
1838 1778
1839static void __nilfs_end_page_io(struct page *page, int err) 1779static void __nilfs_end_page_io(struct page *page, int err)
@@ -1911,15 +1851,17 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1911 } 1851 }
1912} 1852}
1913 1853
1914static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, 1854static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1915 struct page *failed_page, int err) 1855 struct buffer_head *bh_sr, int err)
1916{ 1856{
1917 struct nilfs_segment_buffer *segbuf; 1857 struct nilfs_segment_buffer *segbuf;
1918 struct page *bd_page = NULL, *fs_page = NULL; 1858 struct page *bd_page = NULL, *fs_page = NULL;
1859 struct buffer_head *bh;
1919 1860
1920 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1861 if (list_empty(logs))
1921 struct buffer_head *bh; 1862 return;
1922 1863
1864 list_for_each_entry(segbuf, logs, sb_list) {
1923 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1865 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1924 b_assoc_buffers) { 1866 b_assoc_buffers) {
1925 if (bh->b_page != bd_page) { 1867 if (bh->b_page != bd_page) {
@@ -1931,7 +1873,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1931 1873
1932 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1874 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1933 b_assoc_buffers) { 1875 b_assoc_buffers) {
1934 if (bh == sci->sc_super_root) { 1876 if (bh == bh_sr) {
1935 if (bh->b_page != bd_page) { 1877 if (bh->b_page != bd_page) {
1936 end_page_writeback(bd_page); 1878 end_page_writeback(bd_page);
1937 bd_page = bh->b_page; 1879 bd_page = bh->b_page;
@@ -1941,7 +1883,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1941 if (bh->b_page != fs_page) { 1883 if (bh->b_page != fs_page) {
1942 nilfs_end_page_io(fs_page, err); 1884 nilfs_end_page_io(fs_page, err);
1943 if (fs_page && fs_page == failed_page) 1885 if (fs_page && fs_page == failed_page)
1944 goto done; 1886 return;
1945 fs_page = bh->b_page; 1887 fs_page = bh->b_page;
1946 } 1888 }
1947 } 1889 }
@@ -1950,8 +1892,34 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1950 end_page_writeback(bd_page); 1892 end_page_writeback(bd_page);
1951 1893
1952 nilfs_end_page_io(fs_page, err); 1894 nilfs_end_page_io(fs_page, err);
1953 done: 1895}
1896
1897static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1898 struct the_nilfs *nilfs, int err)
1899{
1900 LIST_HEAD(logs);
1901 int ret;
1902
1903 list_splice_tail_init(&sci->sc_write_logs, &logs);
1904 ret = nilfs_wait_on_logs(&logs);
1905 if (ret)
1906 nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret);
1907
1908 list_splice_tail_init(&sci->sc_segbufs, &logs);
1909 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1910 nilfs_free_incomplete_logs(&logs, nilfs);
1954 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); 1911 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1912
1913 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1914 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1915 sci->sc_freesegs,
1916 sci->sc_nfreesegs,
1917 NULL);
1918 WARN_ON(ret); /* do not happen */
1919 }
1920
1921 nilfs_destroy_logs(&logs);
1922 sci->sc_super_root = NULL;
1955} 1923}
1956 1924
1957static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1925static void nilfs_set_next_segment(struct the_nilfs *nilfs,
@@ -1973,7 +1941,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1973 struct the_nilfs *nilfs = sbi->s_nilfs; 1941 struct the_nilfs *nilfs = sbi->s_nilfs;
1974 int update_sr = (sci->sc_super_root != NULL); 1942 int update_sr = (sci->sc_super_root != NULL);
1975 1943
1976 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1944 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1977 struct buffer_head *bh; 1945 struct buffer_head *bh;
1978 1946
1979 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1947 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
@@ -2046,7 +2014,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
2046 2014
2047 sci->sc_nblk_inc += sci->sc_nblk_this_inc; 2015 sci->sc_nblk_inc += sci->sc_nblk_this_inc;
2048 2016
2049 segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); 2017 segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
2050 nilfs_set_next_segment(nilfs, segbuf); 2018 nilfs_set_next_segment(nilfs, segbuf);
2051 2019
2052 if (update_sr) { 2020 if (update_sr) {
@@ -2057,10 +2025,23 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
2057 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 2025 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
2058 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 2026 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2059 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 2027 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2028 nilfs_segctor_clear_metadata_dirty(sci);
2060 } else 2029 } else
2061 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 2030 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2062} 2031}
2063 2032
2033static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
2034{
2035 int ret;
2036
2037 ret = nilfs_wait_on_logs(&sci->sc_write_logs);
2038 if (!ret) {
2039 nilfs_segctor_complete_write(sci);
2040 nilfs_destroy_logs(&sci->sc_write_logs);
2041 }
2042 return ret;
2043}
2044
2064static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 2045static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
2065 struct nilfs_sb_info *sbi) 2046 struct nilfs_sb_info *sbi)
2066{ 2047{
@@ -2173,7 +2154,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2173 /* Avoid empty segment */ 2154 /* Avoid empty segment */
2174 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2155 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2175 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { 2156 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
2176 nilfs_segctor_end_construction(sci, nilfs, 1); 2157 nilfs_segctor_abort_construction(sci, nilfs, 1);
2177 goto out; 2158 goto out;
2178 } 2159 }
2179 2160
@@ -2187,7 +2168,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2187 if (has_sr) { 2168 if (has_sr) {
2188 err = nilfs_segctor_fill_in_checkpoint(sci); 2169 err = nilfs_segctor_fill_in_checkpoint(sci);
2189 if (unlikely(err)) 2170 if (unlikely(err))
2190 goto failed_to_make_up; 2171 goto failed_to_write;
2191 2172
2192 nilfs_segctor_fill_in_super_root(sci, nilfs); 2173 nilfs_segctor_fill_in_super_root(sci, nilfs);
2193 } 2174 }
@@ -2195,42 +2176,46 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2195 2176
2196 /* Write partial segments */ 2177 /* Write partial segments */
2197 err = nilfs_segctor_prepare_write(sci, &failed_page); 2178 err = nilfs_segctor_prepare_write(sci, &failed_page);
2198 if (unlikely(err)) 2179 if (err) {
2180 nilfs_abort_logs(&sci->sc_segbufs, failed_page,
2181 sci->sc_super_root, err);
2199 goto failed_to_write; 2182 goto failed_to_write;
2200 2183 }
2201 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); 2184 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed);
2202 2185
2203 err = nilfs_segctor_write(sci, nilfs->ns_bdi); 2186 err = nilfs_segctor_write(sci, nilfs);
2204 if (unlikely(err)) 2187 if (unlikely(err))
2205 goto failed_to_write; 2188 goto failed_to_write;
2206 2189
2207 nilfs_segctor_complete_write(sci); 2190 if (sci->sc_stage.scnt == NILFS_ST_DONE ||
2208 2191 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
2209 /* Commit segments */ 2192 /*
2210 if (has_sr) 2193 * At this point, we avoid double buffering
2211 nilfs_segctor_clear_metadata_dirty(sci); 2194 * for blocksize < pagesize because page dirty
2212 2195 * flag is turned off during write and dirty
2213 nilfs_segctor_end_construction(sci, nilfs, 0); 2196 * buffers are not properly collected for
2214 2197 * pages crossing over segments.
2198 */
2199 err = nilfs_segctor_wait(sci);
2200 if (err)
2201 goto failed_to_write;
2202 }
2215 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2203 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2216 2204
2205 sci->sc_super_root = NULL;
2206
2217 out: 2207 out:
2218 nilfs_segctor_destroy_segment_buffers(sci);
2219 nilfs_segctor_check_out_files(sci, sbi); 2208 nilfs_segctor_check_out_files(sci, sbi);
2220 return err; 2209 return err;
2221 2210
2222 failed_to_write: 2211 failed_to_write:
2223 nilfs_segctor_abort_write(sci, failed_page, err);
2224 nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile);
2225
2226 failed_to_make_up:
2227 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2212 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2228 nilfs_redirty_inodes(&sci->sc_dirty_files); 2213 nilfs_redirty_inodes(&sci->sc_dirty_files);
2229 2214
2230 failed: 2215 failed:
2231 if (nilfs_doing_gc()) 2216 if (nilfs_doing_gc())
2232 nilfs_redirty_inodes(&sci->sc_gc_inodes); 2217 nilfs_redirty_inodes(&sci->sc_gc_inodes);
2233 nilfs_segctor_end_construction(sci, nilfs, err); 2218 nilfs_segctor_abort_construction(sci, nilfs, err);
2234 goto out; 2219 goto out;
2235} 2220}
2236 2221
@@ -2559,7 +2544,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2559 2544
2560 sci->sc_freesegs = kbufs[4]; 2545 sci->sc_freesegs = kbufs[4];
2561 sci->sc_nfreesegs = argv[4].v_nmembs; 2546 sci->sc_nfreesegs = argv[4].v_nmembs;
2562 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); 2547 list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2563 2548
2564 for (;;) { 2549 for (;;) {
2565 nilfs_segctor_accept(sci, &req); 2550 nilfs_segctor_accept(sci, &req);
@@ -2788,6 +2773,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2788 spin_lock_init(&sci->sc_state_lock); 2773 spin_lock_init(&sci->sc_state_lock);
2789 INIT_LIST_HEAD(&sci->sc_dirty_files); 2774 INIT_LIST_HEAD(&sci->sc_dirty_files);
2790 INIT_LIST_HEAD(&sci->sc_segbufs); 2775 INIT_LIST_HEAD(&sci->sc_segbufs);
2776 INIT_LIST_HEAD(&sci->sc_write_logs);
2791 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2777 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2792 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2778 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2793 2779
@@ -2855,6 +2841,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2855 } 2841 }
2856 2842
2857 WARN_ON(!list_empty(&sci->sc_segbufs)); 2843 WARN_ON(!list_empty(&sci->sc_segbufs));
2844 WARN_ON(!list_empty(&sci->sc_write_logs));
2858 2845
2859 down_write(&sbi->s_nilfs->ns_segctor_sem); 2846 down_write(&sbi->s_nilfs->ns_segctor_sem);
2860 2847
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 0d2a475a741..3d3ab2f9864 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -97,6 +97,7 @@ struct nilfs_segsum_pointer {
97 * @sc_dsync_start: start byte offset of data pages 97 * @sc_dsync_start: start byte offset of data pages
98 * @sc_dsync_end: end byte offset of data pages (inclusive) 98 * @sc_dsync_end: end byte offset of data pages (inclusive)
99 * @sc_segbufs: List of segment buffers 99 * @sc_segbufs: List of segment buffers
100 * @sc_write_logs: List of segment buffers to hold logs under writing
100 * @sc_segbuf_nblocks: Number of available blocks in segment buffers. 101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
101 * @sc_curseg: Current segment buffer 102 * @sc_curseg: Current segment buffer
102 * @sc_super_root: Pointer to the super root buffer 103 * @sc_super_root: Pointer to the super root buffer
@@ -143,6 +144,7 @@ struct nilfs_sc_info {
143 144
144 /* Segment buffers */ 145 /* Segment buffers */
145 struct list_head sc_segbufs; 146 struct list_head sc_segbufs;
147 struct list_head sc_write_logs;
146 unsigned long sc_segbuf_nblocks; 148 unsigned long sc_segbuf_nblocks;
147 struct nilfs_segment_buffer *sc_curseg; 149 struct nilfs_segment_buffer *sc_curseg;
148 struct buffer_head *sc_super_root; 150 struct buffer_head *sc_super_root;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 37994d4a59c..b6c36d0cc33 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -31,6 +31,16 @@
31#include "sufile.h" 31#include "sufile.h"
32 32
33 33
34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs;
37};
38
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
40{
41 return (struct nilfs_sufile_info *)NILFS_MDT(sufile);
42}
43
34static inline unsigned long 44static inline unsigned long
35nilfs_sufile_segment_usages_per_block(const struct inode *sufile) 45nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
36{ 46{
@@ -62,14 +72,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
62 max - curr + 1); 72 max - curr + 1);
63} 73}
64 74
65static inline struct nilfs_sufile_header *
66nilfs_sufile_block_get_header(const struct inode *sufile,
67 struct buffer_head *bh,
68 void *kaddr)
69{
70 return kaddr + bh_offset(bh);
71}
72
73static struct nilfs_segment_usage * 75static struct nilfs_segment_usage *
74nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, 76nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
75 struct buffer_head *bh, void *kaddr) 77 struct buffer_head *bh, void *kaddr)
@@ -110,6 +112,15 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
110} 112}
111 113
112/** 114/**
115 * nilfs_sufile_get_ncleansegs - return the number of clean segments
116 * @sufile: inode of segment usage file
117 */
118unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
119{
120 return NILFS_SUI(sufile)->ncleansegs;
121}
122
123/**
113 * nilfs_sufile_updatev - modify multiple segment usages at a time 124 * nilfs_sufile_updatev - modify multiple segment usages at a time
114 * @sufile: inode of segment usage file 125 * @sufile: inode of segment usage file
115 * @segnumv: array of segment numbers 126 * @segnumv: array of segment numbers
@@ -270,7 +281,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
270 if (ret < 0) 281 if (ret < 0)
271 goto out_sem; 282 goto out_sem;
272 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 283 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
273 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 284 header = kaddr + bh_offset(header_bh);
274 ncleansegs = le64_to_cpu(header->sh_ncleansegs); 285 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
275 last_alloc = le64_to_cpu(header->sh_last_alloc); 286 last_alloc = le64_to_cpu(header->sh_last_alloc);
276 kunmap_atomic(kaddr, KM_USER0); 287 kunmap_atomic(kaddr, KM_USER0);
@@ -302,13 +313,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
302 kunmap_atomic(kaddr, KM_USER0); 313 kunmap_atomic(kaddr, KM_USER0);
303 314
304 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 315 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
305 header = nilfs_sufile_block_get_header( 316 header = kaddr + bh_offset(header_bh);
306 sufile, header_bh, kaddr);
307 le64_add_cpu(&header->sh_ncleansegs, -1); 317 le64_add_cpu(&header->sh_ncleansegs, -1);
308 le64_add_cpu(&header->sh_ndirtysegs, 1); 318 le64_add_cpu(&header->sh_ndirtysegs, 1);
309 header->sh_last_alloc = cpu_to_le64(segnum); 319 header->sh_last_alloc = cpu_to_le64(segnum);
310 kunmap_atomic(kaddr, KM_USER0); 320 kunmap_atomic(kaddr, KM_USER0);
311 321
322 NILFS_SUI(sufile)->ncleansegs--;
312 nilfs_mdt_mark_buffer_dirty(header_bh); 323 nilfs_mdt_mark_buffer_dirty(header_bh);
313 nilfs_mdt_mark_buffer_dirty(su_bh); 324 nilfs_mdt_mark_buffer_dirty(su_bh);
314 nilfs_mdt_mark_dirty(sufile); 325 nilfs_mdt_mark_dirty(sufile);
@@ -351,6 +362,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
351 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
352 363
353 nilfs_sufile_mod_counter(header_bh, -1, 1); 364 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--;
366
354 nilfs_mdt_mark_buffer_dirty(su_bh); 367 nilfs_mdt_mark_buffer_dirty(su_bh);
355 nilfs_mdt_mark_dirty(sufile); 368 nilfs_mdt_mark_dirty(sufile);
356} 369}
@@ -380,6 +393,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
380 kunmap_atomic(kaddr, KM_USER0); 393 kunmap_atomic(kaddr, KM_USER0);
381 394
382 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean;
397
383 nilfs_mdt_mark_buffer_dirty(su_bh); 398 nilfs_mdt_mark_buffer_dirty(su_bh);
384 nilfs_mdt_mark_dirty(sufile); 399 nilfs_mdt_mark_dirty(sufile);
385} 400}
@@ -409,79 +424,65 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
409 nilfs_mdt_mark_buffer_dirty(su_bh); 424 nilfs_mdt_mark_buffer_dirty(su_bh);
410 425
411 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++;
428
412 nilfs_mdt_mark_dirty(sufile); 429 nilfs_mdt_mark_dirty(sufile);
413} 430}
414 431
415/** 432/**
416 * nilfs_sufile_get_segment_usage - get a segment usage 433 * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty
417 * @sufile: inode of segment usage file 434 * @sufile: inode of segment usage file
418 * @segnum: segment number 435 * @segnum: segment number
419 * @sup: pointer to segment usage
420 * @bhp: pointer to buffer head
421 *
422 * Description: nilfs_sufile_get_segment_usage() acquires the segment usage
423 * specified by @segnum.
424 *
425 * Return Value: On success, 0 is returned, and the segment usage and the
426 * buffer head of the buffer on which the segment usage is located are stored
427 * in the place pointed by @sup and @bhp, respectively. On error, one of the
428 * following negative error codes is returned.
429 *
430 * %-EIO - I/O error.
431 *
432 * %-ENOMEM - Insufficient amount of memory available.
433 *
434 * %-EINVAL - Invalid segment usage number.
435 */ 436 */
436int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, 437int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
437 struct nilfs_segment_usage **sup,
438 struct buffer_head **bhp)
439{ 438{
440 struct buffer_head *bh; 439 struct buffer_head *bh;
441 struct nilfs_segment_usage *su;
442 void *kaddr;
443 int ret; 440 int ret;
444 441
445 /* segnum is 0 origin */ 442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
446 if (segnum >= nilfs_sufile_get_nsegments(sufile)) 443 if (!ret) {
447 return -EINVAL; 444 nilfs_mdt_mark_buffer_dirty(bh);
448 down_write(&NILFS_MDT(sufile)->mi_sem); 445 nilfs_mdt_mark_dirty(sufile);
449 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh);
450 if (ret < 0)
451 goto out_sem;
452 kaddr = kmap(bh->b_page);
453 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
454 if (nilfs_segment_usage_error(su)) {
455 kunmap(bh->b_page);
456 brelse(bh); 446 brelse(bh);
457 ret = -EINVAL;
458 goto out_sem;
459 } 447 }
460
461 if (sup != NULL)
462 *sup = su;
463 *bhp = bh;
464
465 out_sem:
466 up_write(&NILFS_MDT(sufile)->mi_sem);
467 return ret; 448 return ret;
468} 449}
469 450
470/** 451/**
471 * nilfs_sufile_put_segment_usage - put a segment usage 452 * nilfs_sufile_set_segment_usage - set usage of a segment
472 * @sufile: inode of segment usage file 453 * @sufile: inode of segment usage file
473 * @segnum: segment number 454 * @segnum: segment number
474 * @bh: buffer head 455 * @nblocks: number of live blocks in the segment
475 * 456 * @modtime: modification time (option)
476 * Description: nilfs_sufile_put_segment_usage() releases the segment usage
477 * specified by @segnum. @bh must be the buffer head which have been returned
478 * by a previous call to nilfs_sufile_get_segment_usage() with @segnum.
479 */ 457 */
480void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, 458int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
481 struct buffer_head *bh) 459 unsigned long nblocks, time_t modtime)
482{ 460{
483 kunmap(bh->b_page); 461 struct buffer_head *bh;
462 struct nilfs_segment_usage *su;
463 void *kaddr;
464 int ret;
465
466 down_write(&NILFS_MDT(sufile)->mi_sem);
467 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
468 if (ret < 0)
469 goto out_sem;
470
471 kaddr = kmap_atomic(bh->b_page, KM_USER0);
472 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
473 WARN_ON(nilfs_segment_usage_error(su));
474 if (modtime)
475 su->su_lastmod = cpu_to_le64(modtime);
476 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0);
478
479 nilfs_mdt_mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile);
484 brelse(bh); 481 brelse(bh);
482
483 out_sem:
484 up_write(&NILFS_MDT(sufile)->mi_sem);
485 return ret;
485} 486}
486 487
487/** 488/**
@@ -515,7 +516,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
515 goto out_sem; 516 goto out_sem;
516 517
517 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 518 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
518 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 519 header = kaddr + bh_offset(header_bh);
519 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); 520 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
520 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); 521 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
521 sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); 522 sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
@@ -532,33 +533,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
532 return ret; 533 return ret;
533} 534}
534 535
535/**
536 * nilfs_sufile_get_ncleansegs - get the number of clean segments
537 * @sufile: inode of segment usage file
538 * @nsegsp: pointer to the number of clean segments
539 *
540 * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean
541 * segments.
542 *
543 * Return Value: On success, 0 is returned and the number of clean segments is
544 * stored in the place pointed by @nsegsp. On error, one of the following
545 * negative error codes is returned.
546 *
547 * %-EIO - I/O error.
548 *
549 * %-ENOMEM - Insufficient amount of memory available.
550 */
551int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
552{
553 struct nilfs_sustat sustat;
554 int ret;
555
556 ret = nilfs_sufile_get_stat(sufile, &sustat);
557 if (ret == 0)
558 *nsegsp = sustat.ss_ncleansegs;
559 return ret;
560}
561
562void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, 536void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
563 struct buffer_head *header_bh, 537 struct buffer_head *header_bh,
564 struct buffer_head *su_bh) 538 struct buffer_head *su_bh)
@@ -577,8 +551,10 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
577 nilfs_segment_usage_set_error(su); 551 nilfs_segment_usage_set_error(su);
578 kunmap_atomic(kaddr, KM_USER0); 552 kunmap_atomic(kaddr, KM_USER0);
579 553
580 if (suclean) 554 if (suclean) {
581 nilfs_sufile_mod_counter(header_bh, -1, 0); 555 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--;
557 }
582 nilfs_mdt_mark_buffer_dirty(su_bh); 558 nilfs_mdt_mark_buffer_dirty(su_bh);
583 nilfs_mdt_mark_dirty(sufile); 559 nilfs_mdt_mark_dirty(sufile);
584} 560}
@@ -657,3 +633,48 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
657 up_read(&NILFS_MDT(sufile)->mi_sem); 633 up_read(&NILFS_MDT(sufile)->mi_sem);
658 return ret; 634 return ret;
659} 635}
636
637/**
638 * nilfs_sufile_read - read sufile inode
639 * @sufile: sufile inode
640 * @raw_inode: on-disk sufile inode
641 */
642int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode)
643{
644 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
645 struct buffer_head *header_bh;
646 struct nilfs_sufile_header *header;
647 void *kaddr;
648 int ret;
649
650 ret = nilfs_read_inode_common(sufile, raw_inode);
651 if (ret < 0)
652 return ret;
653
654 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
655 if (!ret) {
656 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
657 header = kaddr + bh_offset(header_bh);
658 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
659 kunmap_atomic(kaddr, KM_USER0);
660 brelse(header_bh);
661 }
662 return ret;
663}
664
665/**
666 * nilfs_sufile_new - create sufile
667 * @nilfs: nilfs object
668 * @susize: size of a segment usage entry
669 */
670struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize)
671{
672 struct inode *sufile;
673
674 sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO,
675 sizeof(struct nilfs_sufile_info));
676 if (sufile)
677 nilfs_mdt_set_entry_size(sufile, susize,
678 sizeof(struct nilfs_sufile_header));
679 return sufile;
680}
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 0e99e5c0bd0..15163b8aff7 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -34,14 +34,13 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
34 return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; 34 return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38
37int nilfs_sufile_alloc(struct inode *, __u64 *); 39int nilfs_sufile_alloc(struct inode *, __u64 *);
38int nilfs_sufile_get_segment_usage(struct inode *, __u64, 40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
39 struct nilfs_segment_usage **, 41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
40 struct buffer_head **); 42 unsigned long nblocks, time_t modtime);
41void nilfs_sufile_put_segment_usage(struct inode *, __u64,
42 struct buffer_head *);
43int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); 43int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
44int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
45ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, 44ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
46 size_t); 45 size_t);
47 46
@@ -62,6 +61,9 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
63 struct buffer_head *); 62 struct buffer_head *);
64 63
64int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode);
65struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize);
66
65/** 67/**
66 * nilfs_sufile_scrap - make a segment garbage 68 * nilfs_sufile_scrap - make a segment garbage
67 * @sufile: inode of segment usage file 69 * @sufile: inode of segment usage file
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 644e66727dd..8173faee31e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -363,14 +363,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
363 list_add(&sbi->s_list, &nilfs->ns_supers); 363 list_add(&sbi->s_list, &nilfs->ns_supers);
364 up_write(&nilfs->ns_super_sem); 364 up_write(&nilfs->ns_super_sem);
365 365
366 sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO); 366 sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size);
367 if (!sbi->s_ifile) 367 if (!sbi->s_ifile)
368 return -ENOMEM; 368 return -ENOMEM;
369 369
370 err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size);
371 if (unlikely(err))
372 goto failed;
373
374 down_read(&nilfs->ns_segctor_sem); 370 down_read(&nilfs->ns_segctor_sem);
375 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, 371 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
376 &bh_cp); 372 &bh_cp);
@@ -411,7 +407,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
411{ 407{
412 struct the_nilfs *nilfs = sbi->s_nilfs; 408 struct the_nilfs *nilfs = sbi->s_nilfs;
413 409
414 nilfs_mdt_clear(sbi->s_ifile);
415 nilfs_mdt_destroy(sbi->s_ifile); 410 nilfs_mdt_destroy(sbi->s_ifile);
416 sbi->s_ifile = NULL; 411 sbi->s_ifile = NULL;
417 down_write(&nilfs->ns_super_sem); 412 down_write(&nilfs->ns_super_sem);
@@ -419,22 +414,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
419 up_write(&nilfs->ns_super_sem); 414 up_write(&nilfs->ns_super_sem);
420} 415}
421 416
422static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
423{
424 struct the_nilfs *nilfs = sbi->s_nilfs;
425 int err = 0;
426
427 down_write(&nilfs->ns_sem);
428 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
429 nilfs->ns_mount_state |= NILFS_VALID_FS;
430 err = nilfs_commit_super(sbi, 1);
431 if (likely(!err))
432 printk(KERN_INFO "NILFS: recovery complete.\n");
433 }
434 up_write(&nilfs->ns_sem);
435 return err;
436}
437
438static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) 417static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
439{ 418{
440 struct super_block *sb = dentry->d_sb; 419 struct super_block *sb = dentry->d_sb;
@@ -490,7 +469,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
490 struct nilfs_sb_info *sbi = NILFS_SB(sb); 469 struct nilfs_sb_info *sbi = NILFS_SB(sb);
491 470
492 if (!nilfs_test_opt(sbi, BARRIER)) 471 if (!nilfs_test_opt(sbi, BARRIER))
493 seq_printf(seq, ",barrier=off"); 472 seq_printf(seq, ",nobarrier");
494 if (nilfs_test_opt(sbi, SNAPSHOT)) 473 if (nilfs_test_opt(sbi, SNAPSHOT))
495 seq_printf(seq, ",cp=%llu", 474 seq_printf(seq, ",cp=%llu",
496 (unsigned long long int)sbi->s_snapshot_cno); 475 (unsigned long long int)sbi->s_snapshot_cno);
@@ -500,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
500 seq_printf(seq, ",errors=panic"); 479 seq_printf(seq, ",errors=panic");
501 if (nilfs_test_opt(sbi, STRICT_ORDER)) 480 if (nilfs_test_opt(sbi, STRICT_ORDER))
502 seq_printf(seq, ",order=strict"); 481 seq_printf(seq, ",order=strict");
482 if (nilfs_test_opt(sbi, NORECOVERY))
483 seq_printf(seq, ",norecovery");
503 484
504 return 0; 485 return 0;
505} 486}
@@ -568,7 +549,7 @@ static const struct export_operations nilfs_export_ops = {
568 549
569enum { 550enum {
570 Opt_err_cont, Opt_err_panic, Opt_err_ro, 551 Opt_err_cont, Opt_err_panic, Opt_err_ro,
571 Opt_barrier, Opt_snapshot, Opt_order, 552 Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
572 Opt_err, 553 Opt_err,
573}; 554};
574 555
@@ -576,25 +557,13 @@ static match_table_t tokens = {
576 {Opt_err_cont, "errors=continue"}, 557 {Opt_err_cont, "errors=continue"},
577 {Opt_err_panic, "errors=panic"}, 558 {Opt_err_panic, "errors=panic"},
578 {Opt_err_ro, "errors=remount-ro"}, 559 {Opt_err_ro, "errors=remount-ro"},
579 {Opt_barrier, "barrier=%s"}, 560 {Opt_nobarrier, "nobarrier"},
580 {Opt_snapshot, "cp=%u"}, 561 {Opt_snapshot, "cp=%u"},
581 {Opt_order, "order=%s"}, 562 {Opt_order, "order=%s"},
563 {Opt_norecovery, "norecovery"},
582 {Opt_err, NULL} 564 {Opt_err, NULL}
583}; 565};
584 566
585static int match_bool(substring_t *s, int *result)
586{
587 int len = s->to - s->from;
588
589 if (strncmp(s->from, "on", len) == 0)
590 *result = 1;
591 else if (strncmp(s->from, "off", len) == 0)
592 *result = 0;
593 else
594 return 1;
595 return 0;
596}
597
598static int parse_options(char *options, struct super_block *sb) 567static int parse_options(char *options, struct super_block *sb)
599{ 568{
600 struct nilfs_sb_info *sbi = NILFS_SB(sb); 569 struct nilfs_sb_info *sbi = NILFS_SB(sb);
@@ -612,13 +581,8 @@ static int parse_options(char *options, struct super_block *sb)
612 581
613 token = match_token(p, tokens, args); 582 token = match_token(p, tokens, args);
614 switch (token) { 583 switch (token) {
615 case Opt_barrier: 584 case Opt_nobarrier:
616 if (match_bool(&args[0], &option)) 585 nilfs_clear_opt(sbi, BARRIER);
617 return 0;
618 if (option)
619 nilfs_set_opt(sbi, BARRIER);
620 else
621 nilfs_clear_opt(sbi, BARRIER);
622 break; 586 break;
623 case Opt_order: 587 case Opt_order:
624 if (strcmp(args[0].from, "relaxed") == 0) 588 if (strcmp(args[0].from, "relaxed") == 0)
@@ -647,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb)
647 sbi->s_snapshot_cno = option; 611 sbi->s_snapshot_cno = option;
648 nilfs_set_opt(sbi, SNAPSHOT); 612 nilfs_set_opt(sbi, SNAPSHOT);
649 break; 613 break;
614 case Opt_norecovery:
615 nilfs_set_opt(sbi, NORECOVERY);
616 break;
650 default: 617 default:
651 printk(KERN_ERR 618 printk(KERN_ERR
652 "NILFS: Unrecognized mount option \"%s\"\n", p); 619 "NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -672,9 +639,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
672 int mnt_count = le16_to_cpu(sbp->s_mnt_count); 639 int mnt_count = le16_to_cpu(sbp->s_mnt_count);
673 640
674 /* nilfs->sem must be locked by the caller. */ 641 /* nilfs->sem must be locked by the caller. */
675 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { 642 if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
676 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
677 } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
678 printk(KERN_WARNING 643 printk(KERN_WARNING
679 "NILFS warning: mounting fs with errors\n"); 644 "NILFS warning: mounting fs with errors\n");
680#if 0 645#if 0
@@ -782,11 +747,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
782 sb->s_root = NULL; 747 sb->s_root = NULL;
783 sb->s_time_gran = 1; 748 sb->s_time_gran = 1;
784 749
785 if (!nilfs_loaded(nilfs)) { 750 err = load_nilfs(nilfs, sbi);
786 err = load_nilfs(nilfs, sbi); 751 if (err)
787 if (err) 752 goto failed_sbi;
788 goto failed_sbi; 753
789 }
790 cno = nilfs_last_cno(nilfs); 754 cno = nilfs_last_cno(nilfs);
791 755
792 if (sb->s_flags & MS_RDONLY) { 756 if (sb->s_flags & MS_RDONLY) {
@@ -854,12 +818,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
854 up_write(&nilfs->ns_sem); 818 up_write(&nilfs->ns_sem);
855 } 819 }
856 820
857 err = nilfs_mark_recovery_complete(sbi);
858 if (unlikely(err)) {
859 printk(KERN_ERR "NILFS: recovery failed.\n");
860 goto failed_root;
861 }
862
863 down_write(&nilfs->ns_super_sem); 821 down_write(&nilfs->ns_super_sem);
864 if (!nilfs_test_opt(sbi, SNAPSHOT)) 822 if (!nilfs_test_opt(sbi, SNAPSHOT))
865 nilfs->ns_current = sbi; 823 nilfs->ns_current = sbi;
@@ -867,10 +825,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
867 825
868 return 0; 826 return 0;
869 827
870 failed_root:
871 dput(sb->s_root);
872 sb->s_root = NULL;
873
874 failed_segctor: 828 failed_segctor:
875 nilfs_detach_segment_constructor(sbi); 829 nilfs_detach_segment_constructor(sbi);
876 830
@@ -915,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
915 goto restore_opts; 869 goto restore_opts;
916 } 870 }
917 871
872 if (!nilfs_valid_fs(nilfs)) {
873 printk(KERN_WARNING "NILFS (device %s): couldn't "
874 "remount because the filesystem is in an "
875 "incomplete recovery state.\n", sb->s_id);
876 err = -EINVAL;
877 goto restore_opts;
878 }
879
918 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 880 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
919 goto out; 881 goto out;
920 if (*flags & MS_RDONLY) { 882 if (*flags & MS_RDONLY) {
@@ -1156,8 +1118,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1156 /* Abandoning the newly allocated superblock */ 1118 /* Abandoning the newly allocated superblock */
1157 mutex_unlock(&nilfs->ns_mount_mutex); 1119 mutex_unlock(&nilfs->ns_mount_mutex);
1158 put_nilfs(nilfs); 1120 put_nilfs(nilfs);
1159 up_write(&s->s_umount); 1121 deactivate_locked_super(s);
1160 deactivate_super(s);
1161 /* 1122 /*
1162 * deactivate_super() invokes close_bdev_exclusive(). 1123 * deactivate_super() invokes close_bdev_exclusive().
1163 * We must finish all post-cleaning before this call; 1124 * We must finish all post-cleaning before this call;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad391a8c3e7..6241e1722ef 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -146,13 +146,9 @@ void put_nilfs(struct the_nilfs *nilfs)
146 146
147 might_sleep(); 147 might_sleep();
148 if (nilfs_loaded(nilfs)) { 148 if (nilfs_loaded(nilfs)) {
149 nilfs_mdt_clear(nilfs->ns_sufile);
150 nilfs_mdt_destroy(nilfs->ns_sufile); 149 nilfs_mdt_destroy(nilfs->ns_sufile);
151 nilfs_mdt_clear(nilfs->ns_cpfile);
152 nilfs_mdt_destroy(nilfs->ns_cpfile); 150 nilfs_mdt_destroy(nilfs->ns_cpfile);
153 nilfs_mdt_clear(nilfs->ns_dat);
154 nilfs_mdt_destroy(nilfs->ns_dat); 151 nilfs_mdt_destroy(nilfs->ns_dat);
155 /* XXX: how and when to clear nilfs->ns_gc_dat? */
156 nilfs_mdt_destroy(nilfs->ns_gc_dat); 152 nilfs_mdt_destroy(nilfs->ns_gc_dat);
157 } 153 }
158 if (nilfs_init(nilfs)) { 154 if (nilfs_init(nilfs)) {
@@ -166,7 +162,6 @@ void put_nilfs(struct the_nilfs *nilfs)
166static int nilfs_load_super_root(struct the_nilfs *nilfs, 162static int nilfs_load_super_root(struct the_nilfs *nilfs,
167 struct nilfs_sb_info *sbi, sector_t sr_block) 163 struct nilfs_sb_info *sbi, sector_t sr_block)
168{ 164{
169 static struct lock_class_key dat_lock_key;
170 struct buffer_head *bh_sr; 165 struct buffer_head *bh_sr;
171 struct nilfs_super_root *raw_sr; 166 struct nilfs_super_root *raw_sr;
172 struct nilfs_super_block **sbp = nilfs->ns_sbp; 167 struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -187,51 +182,36 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
187 inode_size = nilfs->ns_inode_size; 182 inode_size = nilfs->ns_inode_size;
188 183
189 err = -ENOMEM; 184 err = -ENOMEM;
190 nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); 185 nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size);
191 if (unlikely(!nilfs->ns_dat)) 186 if (unlikely(!nilfs->ns_dat))
192 goto failed; 187 goto failed;
193 188
194 nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); 189 nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size);
195 if (unlikely(!nilfs->ns_gc_dat)) 190 if (unlikely(!nilfs->ns_gc_dat))
196 goto failed_dat; 191 goto failed_dat;
197 192
198 nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO); 193 nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size);
199 if (unlikely(!nilfs->ns_cpfile)) 194 if (unlikely(!nilfs->ns_cpfile))
200 goto failed_gc_dat; 195 goto failed_gc_dat;
201 196
202 nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO); 197 nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size);
203 if (unlikely(!nilfs->ns_sufile)) 198 if (unlikely(!nilfs->ns_sufile))
204 goto failed_cpfile; 199 goto failed_cpfile;
205 200
206 err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
207 if (unlikely(err))
208 goto failed_sufile;
209
210 err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
211 if (unlikely(err))
212 goto failed_sufile;
213
214 lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
215 lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
216
217 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); 201 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
218 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
219 sizeof(struct nilfs_cpfile_header));
220 nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
221 sizeof(struct nilfs_sufile_header));
222 202
223 err = nilfs_mdt_read_inode_direct( 203 err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data +
224 nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); 204 NILFS_SR_DAT_OFFSET(inode_size));
225 if (unlikely(err)) 205 if (unlikely(err))
226 goto failed_sufile; 206 goto failed_sufile;
227 207
228 err = nilfs_mdt_read_inode_direct( 208 err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data +
229 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); 209 NILFS_SR_CPFILE_OFFSET(inode_size));
230 if (unlikely(err)) 210 if (unlikely(err))
231 goto failed_sufile; 211 goto failed_sufile;
232 212
233 err = nilfs_mdt_read_inode_direct( 213 err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data +
234 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); 214 NILFS_SR_SUFILE_OFFSET(inode_size));
235 if (unlikely(err)) 215 if (unlikely(err))
236 goto failed_sufile; 216 goto failed_sufile;
237 217
@@ -281,29 +261,30 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
281 struct nilfs_recovery_info ri; 261 struct nilfs_recovery_info ri;
282 unsigned int s_flags = sbi->s_super->s_flags; 262 unsigned int s_flags = sbi->s_super->s_flags;
283 int really_read_only = bdev_read_only(nilfs->ns_bdev); 263 int really_read_only = bdev_read_only(nilfs->ns_bdev);
284 unsigned valid_fs; 264 int valid_fs = nilfs_valid_fs(nilfs);
285 int err = 0; 265 int err;
286
287 nilfs_init_recovery_info(&ri);
288 266
289 down_write(&nilfs->ns_sem); 267 if (nilfs_loaded(nilfs)) {
290 valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); 268 if (valid_fs ||
291 up_write(&nilfs->ns_sem); 269 ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY)))
270 return 0;
271 printk(KERN_ERR "NILFS: the filesystem is in an incomplete "
272 "recovery state.\n");
273 return -EINVAL;
274 }
292 275
293 if (!valid_fs && (s_flags & MS_RDONLY)) { 276 if (!valid_fs) {
294 printk(KERN_INFO "NILFS: INFO: recovery " 277 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
295 "required for readonly filesystem.\n"); 278 if (s_flags & MS_RDONLY) {
296 if (really_read_only) { 279 printk(KERN_INFO "NILFS: INFO: recovery "
297 printk(KERN_ERR "NILFS: write access " 280 "required for readonly filesystem.\n");
298 "unavailable, cannot proceed.\n"); 281 printk(KERN_INFO "NILFS: write access will "
299 err = -EROFS; 282 "be enabled during recovery.\n");
300 goto failed;
301 } 283 }
302 printk(KERN_INFO "NILFS: write access will "
303 "be enabled during recovery.\n");
304 sbi->s_super->s_flags &= ~MS_RDONLY;
305 } 284 }
306 285
286 nilfs_init_recovery_info(&ri);
287
307 err = nilfs_search_super_root(nilfs, sbi, &ri); 288 err = nilfs_search_super_root(nilfs, sbi, &ri);
308 if (unlikely(err)) { 289 if (unlikely(err)) {
309 printk(KERN_ERR "NILFS: error searching super root.\n"); 290 printk(KERN_ERR "NILFS: error searching super root.\n");
@@ -316,19 +297,56 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
316 goto failed; 297 goto failed;
317 } 298 }
318 299
319 if (!valid_fs) { 300 if (valid_fs)
320 err = nilfs_recover_logical_segments(nilfs, sbi, &ri); 301 goto skip_recovery;
321 if (unlikely(err)) { 302
322 nilfs_mdt_destroy(nilfs->ns_cpfile); 303 if (s_flags & MS_RDONLY) {
323 nilfs_mdt_destroy(nilfs->ns_sufile); 304 if (nilfs_test_opt(sbi, NORECOVERY)) {
324 nilfs_mdt_destroy(nilfs->ns_dat); 305 printk(KERN_INFO "NILFS: norecovery option specified. "
325 goto failed; 306 "skipping roll-forward recovery\n");
307 goto skip_recovery;
326 } 308 }
327 if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) 309 if (really_read_only) {
328 sbi->s_super->s_dirt = 1; 310 printk(KERN_ERR "NILFS: write access "
311 "unavailable, cannot proceed.\n");
312 err = -EROFS;
313 goto failed_unload;
314 }
315 sbi->s_super->s_flags &= ~MS_RDONLY;
316 } else if (nilfs_test_opt(sbi, NORECOVERY)) {
317 printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
318 "option was specified for a read/write mount\n");
319 err = -EINVAL;
320 goto failed_unload;
329 } 321 }
330 322
323 err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
324 if (err)
325 goto failed_unload;
326
327 down_write(&nilfs->ns_sem);
328 nilfs->ns_mount_state |= NILFS_VALID_FS;
329 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
330 err = nilfs_commit_super(sbi, 1);
331 up_write(&nilfs->ns_sem);
332
333 if (err) {
334 printk(KERN_ERR "NILFS: failed to update super block. "
335 "recovery unfinished.\n");
336 goto failed_unload;
337 }
338 printk(KERN_INFO "NILFS: recovery complete.\n");
339
340 skip_recovery:
331 set_nilfs_loaded(nilfs); 341 set_nilfs_loaded(nilfs);
342 nilfs_clear_recovery_info(&ri);
343 sbi->s_super->s_flags = s_flags;
344 return 0;
345
346 failed_unload:
347 nilfs_mdt_destroy(nilfs->ns_cpfile);
348 nilfs_mdt_destroy(nilfs->ns_sufile);
349 nilfs_mdt_destroy(nilfs->ns_dat);
332 350
333 failed: 351 failed:
334 nilfs_clear_recovery_info(&ri); 352 nilfs_clear_recovery_info(&ri);
@@ -632,30 +650,23 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
632{ 650{
633 struct inode *dat = nilfs_dat_inode(nilfs); 651 struct inode *dat = nilfs_dat_inode(nilfs);
634 unsigned long ncleansegs; 652 unsigned long ncleansegs;
635 int err;
636 653
637 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 654 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
638 err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); 655 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
639 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 656 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
640 if (likely(!err)) 657 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
641 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; 658 return 0;
642 return err;
643} 659}
644 660
645int nilfs_near_disk_full(struct the_nilfs *nilfs) 661int nilfs_near_disk_full(struct the_nilfs *nilfs)
646{ 662{
647 struct inode *sufile = nilfs->ns_sufile;
648 unsigned long ncleansegs, nincsegs; 663 unsigned long ncleansegs, nincsegs;
649 int ret;
650 664
651 ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); 665 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
652 if (likely(!ret)) { 666 nincsegs = atomic_read(&nilfs->ns_ndirtyblks) /
653 nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / 667 nilfs->ns_blocks_per_segment + 1;
654 nilfs->ns_blocks_per_segment + 1; 668
655 if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) 669 return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs;
656 ret++;
657 }
658 return ret;
659} 670}
660 671
661/** 672/**
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 20abd55881e..589786e3346 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -258,6 +258,16 @@ static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
258 kfree(sbi); 258 kfree(sbi);
259} 259}
260 260
261static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
262{
263 unsigned valid_fs;
264
265 down_read(&nilfs->ns_sem);
266 valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
267 up_read(&nilfs->ns_sem);
268 return valid_fs;
269}
270
261static inline void 271static inline void
262nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, 272nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
263 sector_t *seg_start, sector_t *seg_end) 273 sector_t *seg_start, sector_t *seg_end)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e..1afb0a10229 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index dcd2040d330..a94e8bd8eb1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -69,36 +69,30 @@ static int zero;
69 69
70ctl_table inotify_table[] = { 70ctl_table inotify_table[] = {
71 { 71 {
72 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
73 .procname = "max_user_instances", 72 .procname = "max_user_instances",
74 .data = &inotify_max_user_instances, 73 .data = &inotify_max_user_instances,
75 .maxlen = sizeof(int), 74 .maxlen = sizeof(int),
76 .mode = 0644, 75 .mode = 0644,
77 .proc_handler = &proc_dointvec_minmax, 76 .proc_handler = proc_dointvec_minmax,
78 .strategy = &sysctl_intvec,
79 .extra1 = &zero, 77 .extra1 = &zero,
80 }, 78 },
81 { 79 {
82 .ctl_name = INOTIFY_MAX_USER_WATCHES,
83 .procname = "max_user_watches", 80 .procname = "max_user_watches",
84 .data = &inotify_max_user_watches, 81 .data = &inotify_max_user_watches,
85 .maxlen = sizeof(int), 82 .maxlen = sizeof(int),
86 .mode = 0644, 83 .mode = 0644,
87 .proc_handler = &proc_dointvec_minmax, 84 .proc_handler = proc_dointvec_minmax,
88 .strategy = &sysctl_intvec,
89 .extra1 = &zero, 85 .extra1 = &zero,
90 }, 86 },
91 { 87 {
92 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
93 .procname = "max_queued_events", 88 .procname = "max_queued_events",
94 .data = &inotify_max_queued_events, 89 .data = &inotify_max_queued_events,
95 .maxlen = sizeof(int), 90 .maxlen = sizeof(int),
96 .mode = 0644, 91 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 92 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &zero 93 .extra1 = &zero
100 }, 94 },
101 { .ctl_name = 0 } 95 { }
102}; 96};
103#endif /* CONFIG_SYSCTL */ 97#endif /* CONFIG_SYSCTL */
104 98
@@ -558,7 +552,7 @@ retry:
558 552
559 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
560 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
561 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
562 &tmp_ientry->wd); 556 &tmp_ientry->wd);
563 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
564 if (ret) { 558 if (ret) {
@@ -638,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
638 632
639 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
640 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
641 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
642 group->inotify_data.user = user; 636 group->inotify_data.user = user;
643 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
644 638
@@ -652,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
652 struct fsnotify_group *group; 646 struct fsnotify_group *group;
653 struct user_struct *user; 647 struct user_struct *user;
654 struct file *filp; 648 struct file *filp;
649 struct path path;
655 int fd, ret; 650 int fd, ret;
656 651
657 /* Check the IN_* constants for consistency. */ 652 /* Check the IN_* constants for consistency. */
@@ -665,12 +660,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
665 if (fd < 0) 660 if (fd < 0)
666 return fd; 661 return fd;
667 662
668 filp = get_empty_filp();
669 if (!filp) {
670 ret = -ENFILE;
671 goto out_put_fd;
672 }
673
674 user = get_current_user(); 663 user = get_current_user();
675 if (unlikely(atomic_read(&user->inotify_devs) >= 664 if (unlikely(atomic_read(&user->inotify_devs) >=
676 inotify_max_user_instances)) { 665 inotify_max_user_instances)) {
@@ -685,24 +674,28 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
685 goto out_free_uid; 674 goto out_free_uid;
686 } 675 }
687 676
688 filp->f_op = &inotify_fops; 677 atomic_inc(&user->inotify_devs);
689 filp->f_path.mnt = mntget(inotify_mnt); 678
690 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 679 path.mnt = inotify_mnt;
691 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 680 path.dentry = inotify_mnt->mnt_root;
692 filp->f_mode = FMODE_READ; 681 path_get(&path);
682 filp = alloc_file(&path, FMODE_READ, &inotify_fops);
683 if (!filp)
684 goto Enfile;
685
693 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); 686 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
694 filp->private_data = group; 687 filp->private_data = group;
695 688
696 atomic_inc(&user->inotify_devs);
697
698 fd_install(fd, filp); 689 fd_install(fd, filp);
699 690
700 return fd; 691 return fd;
701 692
693Enfile:
694 ret = -ENFILE;
695 path_put(&path);
696 atomic_dec(&user->inotify_devs);
702out_free_uid: 697out_free_uid:
703 free_uid(user); 698 free_uid(user);
704 put_filp(filp);
705out_put_fd:
706 put_unused_fd(fd); 699 put_unused_fd(fd);
707 return ret; 700 return ret;
708} 701}
@@ -747,10 +740,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
747 740
748 /* create/update an inode mark */ 741 /* create/update an inode mark */
749 ret = inotify_update_watch(group, inode, mask); 742 ret = inotify_update_watch(group, inode, mask);
750 if (unlikely(ret))
751 goto path_put_and_out;
752
753path_put_and_out:
754 path_put(&path); 743 path_put(&path);
755fput_and_out: 744fput_and_out:
756 fput_light(filp, fput_needed); 745 fput_light(filp, fput_needed);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 9669541d011..08f7530e934 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -927,7 +927,7 @@ lock_retry_remap:
927 return 0; 927 return 0;
928 928
929 ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? 929 ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
930 "EOVERFLOW" : (!err ? "EIO" : "unkown error")); 930 "EOVERFLOW" : (!err ? "EIO" : "unknown error"));
931 return err < 0 ? err : -EIO; 931 return err < 0 ? err : -EIO;
932 932
933read_err: 933read_err:
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 663c0e341f8..43179ddd336 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -399,7 +399,7 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
399 * @cached_page: allocated but as yet unused page 399 * @cached_page: allocated but as yet unused page
400 * @lru_pvec: lru-buffering pagevec of caller 400 * @lru_pvec: lru-buffering pagevec of caller
401 * 401 *
402 * Obtain @nr_pages locked page cache pages from the mapping @maping and 402 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
403 * starting at index @index. 403 * starting at index @index.
404 * 404 *
405 * If a page is newly created, increment its refcount and add it to the 405 * If a page is newly created, increment its refcount and add it to the
@@ -1281,7 +1281,7 @@ rl_not_mapped_enoent:
1281 1281
1282/* 1282/*
1283 * Copy as much as we can into the pages and return the number of bytes which 1283 * Copy as much as we can into the pages and return the number of bytes which
1284 * were sucessfully copied. If a fault is encountered then clear the pages 1284 * were successfully copied. If a fault is encountered then clear the pages
1285 * out to (ofs + bytes) and return the number of bytes which were copied. 1285 * out to (ofs + bytes) and return the number of bytes which were copied.
1286 */ 1286 */
1287static inline size_t ntfs_copy_from_user(struct page **pages, 1287static inline size_t ntfs_copy_from_user(struct page **pages,
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762c..dc2505abb6d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
530 * the ntfs inode. 530 * the ntfs inode.
531 * 531 *
532 * Q: What locks are held when the function is called? 532 * Q: What locks are held when the function is called?
533 * A: i_state has I_LOCK set, hence the inode is locked, also 533 * A: i_state has I_NEW set, hence the inode is locked, also
534 * i_count is set to 1, so it is not going to go away 534 * i_count is set to 1, so it is not going to go away
535 * i_flags is set to 0 and we have no business touching it. Only an ioctl() 535 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
536 * is allowed to write to them. We should of course be honouring them but 536 * is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
1207 * necessary fields in @vi as well as initializing the ntfs inode. 1207 * necessary fields in @vi as well as initializing the ntfs inode.
1208 * 1208 *
1209 * Q: What locks are held when the function is called? 1209 * Q: What locks are held when the function is called?
1210 * A: i_state has I_LOCK set, hence the inode is locked, also 1210 * A: i_state has I_NEW set, hence the inode is locked, also
1211 * i_count is set to 1, so it is not going to go away 1211 * i_count is set to 1, so it is not going to go away
1212 * 1212 *
1213 * Return 0 on success and -errno on error. In the error case, the inode will 1213 * Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
1474 * normal directory inodes. 1474 * normal directory inodes.
1475 * 1475 *
1476 * Q: What locks are held when the function is called? 1476 * Q: What locks are held when the function is called?
1477 * A: i_state has I_LOCK set, hence the inode is locked, also 1477 * A: i_state has I_NEW set, hence the inode is locked, also
1478 * i_count is set to 1, so it is not going to go away 1478 * i_count is set to 1, so it is not going to go away
1479 * 1479 *
1480 * Return 0 on success and -errno on error. In the error case, the inode will 1480 * Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 89b02985c05..4dadcdf3d45 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -338,7 +338,7 @@ err_out:
338 * copy of the complete multi sector transfer deprotected page. On failure, 338 * copy of the complete multi sector transfer deprotected page. On failure,
339 * *@wrp is undefined. 339 * *@wrp is undefined.
340 * 340 *
341 * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current 341 * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current
342 * logfile lsn according to this restart page. On failure, *@lsn is undefined. 342 * logfile lsn according to this restart page. On failure, *@lsn is undefined.
343 * 343 *
344 * The following error codes are defined: 344 * The following error codes are defined:
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 9ef85e628fe..79a89184cb5 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -36,12 +36,11 @@
36/* Definition of the ntfs sysctl. */ 36/* Definition of the ntfs sysctl. */
37static ctl_table ntfs_sysctls[] = { 37static ctl_table ntfs_sysctls[] = {
38 { 38 {
39 .ctl_name = CTL_UNNUMBERED, /* Binary and text IDs. */
40 .procname = "ntfs-debug", 39 .procname = "ntfs-debug",
41 .data = &debug_msgs, /* Data pointer and size. */ 40 .data = &debug_msgs, /* Data pointer and size. */
42 .maxlen = sizeof(debug_msgs), 41 .maxlen = sizeof(debug_msgs),
43 .mode = 0644, /* Mode, proc handler. */ 42 .mode = 0644, /* Mode, proc handler. */
44 .proc_handler = &proc_dointvec 43 .proc_handler = proc_dointvec
45 }, 44 },
46 {} 45 {}
47}; 46};
@@ -49,7 +48,6 @@ static ctl_table ntfs_sysctls[] = {
49/* Define the parent directory /proc/sys/fs. */ 48/* Define the parent directory /proc/sys/fs. */
50static ctl_table sysctls_root[] = { 49static ctl_table sysctls_root[] = {
51 { 50 {
52 .ctl_name = CTL_FS,
53 .procname = "fs", 51 .procname = "fs",
54 .mode = 0555, 52 .mode = 0555,
55 .child = ntfs_sysctls 53 .child = ntfs_sysctls
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 701b7a3a872..0d840669698 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -6,6 +6,7 @@ config OCFS2_FS
6 select CRC32 6 select CRC32
7 select QUOTA 7 select QUOTA
8 select QUOTA_TREE 8 select QUOTA_TREE
9 select FS_POSIX_ACL
9 help 10 help
10 OCFS2 is a general purpose extent based shared disk cluster file 11 OCFS2 is a general purpose extent based shared disk cluster file
11 system with many similarities to ext3. It supports 64 bit inode 12 system with many similarities to ext3. It supports 64 bit inode
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS
74 This option will enable expensive consistency checks. Enable 75 This option will enable expensive consistency checks. Enable
75 this option for debugging only as it is likely to decrease 76 this option for debugging only as it is likely to decrease
76 performance of the filesystem. 77 performance of the filesystem.
77
78config OCFS2_FS_POSIX_ACL
79 bool "OCFS2 POSIX Access Control Lists"
80 depends on OCFS2_FS
81 select FS_POSIX_ACL
82 default n
83 help
84 Posix Access Control Lists (ACLs) support permissions for users and
85 groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce32c9..600d2d2ade1 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -39,11 +39,8 @@ ocfs2-objs := \
39 ver.o \ 39 ver.o \
40 quota_local.o \ 40 quota_local.o \
41 quota_global.o \ 41 quota_global.o \
42 xattr.o 42 xattr.o \
43 43 acl.o
44ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
45ocfs2-objs += acl.o
46endif
47 44
48ocfs2_stackglue-objs := stackglue.o 45ocfs2_stackglue-objs := stackglue.o
49ocfs2_stack_o2cb-objs := stack_o2cb.o 46ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec76210..0501974bedd 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -98,15 +98,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type, 98 int type,
99 struct buffer_head *di_bh) 99 struct buffer_head *di_bh)
100{ 100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index; 101 int name_index;
103 char *value = NULL; 102 char *value = NULL;
104 struct posix_acl *acl; 103 struct posix_acl *acl;
105 int retval; 104 int retval;
106 105
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) { 106 switch (type) {
111 case ACL_TYPE_ACCESS: 107 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; 108 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -331,13 +327,14 @@ cleanup:
331 return ret; 327 return ret;
332} 328}
333 329
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode, 330static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
335 char *list, 331 char *list,
336 size_t list_len, 332 size_t list_len,
337 const char *name, 333 const char *name,
338 size_t name_len) 334 size_t name_len,
335 int type)
339{ 336{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 337 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 338 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342 339
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 340 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +345,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
348 return size; 345 return size;
349} 346}
350 347
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode, 348static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
352 char *list, 349 char *list,
353 size_t list_len, 350 size_t list_len,
354 const char *name, 351 const char *name,
355 size_t name_len) 352 size_t name_len,
353 int type)
356{ 354{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 355 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 356 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 357
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 358 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +363,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
365 return size; 363 return size;
366} 364}
367 365
368static int ocfs2_xattr_get_acl(struct inode *inode, 366static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
369 int type, 367 void *buffer, size_t size, int type)
370 void *buffer,
371 size_t size)
372{ 368{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 369 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
374 struct posix_acl *acl; 370 struct posix_acl *acl;
375 int ret; 371 int ret;
376 372
373 if (strcmp(name, "") != 0)
374 return -EINVAL;
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 375 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP; 376 return -EOPNOTSUPP;
379 377
380 acl = ocfs2_get_acl(inode, type); 378 acl = ocfs2_get_acl(dentry->d_inode, type);
381 if (IS_ERR(acl)) 379 if (IS_ERR(acl))
382 return PTR_ERR(acl); 380 return PTR_ERR(acl);
383 if (acl == NULL) 381 if (acl == NULL)
@@ -388,35 +386,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
388 return ret; 386 return ret;
389} 387}
390 388
391static int ocfs2_xattr_get_acl_access(struct inode *inode, 389static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
392 const char *name, 390 const void *value, size_t size, int flags, int type)
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{ 391{
392 struct inode *inode = dentry->d_inode;
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 393 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl; 394 struct posix_acl *acl;
418 int ret = 0; 395 int ret = 0;
419 396
397 if (strcmp(name, "") != 0)
398 return -EINVAL;
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 399 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
422 401
@@ -442,38 +421,18 @@ cleanup:
442 return ret; 421 return ret;
443} 422}
444 423
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = { 424struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS, 425 .prefix = POSIX_ACL_XATTR_ACCESS,
426 .flags = ACL_TYPE_ACCESS,
469 .list = ocfs2_xattr_list_acl_access, 427 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access, 428 .get = ocfs2_xattr_get_acl,
471 .set = ocfs2_xattr_set_acl_access, 429 .set = ocfs2_xattr_set_acl,
472}; 430};
473 431
474struct xattr_handler ocfs2_xattr_acl_default_handler = { 432struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT, 433 .prefix = POSIX_ACL_XATTR_DEFAULT,
434 .flags = ACL_TYPE_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default, 435 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default, 436 .get = ocfs2_xattr_get_acl,
478 .set = ocfs2_xattr_set_acl_default, 437 .set = ocfs2_xattr_set_acl,
479}; 438};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 8f6389ed4da..5c5d31f0585 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
35 struct ocfs2_alloc_context *, 33 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *); 34 struct ocfs2_alloc_context *);
37 35
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
41static inline int ocfs2_acl_chmod(struct inode *inode)
42{
43 return 0;
44}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */ 36#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38a42f5d59f..d17bdc718f7 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1765,9 +1765,9 @@ set_and_inc:
1765 * 1765 *
1766 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1767 */ 1767 */
1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 1768int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1769 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1770 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1771{ 1771{
1772 int i = 0; 1772 int i = 0;
1773 1773
@@ -2398,7 +2398,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
2398 * 2398 *
2399 * The array is assumed to be large enough to hold an entire path (tree depth). 2399 * The array is assumed to be large enough to hold an entire path (tree depth).
2400 * 2400 *
2401 * Upon succesful return from this function: 2401 * Upon successful return from this function:
2402 * 2402 *
2403 * - The 'right_path' array will contain a path to the leaf block 2403 * - The 'right_path' array will contain a path to the leaf block
2404 * whose range contains e_cpos. 2404 * whose range contains e_cpos.
@@ -2872,8 +2872,8 @@ out:
2872 * This looks similar, but is subtly different to 2872 * This looks similar, but is subtly different to
2873 * ocfs2_find_cpos_for_left_leaf(). 2873 * ocfs2_find_cpos_for_left_leaf().
2874 */ 2874 */
2875static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 2875int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2876 struct ocfs2_path *path, u32 *cpos) 2876 struct ocfs2_path *path, u32 *cpos)
2877{ 2877{
2878 int i, j, ret = 0; 2878 int i, j, ret = 0;
2879 u64 blkno; 2879 u64 blkno;
@@ -7190,8 +7190,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
7190 * wait on them - the truncate_inode_pages() call later will 7190 * wait on them - the truncate_inode_pages() call later will
7191 * do that for us. 7191 * do that for us.
7192 */ 7192 */
7193 ret = do_sync_mapping_range(inode->i_mapping, range_start, 7193 ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
7194 range_end - 1, SYNC_FILE_RANGE_WRITE); 7194 range_end - 1);
7195 if (ret) 7195 if (ret)
7196 mlog_errno(ret); 7196 mlog_errno(ret);
7197 7197
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 9c122d57446..1db4359ccb9 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, 317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle, 318 handle_t *handle,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left,
324 struct ocfs2_path *right);
320#endif /* OCFS2_ALLOC_H */ 325#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5..3dae4a13f6e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
547 * 547 *
548 * called like this: dio->get_blocks(dio->inode, fs_startblk, 548 * called like this: dio->get_blocks(dio->inode, fs_startblk,
549 * fs_count, map_bh, dio->rw == WRITE); 549 * fs_count, map_bh, dio->rw == WRITE);
550 *
551 * Note that we never bother to allocate blocks here, and thus ignore the
552 * create argument.
550 */ 553 */
551static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, 554static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
552 struct buffer_head *bh_result, int create) 555 struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
563 566
564 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 567 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
565 568
566 /*
567 * Any write past EOF is not allowed because we'd be extending.
568 */
569 if (create && (iblock + max_blocks) > inode_blocks) {
570 ret = -EIO;
571 goto bail;
572 }
573
574 /* This figures out the size of the next contiguous block, and 569 /* This figures out the size of the next contiguous block, and
575 * our logical offset */ 570 * our logical offset */
576 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, 571 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
582 goto bail; 577 goto bail;
583 } 578 }
584 579
585 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
586 ocfs2_error(inode->i_sb,
587 "Inode %llu has a hole at block %llu\n",
588 (unsigned long long)OCFS2_I(inode)->ip_blkno,
589 (unsigned long long)iblock);
590 ret = -EROFS;
591 goto bail;
592 }
593
594 /* We should already CoW the refcounted extent. */ 580 /* We should already CoW the refcounted extent. */
595 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 581 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
596 /* 582 /*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
601 */ 587 */
602 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) 588 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
603 map_bh(bh_result, inode->i_sb, p_blkno); 589 map_bh(bh_result, inode->i_sb, p_blkno);
604 else { 590 else
605 /*
606 * ocfs2_prepare_inode_for_write() should have caught
607 * the case where we'd be filling a hole and triggered
608 * a buffered write instead.
609 */
610 if (create) {
611 ret = -EIO;
612 mlog_errno(ret);
613 goto bail;
614 }
615
616 clear_buffer_mapped(bh_result); 591 clear_buffer_mapped(bh_result);
617 }
618 592
619 /* make sure we don't map more than max_blocks blocks here as 593 /* make sure we don't map more than max_blocks blocks here as
620 that's all the kernel will handle at this point. */ 594 that's all the kernel will handle at this point. */
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index a1163b8b417..b7428c5d0d3 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -47,7 +47,7 @@
47 * Calculate the bit offset in the hamming code buffer based on the bit's 47 * Calculate the bit offset in the hamming code buffer based on the bit's
48 * offset in the data buffer. Since the hamming code reserves all 48 * offset in the data buffer. Since the hamming code reserves all
49 * power-of-two bits for parity, the data bit number and the code bit 49 * power-of-two bits for parity, the data bit number and the code bit
50 * number are offest by all the parity bits beforehand. 50 * number are offset by all the parity bits beforehand.
51 * 51 *
52 * Recall that bit numbers in hamming code are 1-based. This function 52 * Recall that bit numbers in hamming code are 1-based. This function
53 * takes the 0-based data bit from the caller. 53 * takes the 0-based data bit from the caller.
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index c452d116b89..eda5b8bcddd 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -176,7 +176,8 @@ static void o2hb_write_timeout(struct work_struct *work)
176 176
177static void o2hb_arm_write_timeout(struct o2hb_region *reg) 177static void o2hb_arm_write_timeout(struct o2hb_region *reg)
178{ 178{
179 mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); 179 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
180 O2HB_MAX_WRITE_TIMEOUT_MS);
180 181
181 cancel_delayed_work(&reg->hr_write_timeout_work); 182 cancel_delayed_work(&reg->hr_write_timeout_work);
182 reg->hr_last_timeout_start = jiffies; 183 reg->hr_last_timeout_start = jiffies;
@@ -874,7 +875,8 @@ static int o2hb_thread(void *data)
874 do_gettimeofday(&after_hb); 875 do_gettimeofday(&after_hb);
875 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 876 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
876 877
877 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", 878 mlog(ML_HEARTBEAT,
879 "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
878 before_hb.tv_sec, (unsigned long) before_hb.tv_usec, 880 before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
879 after_hb.tv_sec, (unsigned long) after_hb.tv_usec, 881 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
880 elapsed_msec); 882 elapsed_msec);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index da794bc07a6..a3f150e52b0 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v)
294 if (sc->sc_sock) { 294 if (sc->sc_sock) {
295 inet = inet_sk(sc->sc_sock->sk); 295 inet = inet_sk(sc->sc_sock->sk);
296 /* the stack's structs aren't sparse endian clean */ 296 /* the stack's structs aren't sparse endian clean */
297 saddr = (__force __be32)inet->saddr; 297 saddr = (__force __be32)inet->inet_saddr;
298 daddr = (__force __be32)inet->daddr; 298 daddr = (__force __be32)inet->inet_daddr;
299 sport = (__force __be16)inet->sport; 299 sport = (__force __be16)inet->inet_sport;
300 dport = (__force __be16)inet->dport; 300 dport = (__force __be16)inet->inet_dport;
301 } 301 }
302 302
303 /* XXX sigh, inet-> doesn't have sparse annotation so any 303 /* XXX sigh, inet-> doesn't have sparse annotation so any
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79..c81142e3ef8 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
35 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
36struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
37 37
38char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
39 "reset", /* O2NM_FENCE_RESET */
40 "panic", /* O2NM_FENCE_PANIC */
41};
38 42
39struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 43struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
40{ 44{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
579 return o2nm_cluster_attr_write(page, count, 583 return o2nm_cluster_attr_write(page, count,
580 &cluster->cl_reconnect_delay_ms); 584 &cluster->cl_reconnect_delay_ms);
581} 585}
586
587static ssize_t o2nm_cluster_attr_fence_method_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 ssize_t ret = 0;
591
592 if (cluster)
593 ret = sprintf(page, "%s\n",
594 o2nm_fence_method_desc[cluster->cl_fence_method]);
595 return ret;
596}
597
598static ssize_t o2nm_cluster_attr_fence_method_write(
599 struct o2nm_cluster *cluster, const char *page, size_t count)
600{
601 unsigned int i;
602
603 if (page[count - 1] != '\n')
604 goto bail;
605
606 for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
607 if (count != strlen(o2nm_fence_method_desc[i]) + 1)
608 continue;
609 if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
610 continue;
611 if (cluster->cl_fence_method != i) {
612 printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
613 o2nm_fence_method_desc[i]);
614 cluster->cl_fence_method = i;
615 }
616 return count;
617 }
618
619bail:
620 return -EINVAL;
621}
622
582static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 623static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
583 .attr = { .ca_owner = THIS_MODULE, 624 .attr = { .ca_owner = THIS_MODULE,
584 .ca_name = "idle_timeout_ms", 625 .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
603 .store = o2nm_cluster_attr_reconnect_delay_ms_write, 644 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
604}; 645};
605 646
647static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
648 .attr = { .ca_owner = THIS_MODULE,
649 .ca_name = "fence_method",
650 .ca_mode = S_IRUGO | S_IWUSR },
651 .show = o2nm_cluster_attr_fence_method_read,
652 .store = o2nm_cluster_attr_fence_method_write,
653};
654
606static struct configfs_attribute *o2nm_cluster_attrs[] = { 655static struct configfs_attribute *o2nm_cluster_attrs[] = {
607 &o2nm_cluster_attr_idle_timeout_ms.attr, 656 &o2nm_cluster_attr_idle_timeout_ms.attr,
608 &o2nm_cluster_attr_keepalive_delay_ms.attr, 657 &o2nm_cluster_attr_keepalive_delay_ms.attr,
609 &o2nm_cluster_attr_reconnect_delay_ms.attr, 658 &o2nm_cluster_attr_reconnect_delay_ms.attr,
659 &o2nm_cluster_attr_fence_method.attr,
610 NULL, 660 NULL,
611}; 661};
612static ssize_t o2nm_cluster_show(struct config_item *item, 662static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
778 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
831 cluster->cl_fence_method = O2NM_FENCE_RESET;
781 832
782 ret = &cluster->cl_group; 833 ret = &cluster->cl_group;
783 o2nm_single_cluster = cluster; 834 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4a..09ea2d388bb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36enum o2nm_fence_method {
37 O2NM_FENCE_RESET = 0,
38 O2NM_FENCE_PANIC,
39 O2NM_FENCE_METHODS, /* Number of fence methods */
40};
41
36struct o2nm_node { 42struct o2nm_node {
37 spinlock_t nd_lock; 43 spinlock_t nd_lock;
38 struct config_item nd_item; 44 struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
58 unsigned int cl_idle_timeout_ms; 64 unsigned int cl_idle_timeout_ms;
59 unsigned int cl_keepalive_delay_ms; 65 unsigned int cl_keepalive_delay_ms;
60 unsigned int cl_reconnect_delay_ms; 66 unsigned int cl_reconnect_delay_ms;
67 enum o2nm_fence_method cl_fence_method;
61 68
62 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 69 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
63 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 70 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a..639024033fc 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
74 * threads can still schedule, etc, etc */ 74 * threads can still schedule, etc, etc */
75 o2hb_stop_all_regions(); 75 o2hb_stop_all_regions();
76 76
77 printk("ocfs2 is very sorry to be fencing this system by restarting\n"); 77 switch (o2nm_single_cluster->cl_fence_method) {
78 emergency_restart(); 78 case O2NM_FENCE_PANIC:
79 panic("*** ocfs2 is very sorry to be fencing this system by "
80 "panicing ***\n");
81 break;
82 default:
83 WARN_ON(o2nm_single_cluster->cl_fence_method >=
84 O2NM_FENCE_METHODS);
85 case O2NM_FENCE_RESET:
86 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
87 "system by restarting ***\n");
88 emergency_restart();
89 break;
90 };
79} 91}
80 92
81/* Indicate that a timeout occured on a hearbeat region write. The 93/* Indicate that a timeout occured on a hearbeat region write. The
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 83bcaf266b3..03ccf9a7b1f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2586,7 +2586,7 @@ fail:
2586 * is complete everywhere. if the target dies while this is 2586 * is complete everywhere. if the target dies while this is
2587 * going on, some nodes could potentially see the target as the 2587 * going on, some nodes could potentially see the target as the
2588 * master, so it is important that my recovery finds the migration 2588 * master, so it is important that my recovery finds the migration
2589 * mle and sets the master to UNKNONWN. */ 2589 * mle and sets the master to UNKNOWN. */
2590 2590
2591 2591
2592 /* wait for new node to assert master */ 2592 /* wait for new node to assert master */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d9fa3d22e17..2f9e4e19a4f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2589,6 +2589,14 @@ retry:
2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2590 ret = 0; 2590 ret = 0;
2591 } 2591 }
2592 if (ret == -EAGAIN) {
2593 mlog(0, "%s: trying to start recovery of node "
2594 "%u, but node %u is waiting for last recovery "
2595 "to complete, backoff for a bit\n", dlm->name,
2596 dead_node, nodenum);
2597 msleep(100);
2598 goto retry;
2599 }
2592 if (ret < 0) { 2600 if (ret < 0) {
2593 struct dlm_lock_resource *res; 2601 struct dlm_lock_resource *res;
2594 /* this is now a serious problem, possibly ENOMEM 2602 /* this is now a serious problem, possibly ENOMEM
@@ -2608,14 +2616,6 @@ retry:
2608 * another ENOMEM */ 2616 * another ENOMEM */
2609 msleep(100); 2617 msleep(100);
2610 goto retry; 2618 goto retry;
2611 } else if (ret == EAGAIN) {
2612 mlog(0, "%s: trying to start recovery of node "
2613 "%u, but node %u is waiting for last recovery "
2614 "to complete, backoff for a bit\n", dlm->name,
2615 dead_node, nodenum);
2616 /* TODO Look into replacing msleep with cond_resched() */
2617 msleep(100);
2618 goto retry;
2619 } 2619 }
2620 } 2620 }
2621 2621
@@ -2639,7 +2639,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2639 dlm->name, br->node_idx, br->dead_node, 2639 dlm->name, br->node_idx, br->dead_node,
2640 dlm->reco.dead_node, dlm->reco.new_master); 2640 dlm->reco.dead_node, dlm->reco.new_master);
2641 spin_unlock(&dlm->spinlock); 2641 spin_unlock(&dlm->spinlock);
2642 return EAGAIN; 2642 return -EAGAIN;
2643 } 2643 }
2644 spin_unlock(&dlm->spinlock); 2644 spin_unlock(&dlm->spinlock);
2645 2645
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 0d38d67194c..c5e4a49e3a1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1855,7 +1855,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1855 * outstanding lock request, so a cancel convert is 1855 * outstanding lock request, so a cancel convert is
1856 * required. We intentionally overwrite 'ret' - if the 1856 * required. We intentionally overwrite 'ret' - if the
1857 * cancel fails and the lock was granted, it's easier 1857 * cancel fails and the lock was granted, it's easier
1858 * to just bubble sucess back up to the user. 1858 * to just bubble success back up to the user.
1859 */ 1859 */
1860 ret = ocfs2_flock_handle_signal(lockres, level); 1860 ret = ocfs2_flock_handle_signal(lockres, level);
1861 } else if (!ret && (level > lockres->l_level)) { 1861 } else if (!ret && (level > lockres->l_level)) {
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 843db64e9d4..d35a27f4523 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -37,6 +37,7 @@
37#include "extent_map.h" 37#include "extent_map.h"
38#include "inode.h" 38#include "inode.h"
39#include "super.h" 39#include "super.h"
40#include "symlink.h"
40 41
41#include "buffer_head_io.h" 42#include "buffer_head_io.h"
42 43
@@ -703,6 +704,12 @@ out:
703 return ret; 704 return ret;
704} 705}
705 706
707/*
708 * The ocfs2_fiemap_inline() may be a little bit misleading, since
709 * it not only handles the fiemap for inlined files, but also deals
710 * with the fast symlink, cause they have no difference for extent
711 * mapping per se.
712 */
706static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 713static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
707 struct fiemap_extent_info *fieinfo, 714 struct fiemap_extent_info *fieinfo,
708 u64 map_start) 715 u64 map_start)
@@ -715,11 +722,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
715 struct ocfs2_inode_info *oi = OCFS2_I(inode); 722 struct ocfs2_inode_info *oi = OCFS2_I(inode);
716 723
717 di = (struct ocfs2_dinode *)di_bh->b_data; 724 di = (struct ocfs2_dinode *)di_bh->b_data;
718 id_count = le16_to_cpu(di->id2.i_data.id_count); 725 if (ocfs2_inode_is_fast_symlink(inode))
726 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
727 else
728 id_count = le16_to_cpu(di->id2.i_data.id_count);
719 729
720 if (map_start < id_count) { 730 if (map_start < id_count) {
721 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 731 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
722 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); 732 if (ocfs2_inode_is_fast_symlink(inode))
733 phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
734 else
735 phys += offsetof(struct ocfs2_dinode,
736 id2.i_data.id_data);
723 737
724 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 738 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
725 flags); 739 flags);
@@ -756,9 +770,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
756 down_read(&OCFS2_I(inode)->ip_alloc_sem); 770 down_read(&OCFS2_I(inode)->ip_alloc_sem);
757 771
758 /* 772 /*
759 * Handle inline-data separately. 773 * Handle inline-data and fast symlink separately.
760 */ 774 */
761 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 775 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
776 ocfs2_inode_is_fast_symlink(inode)) {
762 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 777 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
763 goto out_unlock; 778 goto out_unlock;
764 } 779 }
@@ -786,6 +801,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
786 fe_flags = 0; 801 fe_flags = 0;
787 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 802 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
788 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 803 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
804 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
805 fe_flags |= FIEMAP_EXTENT_SHARED;
789 if (is_last) 806 if (is_last)
790 fe_flags |= FIEMAP_EXTENT_LAST; 807 fe_flags |= FIEMAP_EXTENT_LAST;
791 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 808 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index de059f49058..06ccf6a86d3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1772,7 +1772,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1772 loff_t *ppos, 1772 loff_t *ppos,
1773 size_t count, 1773 size_t count,
1774 int appending, 1774 int appending,
1775 int *direct_io) 1775 int *direct_io,
1776 int *has_refcount)
1776{ 1777{
1777 int ret = 0, meta_level = 0; 1778 int ret = 0, meta_level = 0;
1778 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1833 saved_pos, 1834 saved_pos,
1834 count, 1835 count,
1835 &meta_level); 1836 &meta_level);
1837 if (has_refcount)
1838 *has_refcount = 1;
1836 } 1839 }
1837 1840
1838 if (ret < 0) { 1841 if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1856 break; 1859 break;
1857 } 1860 }
1858 1861
1862 if (has_refcount && *has_refcount == 1) {
1863 *direct_io = 0;
1864 break;
1865 }
1859 /* 1866 /*
1860 * Allowing concurrent direct writes means 1867 * Allowing concurrent direct writes means
1861 * i_size changes wouldn't be synchronized, so 1868 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1899 loff_t pos) 1906 loff_t pos)
1900{ 1907{
1901 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1908 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
1902 int can_do_direct; 1909 int can_do_direct, has_refcount = 0;
1903 ssize_t written = 0; 1910 ssize_t written = 0;
1904 size_t ocount; /* original count */ 1911 size_t ocount; /* original count */
1905 size_t count; /* after file limit checks */ 1912 size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
1942 can_do_direct = direct_io; 1949 can_do_direct = direct_io;
1943 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 1950 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
1944 iocb->ki_left, appending, 1951 iocb->ki_left, appending,
1945 &can_do_direct); 1952 &can_do_direct, &has_refcount);
1946 if (ret < 0) { 1953 if (ret < 0) {
1947 mlog_errno(ret); 1954 mlog_errno(ret);
1948 goto out; 1955 goto out;
@@ -2006,14 +2013,16 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2015
2009 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 2016 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
2017 (file->f_flags & O_DIRECT && has_refcount)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2019 pos + count - 1);
2012 if (ret < 0) 2020 if (ret < 0)
2013 written = ret; 2021 written = ret;
2014 2022
2015 if (!ret && (old_size != i_size_read(inode) || 2023 if (!ret && (old_size != i_size_read(inode) ||
2016 old_clusters != OCFS2_I(inode)->ip_clusters)) { 2024 old_clusters != OCFS2_I(inode)->ip_clusters ||
2025 has_refcount)) {
2017 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2026 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2018 if (ret < 0) 2027 if (ret < 0)
2019 written = ret; 2028 written = ret;
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2062 int ret; 2071 int ret;
2063 2072
2064 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2073 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
2065 sd->total_len, 0, NULL); 2074 sd->total_len, 0, NULL, NULL);
2066 if (ret < 0) { 2075 if (ret < 0) {
2067 mlog_errno(ret); 2076 mlog_errno(ret);
2068 return ret; 2077 return ret;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 54c16b66327..bf34c491ae9 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle,
659 659
660 default: 660 default:
661 status = -EINVAL; 661 status = -EINVAL;
662 mlog(ML_ERROR, "Uknown access type!\n"); 662 mlog(ML_ERROR, "Unknown access type!\n");
663 } 663 }
664 if (!status && ocfs2_meta_ecc(osb) && triggers) 664 if (!status && ocfs2_meta_ecc(osb) && triggers)
665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f010b22b1c4..50fb26a6a5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2108,6 +2108,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2108 } 2108 }
2109 did_quota_inode = 1; 2109 did_quota_inode = 1;
2110 2110
2111 inode->i_nlink = 0;
2111 /* do the real work now. */ 2112 /* do the real work now. */
2112 status = ocfs2_mknod_locked(osb, dir, inode, 2113 status = ocfs2_mknod_locked(osb, dir, inode,
2113 0, &new_di_bh, parent_di_bh, handle, 2114 0, &new_di_bh, parent_di_bh, handle,
@@ -2136,6 +2137,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2136 if (status < 0) 2137 if (status < 0)
2137 mlog_errno(status); 2138 mlog_errno(status);
2138 2139
2140 insert_inode_hash(inode);
2139leave: 2141leave:
2140 if (status < 0 && did_quota_inode) 2142 if (status < 0 && did_quota_inode)
2141 vfs_dq_free_inode(inode); 2143 vfs_dq_free_inode(inode);
@@ -2267,6 +2269,8 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2267 di = (struct ocfs2_dinode *)di_bh->b_data; 2269 di = (struct ocfs2_dinode *)di_bh->b_data;
2268 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2270 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2269 di->i_orphaned_slot = 0; 2271 di->i_orphaned_slot = 0;
2272 inode->i_nlink = 1;
2273 ocfs2_set_links_count(di, inode->i_nlink);
2270 ocfs2_journal_dirty(handle, di_bh); 2274 ocfs2_journal_dirty(handle, di_bh);
2271 2275
2272 status = ocfs2_add_entry(handle, dentry, inode, 2276 status = ocfs2_add_entry(handle, dentry, inode,
@@ -2284,7 +2288,6 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2284 goto out_commit; 2288 goto out_commit;
2285 } 2289 }
2286 2290
2287 insert_inode_hash(inode);
2288 dentry->d_op = &ocfs2_dentry_ops; 2291 dentry->d_op = &ocfs2_dentry_ops;
2289 d_instantiate(dentry, inode); 2292 d_instantiate(dentry, inode);
2290 status = 0; 2293 status = 0;
@@ -2326,4 +2329,5 @@ const struct inode_operations ocfs2_dir_iops = {
2326 .getxattr = generic_getxattr, 2329 .getxattr = generic_getxattr,
2327 .listxattr = ocfs2_listxattr, 2330 .listxattr = ocfs2_listxattr,
2328 .removexattr = generic_removexattr, 2331 .removexattr = generic_removexattr,
2332 .fiemap = ocfs2_fiemap,
2329}; 2333};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d863870..9362eea7424 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -245,9 +245,11 @@ enum ocfs2_mount_options
245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ 246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ 247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ 248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */
249 OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ 249 OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access
250 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 250 control lists */
251 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
252 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
251}; 253};
252 254
253#define OCFS2_OSB_SOFT_RO 0x0001 255#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e9431e4a5e7..1a1a679e51b 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1202,7 +1202,7 @@ struct ocfs2_local_disk_dqinfo {
1202/* Header of one chunk of a quota file */ 1202/* Header of one chunk of a quota file */
1203struct ocfs2_local_disk_chunk { 1203struct ocfs2_local_disk_chunk {
1204 __le32 dqc_free; /* Number of free entries in the bitmap */ 1204 __le32 dqc_free; /* Number of free entries in the bitmap */
1205 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding 1205 __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1206 * chunk of quota file */ 1206 * chunk of quota file */
1207}; 1207};
1208 1208
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index e5df9d170b0..123bc520a2c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -17,10 +17,6 @@
17 17
18#include "ocfs2.h" 18#include "ocfs2.h"
19 19
20/* Common stuff */
21/* id number of quota format */
22#define QFMT_OCFS2 3
23
24/* 20/*
25 * In-memory structures 21 * In-memory structures
26 */ 22 */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 1a2c50a759f..21f9e71223c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1325,7 +1325,7 @@ out:
1325 return status; 1325 return status;
1326} 1326}
1327 1327
1328static struct quota_format_ops ocfs2_format_ops = { 1328static const struct quota_format_ops ocfs2_format_ops = {
1329 .check_quota_file = ocfs2_local_check_quota_file, 1329 .check_quota_file = ocfs2_local_check_quota_file,
1330 .read_file_info = ocfs2_local_read_info, 1330 .read_file_info = ocfs2_local_read_info,
1331 .write_file_info = ocfs2_global_write_info, 1331 .write_file_info = ocfs2_global_write_info,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3a0df7a1b81..74db2be75dd 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -276,7 +276,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
276 spin_unlock(&osb->osb_lock); 276 spin_unlock(&osb->osb_lock);
277} 277}
278 278
279void ocfs2_kref_remove_refcount_tree(struct kref *kref) 279static void ocfs2_kref_remove_refcount_tree(struct kref *kref)
280{ 280{
281 struct ocfs2_refcount_tree *tree = 281 struct ocfs2_refcount_tree *tree =
282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
@@ -524,23 +524,6 @@ out:
524 return ret; 524 return ret;
525} 525}
526 526
527int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
528 struct ocfs2_refcount_tree **ret_tree,
529 struct buffer_head **ref_bh)
530{
531 int ret;
532 u64 ref_blkno;
533
534 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
535 if (ret) {
536 mlog_errno(ret);
537 return ret;
538 }
539
540 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
541 rw, ret_tree, ref_bh);
542}
543
544void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 527void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
545 struct ocfs2_refcount_tree *tree, int rw) 528 struct ocfs2_refcount_tree *tree, int rw)
546{ 529{
@@ -969,6 +952,103 @@ out:
969} 952}
970 953
971/* 954/*
955 * Find the end range for a leaf refcount block indicated by
956 * el->l_recs[index].e_blkno.
957 */
958static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
959 struct buffer_head *ref_root_bh,
960 struct ocfs2_extent_block *eb,
961 struct ocfs2_extent_list *el,
962 int index, u32 *cpos_end)
963{
964 int ret, i, subtree_root;
965 u32 cpos;
966 u64 blkno;
967 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
968 struct ocfs2_path *left_path = NULL, *right_path = NULL;
969 struct ocfs2_extent_tree et;
970 struct ocfs2_extent_list *tmp_el;
971
972 if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
973 /*
974 * We have a extent rec after index, so just use the e_cpos
975 * of the next extent rec.
976 */
977 *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
978 return 0;
979 }
980
981 if (!eb || (eb && !eb->h_next_leaf_blk)) {
982 /*
983 * We are the last extent rec, so any high cpos should
984 * be stored in this leaf refcount block.
985 */
986 *cpos_end = UINT_MAX;
987 return 0;
988 }
989
990 /*
991 * If the extent block isn't the last one, we have to find
992 * the subtree root between this extent block and the next
993 * leaf extent block and get the corresponding e_cpos from
994 * the subroot. Otherwise we may corrupt the b-tree.
995 */
996 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
997
998 left_path = ocfs2_new_path_from_et(&et);
999 if (!left_path) {
1000 ret = -ENOMEM;
1001 mlog_errno(ret);
1002 goto out;
1003 }
1004
1005 cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
1006 ret = ocfs2_find_path(ci, left_path, cpos);
1007 if (ret) {
1008 mlog_errno(ret);
1009 goto out;
1010 }
1011
1012 right_path = ocfs2_new_path_from_path(left_path);
1013 if (!right_path) {
1014 ret = -ENOMEM;
1015 mlog_errno(ret);
1016 goto out;
1017 }
1018
1019 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
1020 if (ret) {
1021 mlog_errno(ret);
1022 goto out;
1023 }
1024
1025 ret = ocfs2_find_path(ci, right_path, cpos);
1026 if (ret) {
1027 mlog_errno(ret);
1028 goto out;
1029 }
1030
1031 subtree_root = ocfs2_find_subtree_root(&et, left_path,
1032 right_path);
1033
1034 tmp_el = left_path->p_node[subtree_root].el;
1035 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
1036 for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
1037 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
1038 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
1039 break;
1040 }
1041 }
1042
1043 BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
1044
1045out:
1046 ocfs2_free_path(left_path);
1047 ocfs2_free_path(right_path);
1048 return ret;
1049}
1050
1051/*
972 * Given a cpos and len, try to find the refcount record which contains cpos. 1052 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record. 1053 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos 1054 * 2. If cpos can't be found, return a fake record which start from cpos
@@ -983,10 +1063,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
983 struct buffer_head **ret_bh) 1063 struct buffer_head **ret_bh)
984{ 1064{
985 int ret = 0, i, found; 1065 int ret = 0, i, found;
986 u32 low_cpos; 1066 u32 low_cpos, uninitialized_var(cpos_end);
987 struct ocfs2_extent_list *el; 1067 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL; 1068 struct ocfs2_extent_rec *rec = NULL;
989 struct ocfs2_extent_block *eb; 1069 struct ocfs2_extent_block *eb = NULL;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 1070 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1071 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb = 1072 struct ocfs2_refcount_block *rb =
@@ -1034,12 +1114,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
1034 } 1114 }
1035 } 1115 }
1036 1116
1037 /* adjust len when we have ocfs2_extent_rec after it. */ 1117 if (found) {
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 1118 ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
1039 tmp = &el->l_recs[i+1]; 1119 eb, el, i, &cpos_end);
1120 if (ret) {
1121 mlog_errno(ret);
1122 goto out;
1123 }
1040 1124
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 1125 if (cpos_end < low_cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos; 1126 len = cpos_end - low_cpos;
1043 } 1127 }
1044 1128
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 1129 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
@@ -1418,7 +1502,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1418 1502
1419 /* change old and new rl_used accordingly. */ 1503 /* change old and new rl_used accordingly. */
1420 le16_add_cpu(&rl->rl_used, -num_moved); 1504 le16_add_cpu(&rl->rl_used, -num_moved);
1421 new_rl->rl_used = cpu_to_le32(num_moved); 1505 new_rl->rl_used = cpu_to_le16(num_moved);
1422 1506
1423 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1507 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1424 sizeof(struct ocfs2_refcount_rec), 1508 sizeof(struct ocfs2_refcount_rec),
@@ -1797,7 +1881,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1797 recs_need++; 1881 recs_need++;
1798 1882
1799 /* If the leaf block don't have enough record, expand it. */ 1883 /* If the leaf block don't have enough record, expand it. */
1800 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1884 if (le16_to_cpu(rf_list->rl_used) + recs_need >
1885 le16_to_cpu(rf_list->rl_count)) {
1801 struct ocfs2_refcount_rec tmp_rec; 1886 struct ocfs2_refcount_rec tmp_rec;
1802 u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1887 u64 cpos = le64_to_cpu(orig_rec->r_cpos);
1803 len = le32_to_cpu(orig_rec->r_clusters); 1888 len = le32_to_cpu(orig_rec->r_clusters);
@@ -1859,7 +1944,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1859 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1944 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
1860 le64_add_cpu(&tail_rec->r_cpos, 1945 le64_add_cpu(&tail_rec->r_cpos,
1861 le32_to_cpu(tail_rec->r_clusters) - len); 1946 le32_to_cpu(tail_rec->r_clusters) - len);
1862 tail_rec->r_clusters = le32_to_cpu(len); 1947 tail_rec->r_clusters = cpu_to_le32(len);
1863 } 1948 }
1864 1949
1865 /* 1950 /*
@@ -2431,7 +2516,7 @@ out:
2431 * we gonna touch and whether we need to create new blocks. 2516 * we gonna touch and whether we need to create new blocks.
2432 * 2517 *
2433 * Normally the refcount blocks store these refcount should be 2518 * Normally the refcount blocks store these refcount should be
2434 * continguous also, so that we can get the number easily. 2519 * contiguous also, so that we can get the number easily.
2435 * As for meta_ac, we will at most add split 2 refcount record and 2520 * As for meta_ac, we will at most add split 2 refcount record and
2436 * 2 more refcount block, so just check it in a rough way. 2521 * 2 more refcount block, so just check it in a rough way.
2437 * 2522 *
@@ -3840,8 +3925,7 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
3840 } 3925 }
3841 3926
3842 ret = ocfs2_insert_extent(handle, et, cpos, 3927 ret = ocfs2_insert_extent(handle, et, cpos,
3843 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 3928 ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
3844 p_cluster)),
3845 num_clusters, ext_flags, meta_ac); 3929 num_clusters, ext_flags, meta_ac);
3846 if (ret) { 3930 if (ret) {
3847 mlog_errno(ret); 3931 mlog_errno(ret);
@@ -4253,8 +4337,8 @@ static int ocfs2_user_path_parent(const char __user *path,
4253 * @new_dentry: target dentry 4337 * @new_dentry: target dentry
4254 * @preserve: if true, preserve all file attributes 4338 * @preserve: if true, preserve all file attributes
4255 */ 4339 */
4256int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, 4340static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4257 struct dentry *new_dentry, bool preserve) 4341 struct dentry *new_dentry, bool preserve)
4258{ 4342{
4259 struct inode *inode = old_dentry->d_inode; 4343 struct inode *inode = old_dentry->d_inode;
4260 int error; 4344 int error;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index ff4c798a563..da78a2a334f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -814,7 +814,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
814static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 814static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
815{ 815{
816 dlm_lockspace_t *fsdlm; 816 dlm_lockspace_t *fsdlm;
817 struct ocfs2_live_connection *control; 817 struct ocfs2_live_connection *uninitialized_var(control);
818 int rc = 0; 818 int rc = 0;
819 819
820 BUG_ON(conn == NULL); 820 BUG_ON(conn == NULL);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 3f2f1c45b7b..f3df0baa9a4 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -620,51 +620,46 @@ error:
620 620
621static ctl_table ocfs2_nm_table[] = { 621static ctl_table ocfs2_nm_table[] = {
622 { 622 {
623 .ctl_name = 1,
624 .procname = "hb_ctl_path", 623 .procname = "hb_ctl_path",
625 .data = ocfs2_hb_ctl_path, 624 .data = ocfs2_hb_ctl_path,
626 .maxlen = OCFS2_MAX_HB_CTL_PATH, 625 .maxlen = OCFS2_MAX_HB_CTL_PATH,
627 .mode = 0644, 626 .mode = 0644,
628 .proc_handler = &proc_dostring, 627 .proc_handler = proc_dostring,
629 .strategy = &sysctl_string,
630 }, 628 },
631 { .ctl_name = 0 } 629 { }
632}; 630};
633 631
634static ctl_table ocfs2_mod_table[] = { 632static ctl_table ocfs2_mod_table[] = {
635 { 633 {
636 .ctl_name = FS_OCFS2_NM,
637 .procname = "nm", 634 .procname = "nm",
638 .data = NULL, 635 .data = NULL,
639 .maxlen = 0, 636 .maxlen = 0,
640 .mode = 0555, 637 .mode = 0555,
641 .child = ocfs2_nm_table 638 .child = ocfs2_nm_table
642 }, 639 },
643 { .ctl_name = 0} 640 { }
644}; 641};
645 642
646static ctl_table ocfs2_kern_table[] = { 643static ctl_table ocfs2_kern_table[] = {
647 { 644 {
648 .ctl_name = FS_OCFS2,
649 .procname = "ocfs2", 645 .procname = "ocfs2",
650 .data = NULL, 646 .data = NULL,
651 .maxlen = 0, 647 .maxlen = 0,
652 .mode = 0555, 648 .mode = 0555,
653 .child = ocfs2_mod_table 649 .child = ocfs2_mod_table
654 }, 650 },
655 { .ctl_name = 0} 651 { }
656}; 652};
657 653
658static ctl_table ocfs2_root_table[] = { 654static ctl_table ocfs2_root_table[] = {
659 { 655 {
660 .ctl_name = CTL_FS,
661 .procname = "fs", 656 .procname = "fs",
662 .data = NULL, 657 .data = NULL,
663 .maxlen = 0, 658 .maxlen = 0,
664 .mode = 0555, 659 .mode = 0555,
665 .child = ocfs2_kern_table 660 .child = ocfs2_kern_table
666 }, 661 },
667 { .ctl_name = 0 } 662 { }
668}; 663};
669 664
670static struct ctl_table_header *ocfs2_table_header = NULL; 665static struct ctl_table_header *ocfs2_table_header = NULL;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2bfe0..26069917a9f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,6 +100,8 @@ struct mount_options
100static int ocfs2_parse_options(struct super_block *sb, char *options, 100static int ocfs2_parse_options(struct super_block *sb, char *options,
101 struct mount_options *mopt, 101 struct mount_options *mopt,
102 int is_remount); 102 int is_remount);
103static int ocfs2_check_set_options(struct super_block *sb,
104 struct mount_options *options);
103static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 105static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
104static void ocfs2_put_super(struct super_block *sb); 106static void ocfs2_put_super(struct super_block *sb);
105static int ocfs2_mount_volume(struct super_block *sb); 107static int ocfs2_mount_volume(struct super_block *sb);
@@ -600,7 +602,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
600 602
601 lock_kernel(); 603 lock_kernel();
602 604
603 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 605 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
606 !ocfs2_check_set_options(sb, &parsed_options)) {
604 ret = -EINVAL; 607 ret = -EINVAL;
605 goto out; 608 goto out;
606 } 609 }
@@ -691,8 +694,6 @@ unlock_osb:
691 if (!ret) { 694 if (!ret) {
692 /* Only save off the new mount options in case of a successful 695 /* Only save off the new mount options in case of a successful
693 * remount. */ 696 * remount. */
694 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
695 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
696 osb->s_mount_opt = parsed_options.mount_opt; 697 osb->s_mount_opt = parsed_options.mount_opt;
697 osb->s_atime_quantum = parsed_options.atime_quantum; 698 osb->s_atime_quantum = parsed_options.atime_quantum;
698 osb->preferred_slot = parsed_options.slot; 699 osb->preferred_slot = parsed_options.slot;
@@ -701,6 +702,10 @@ unlock_osb:
701 702
702 if (!ocfs2_is_hard_readonly(osb)) 703 if (!ocfs2_is_hard_readonly(osb))
703 ocfs2_set_journal_params(osb); 704 ocfs2_set_journal_params(osb);
705
706 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
707 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
708 MS_POSIXACL : 0);
704 } 709 }
705out: 710out:
706 unlock_kernel(); 711 unlock_kernel();
@@ -1011,31 +1016,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1011 brelse(bh); 1016 brelse(bh);
1012 bh = NULL; 1017 bh = NULL;
1013 1018
1014 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) 1019 if (!ocfs2_check_set_options(sb, &parsed_options)) {
1015 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1020 status = -EINVAL;
1016 1021 goto read_super_error;
1022 }
1017 osb->s_mount_opt = parsed_options.mount_opt; 1023 osb->s_mount_opt = parsed_options.mount_opt;
1018 osb->s_atime_quantum = parsed_options.atime_quantum; 1024 osb->s_atime_quantum = parsed_options.atime_quantum;
1019 osb->preferred_slot = parsed_options.slot; 1025 osb->preferred_slot = parsed_options.slot;
1020 osb->osb_commit_interval = parsed_options.commit_interval; 1026 osb->osb_commit_interval = parsed_options.commit_interval;
1021 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1027 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
1022 osb->local_alloc_bits = osb->local_alloc_default_bits; 1028 osb->local_alloc_bits = osb->local_alloc_default_bits;
1023 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
1024 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1025 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1026 status = -EINVAL;
1027 mlog(ML_ERROR, "User quotas were requested, but this "
1028 "filesystem does not have the feature enabled.\n");
1029 goto read_super_error;
1030 }
1031 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1032 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1033 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1034 status = -EINVAL;
1035 mlog(ML_ERROR, "Group quotas were requested, but this "
1036 "filesystem does not have the feature enabled.\n");
1037 goto read_super_error;
1038 }
1039 1029
1040 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1030 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1041 if (status) 1031 if (status)
@@ -1245,6 +1235,40 @@ static struct file_system_type ocfs2_fs_type = {
1245 .next = NULL 1235 .next = NULL
1246}; 1236};
1247 1237
1238static int ocfs2_check_set_options(struct super_block *sb,
1239 struct mount_options *options)
1240{
1241 if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
1242 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1243 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1244 mlog(ML_ERROR, "User quotas were requested, but this "
1245 "filesystem does not have the feature enabled.\n");
1246 return 0;
1247 }
1248 if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1249 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1250 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1251 mlog(ML_ERROR, "Group quotas were requested, but this "
1252 "filesystem does not have the feature enabled.\n");
1253 return 0;
1254 }
1255 if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
1256 !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
1257 mlog(ML_ERROR, "ACL support requested but extended attributes "
1258 "feature is not enabled\n");
1259 return 0;
1260 }
1261 /* No ACL setting specified? Use XATTR feature... */
1262 if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
1263 OCFS2_MOUNT_NO_POSIX_ACL))) {
1264 if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
1265 options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1266 else
1267 options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1268 }
1269 return 1;
1270}
1271
1248static int ocfs2_parse_options(struct super_block *sb, 1272static int ocfs2_parse_options(struct super_block *sb,
1249 char *options, 1273 char *options,
1250 struct mount_options *mopt, 1274 struct mount_options *mopt,
@@ -1392,40 +1416,19 @@ static int ocfs2_parse_options(struct super_block *sb,
1392 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1416 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
1393 break; 1417 break;
1394 case Opt_usrquota: 1418 case Opt_usrquota:
1395 /* We check only on remount, otherwise features
1396 * aren't yet initialized. */
1397 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1398 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1399 mlog(ML_ERROR, "User quota requested but "
1400 "filesystem feature is not set\n");
1401 status = 0;
1402 goto bail;
1403 }
1404 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; 1419 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1405 break; 1420 break;
1406 case Opt_grpquota: 1421 case Opt_grpquota:
1407 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1408 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1409 mlog(ML_ERROR, "Group quota requested but "
1410 "filesystem feature is not set\n");
1411 status = 0;
1412 goto bail;
1413 }
1414 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1422 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1415 break; 1423 break;
1416#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1417 case Opt_acl: 1424 case Opt_acl:
1418 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1425 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1426 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
1419 break; 1427 break;
1420 case Opt_noacl: 1428 case Opt_noacl:
1429 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1421 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1430 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1422 break; 1431 break;
1423#else
1424 case Opt_acl:
1425 case Opt_noacl:
1426 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1427 break;
1428#endif
1429 default: 1432 default:
1430 mlog(ML_ERROR, 1433 mlog(ML_ERROR,
1431 "Unrecognized mount option \"%s\" " 1434 "Unrecognized mount option \"%s\" "
@@ -1502,12 +1505,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1502 if (opts & OCFS2_MOUNT_INODE64) 1505 if (opts & OCFS2_MOUNT_INODE64)
1503 seq_printf(s, ",inode64"); 1506 seq_printf(s, ",inode64");
1504 1507
1505#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1506 if (opts & OCFS2_MOUNT_POSIX_ACL) 1508 if (opts & OCFS2_MOUNT_POSIX_ACL)
1507 seq_printf(s, ",acl"); 1509 seq_printf(s, ",acl");
1508 else 1510 else
1509 seq_printf(s, ",noacl"); 1511 seq_printf(s, ",noacl");
1510#endif
1511 1512
1512 return 0; 1513 return 0;
1513} 1514}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index e3421030a69..49b133ccbf1 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
163 .getxattr = generic_getxattr, 163 .getxattr = generic_getxattr,
164 .listxattr = ocfs2_listxattr, 164 .listxattr = ocfs2_listxattr,
165 .removexattr = generic_removexattr, 165 .removexattr = generic_removexattr,
166 .fiemap = ocfs2_fiemap,
166}; 167};
167const struct inode_operations ocfs2_fast_symlink_inode_operations = { 168const struct inode_operations ocfs2_fast_symlink_inode_operations = {
168 .readlink = ocfs2_readlink, 169 .readlink = ocfs2_readlink,
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = {
174 .getxattr = generic_getxattr, 175 .getxattr = generic_getxattr,
175 .listxattr = ocfs2_listxattr, 176 .listxattr = ocfs2_listxattr,
176 .removexattr = generic_removexattr, 177 .removexattr = generic_removexattr,
178 .fiemap = ocfs2_fiemap,
177}; 179};
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df..8fc6fb071c6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
98 98
99struct xattr_handler *ocfs2_xattr_handlers[] = { 99struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler, 100 &ocfs2_xattr_user_handler,
101#ifdef CONFIG_OCFS2_FS_POSIX_ACL
102 &ocfs2_xattr_acl_access_handler, 101 &ocfs2_xattr_acl_access_handler,
103 &ocfs2_xattr_acl_default_handler, 102 &ocfs2_xattr_acl_default_handler,
104#endif
105 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
106 &ocfs2_xattr_security_handler, 104 &ocfs2_xattr_security_handler,
107 NULL 105 NULL
@@ -109,12 +107,10 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
109 107
110static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
111 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
112#ifdef CONFIG_OCFS2_FS_POSIX_ACL
113 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 110 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
114 = &ocfs2_xattr_acl_access_handler, 111 = &ocfs2_xattr_acl_access_handler,
115 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 112 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
116 = &ocfs2_xattr_acl_default_handler, 113 = &ocfs2_xattr_acl_default_handler,
117#endif
118 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 114 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
119 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 115 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
120}; 116};
@@ -205,8 +201,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
205 int offset, 201 int offset,
206 struct ocfs2_xattr_value_root **xv, 202 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh); 203 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
210 204
211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 205static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
212{ 206{
@@ -6066,7 +6060,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6066 * to the extent block, so just calculate a maximum record num. 6060 * to the extent block, so just calculate a maximum record num.
6067 */ 6061 */
6068 if (!xv->xr_list.l_tree_depth) 6062 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec; 6063 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6070 else 6064 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb, 6065 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX); 6066 XATTR_SIZE_MAX);
@@ -6978,9 +6972,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
6978 6972
6979 ret = ocfs2_init_security_get(inode, dir, &si); 6973 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) { 6974 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name, 6975 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
6982 si.value, si.value_len, 6976 si.name, si.value, si.value_len,
6983 XATTR_CREATE); 6977 XATTR_CREATE);
6984 if (ret) { 6978 if (ret) {
6985 mlog_errno(ret); 6979 mlog_errno(ret);
6986 goto leave; 6980 goto leave;
@@ -7008,9 +7002,9 @@ leave:
7008/* 7002/*
7009 * 'security' attributes support 7003 * 'security' attributes support
7010 */ 7004 */
7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7005static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7012 size_t list_size, const char *name, 7006 size_t list_size, const char *name,
7013 size_t name_len) 7007 size_t name_len, int type)
7014{ 7008{
7015 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7009 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7016 const size_t total_len = prefix_len + name_len + 1; 7010 const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7017,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
7023 return total_len; 7017 return total_len;
7024} 7018}
7025 7019
7026static int ocfs2_xattr_security_get(struct inode *inode, const char *name, 7020static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7027 void *buffer, size_t size) 7021 void *buffer, size_t size, int type)
7028{ 7022{
7029 if (strcmp(name, "") == 0) 7023 if (strcmp(name, "") == 0)
7030 return -EINVAL; 7024 return -EINVAL;
7031 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, 7025 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7032 buffer, size); 7026 name, buffer, size);
7033} 7027}
7034 7028
7035static int ocfs2_xattr_security_set(struct inode *inode, const char *name, 7029static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7036 const void *value, size_t size, int flags) 7030 const void *value, size_t size, int flags, int type)
7037{ 7031{
7038 if (strcmp(name, "") == 0) 7032 if (strcmp(name, "") == 0)
7039 return -EINVAL; 7033 return -EINVAL;
7040 7034
7041 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, 7035 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7042 size, flags); 7036 name, value, size, flags);
7043} 7037}
7044 7038
7045int ocfs2_init_security_get(struct inode *inode, 7039int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7070,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
7076/* 7070/*
7077 * 'trusted' attributes support 7071 * 'trusted' attributes support
7078 */ 7072 */
7079static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 7073static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7080 size_t list_size, const char *name, 7074 size_t list_size, const char *name,
7081 size_t name_len) 7075 size_t name_len, int type)
7082{ 7076{
7083 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7077 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7084 const size_t total_len = prefix_len + name_len + 1; 7078 const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7085,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
7091 return total_len; 7085 return total_len;
7092} 7086}
7093 7087
7094static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, 7088static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7095 void *buffer, size_t size) 7089 void *buffer, size_t size, int type)
7096{ 7090{
7097 if (strcmp(name, "") == 0) 7091 if (strcmp(name, "") == 0)
7098 return -EINVAL; 7092 return -EINVAL;
7099 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, 7093 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7100 buffer, size); 7094 name, buffer, size);
7101} 7095}
7102 7096
7103static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, 7097static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7104 const void *value, size_t size, int flags) 7098 const void *value, size_t size, int flags, int type)
7105{ 7099{
7106 if (strcmp(name, "") == 0) 7100 if (strcmp(name, "") == 0)
7107 return -EINVAL; 7101 return -EINVAL;
7108 7102
7109 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, 7103 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7110 size, flags); 7104 name, value, size, flags);
7111} 7105}
7112 7106
7113struct xattr_handler ocfs2_xattr_trusted_handler = { 7107struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7114,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
7120/* 7114/*
7121 * 'user' attributes support 7115 * 'user' attributes support
7122 */ 7116 */
7123static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 7117static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7124 size_t list_size, const char *name, 7118 size_t list_size, const char *name,
7125 size_t name_len) 7119 size_t name_len, int type)
7126{ 7120{
7127 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7121 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128 const size_t total_len = prefix_len + name_len + 1; 7122 const size_t total_len = prefix_len + name_len + 1;
7129 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7123 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7130 7124
7131 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7125 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132 return 0; 7126 return 0;
@@ -7139,31 +7133,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7139 return total_len; 7133 return total_len;
7140} 7134}
7141 7135
7142static int ocfs2_xattr_user_get(struct inode *inode, const char *name, 7136static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7143 void *buffer, size_t size) 7137 void *buffer, size_t size, int type)
7144{ 7138{
7145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7139 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7146 7140
7147 if (strcmp(name, "") == 0) 7141 if (strcmp(name, "") == 0)
7148 return -EINVAL; 7142 return -EINVAL;
7149 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7143 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150 return -EOPNOTSUPP; 7144 return -EOPNOTSUPP;
7151 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7145 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7152 buffer, size); 7146 buffer, size);
7153} 7147}
7154 7148
7155static int ocfs2_xattr_user_set(struct inode *inode, const char *name, 7149static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7156 const void *value, size_t size, int flags) 7150 const void *value, size_t size, int flags, int type)
7157{ 7151{
7158 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7152 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7159 7153
7160 if (strcmp(name, "") == 0) 7154 if (strcmp(name, "") == 0)
7161 return -EINVAL; 7155 return -EINVAL;
7162 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7156 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163 return -EOPNOTSUPP; 7157 return -EOPNOTSUPP;
7164 7158
7165 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, 7159 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7166 size, flags); 7160 name, value, size, flags);
7167} 7161}
7168 7162
7169struct xattr_handler ocfs2_xattr_user_handler = { 7163struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 08e36389f56..abd72a47f52 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info {
40extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
41extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler; 42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler; 43extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler; 44extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
47extern struct xattr_handler *ocfs2_xattr_handlers[]; 45extern struct xattr_handler *ocfs2_xattr_handlers[];
48 46
49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index e1c0ec0ae98..082234581d0 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -85,7 +85,7 @@ out:
85} 85}
86 86
87/* 87/*
88 * Tries to allocate exactly one block. Returns true if sucessful. 88 * Tries to allocate exactly one block. Returns true if successful.
89 */ 89 */
90int omfs_allocate_block(struct super_block *sb, u64 block) 90int omfs_allocate_block(struct super_block *sb, u64 block)
91{ 91{
diff --git a/fs/open.c b/fs/open.c
index 4f01e06227c..040cef72bc0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,9 @@
30#include <linux/audit.h> 30#include <linux/audit.h>
31#include <linux/falloc.h> 31#include <linux/falloc.h>
32#include <linux/fs_struct.h> 32#include <linux/fs_struct.h>
33#include <linux/ima.h>
34
35#include "internal.h"
33 36
34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 37int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{ 38{
@@ -587,6 +590,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
587 error = -EPERM; 590 error = -EPERM;
588 if (!capable(CAP_SYS_CHROOT)) 591 if (!capable(CAP_SYS_CHROOT))
589 goto dput_and_out; 592 goto dput_and_out;
593 error = security_path_chroot(&path);
594 if (error)
595 goto dput_and_out;
590 596
591 set_fs_root(current->fs, &path); 597 set_fs_root(current->fs, &path);
592 error = 0; 598 error = 0;
@@ -617,11 +623,15 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
617 if (err) 623 if (err)
618 goto out_putf; 624 goto out_putf;
619 mutex_lock(&inode->i_mutex); 625 mutex_lock(&inode->i_mutex);
626 err = security_path_chmod(dentry, file->f_vfsmnt, mode);
627 if (err)
628 goto out_unlock;
620 if (mode == (mode_t) -1) 629 if (mode == (mode_t) -1)
621 mode = inode->i_mode; 630 mode = inode->i_mode;
622 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 631 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
623 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 632 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
624 err = notify_change(dentry, &newattrs); 633 err = notify_change(dentry, &newattrs);
634out_unlock:
625 mutex_unlock(&inode->i_mutex); 635 mutex_unlock(&inode->i_mutex);
626 mnt_drop_write(file->f_path.mnt); 636 mnt_drop_write(file->f_path.mnt);
627out_putf: 637out_putf:
@@ -646,11 +656,15 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
646 if (error) 656 if (error)
647 goto dput_and_out; 657 goto dput_and_out;
648 mutex_lock(&inode->i_mutex); 658 mutex_lock(&inode->i_mutex);
659 error = security_path_chmod(path.dentry, path.mnt, mode);
660 if (error)
661 goto out_unlock;
649 if (mode == (mode_t) -1) 662 if (mode == (mode_t) -1)
650 mode = inode->i_mode; 663 mode = inode->i_mode;
651 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 664 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
652 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 665 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
653 error = notify_change(path.dentry, &newattrs); 666 error = notify_change(path.dentry, &newattrs);
667out_unlock:
654 mutex_unlock(&inode->i_mutex); 668 mutex_unlock(&inode->i_mutex);
655 mnt_drop_write(path.mnt); 669 mnt_drop_write(path.mnt);
656dput_and_out: 670dput_and_out:
@@ -664,9 +678,9 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
664 return sys_fchmodat(AT_FDCWD, filename, mode); 678 return sys_fchmodat(AT_FDCWD, filename, mode);
665} 679}
666 680
667static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 681static int chown_common(struct path *path, uid_t user, gid_t group)
668{ 682{
669 struct inode *inode = dentry->d_inode; 683 struct inode *inode = path->dentry->d_inode;
670 int error; 684 int error;
671 struct iattr newattrs; 685 struct iattr newattrs;
672 686
@@ -683,7 +697,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
683 newattrs.ia_valid |= 697 newattrs.ia_valid |=
684 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 698 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
685 mutex_lock(&inode->i_mutex); 699 mutex_lock(&inode->i_mutex);
686 error = notify_change(dentry, &newattrs); 700 error = security_path_chown(path, user, group);
701 if (!error)
702 error = notify_change(path->dentry, &newattrs);
687 mutex_unlock(&inode->i_mutex); 703 mutex_unlock(&inode->i_mutex);
688 704
689 return error; 705 return error;
@@ -700,7 +716,7 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
700 error = mnt_want_write(path.mnt); 716 error = mnt_want_write(path.mnt);
701 if (error) 717 if (error)
702 goto out_release; 718 goto out_release;
703 error = chown_common(path.dentry, user, group); 719 error = chown_common(&path, user, group);
704 mnt_drop_write(path.mnt); 720 mnt_drop_write(path.mnt);
705out_release: 721out_release:
706 path_put(&path); 722 path_put(&path);
@@ -725,7 +741,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
725 error = mnt_want_write(path.mnt); 741 error = mnt_want_write(path.mnt);
726 if (error) 742 if (error)
727 goto out_release; 743 goto out_release;
728 error = chown_common(path.dentry, user, group); 744 error = chown_common(&path, user, group);
729 mnt_drop_write(path.mnt); 745 mnt_drop_write(path.mnt);
730out_release: 746out_release:
731 path_put(&path); 747 path_put(&path);
@@ -744,7 +760,7 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
744 error = mnt_want_write(path.mnt); 760 error = mnt_want_write(path.mnt);
745 if (error) 761 if (error)
746 goto out_release; 762 goto out_release;
747 error = chown_common(path.dentry, user, group); 763 error = chown_common(&path, user, group);
748 mnt_drop_write(path.mnt); 764 mnt_drop_write(path.mnt);
749out_release: 765out_release:
750 path_put(&path); 766 path_put(&path);
@@ -767,7 +783,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
767 goto out_fput; 783 goto out_fput;
768 dentry = file->f_path.dentry; 784 dentry = file->f_path.dentry;
769 audit_inode(NULL, dentry); 785 audit_inode(NULL, dentry);
770 error = chown_common(dentry, user, group); 786 error = chown_common(&file->f_path, user, group);
771 mnt_drop_write(file->f_path.mnt); 787 mnt_drop_write(file->f_path.mnt);
772out_fput: 788out_fput:
773 fput(file); 789 fput(file);
@@ -805,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
805} 821}
806 822
807static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 823static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
808 int flags, struct file *f, 824 struct file *f,
809 int (*open)(struct inode *, struct file *), 825 int (*open)(struct inode *, struct file *),
810 const struct cred *cred) 826 const struct cred *cred)
811{ 827{
812 struct inode *inode; 828 struct inode *inode;
813 int error; 829 int error;
814 830
815 f->f_flags = flags; 831 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
816 f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
817 FMODE_PREAD | FMODE_PWRITE; 832 FMODE_PREAD | FMODE_PWRITE;
818 inode = dentry->d_inode; 833 inode = dentry->d_inode;
819 if (f->f_mode & FMODE_WRITE) { 834 if (f->f_mode & FMODE_WRITE) {
@@ -842,6 +857,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
842 if (error) 857 if (error)
843 goto cleanup_all; 858 goto cleanup_all;
844 } 859 }
860 ima_counts_get(f);
845 861
846 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 862 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
847 863
@@ -913,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
913 if (IS_ERR(dentry)) 929 if (IS_ERR(dentry))
914 goto out_err; 930 goto out_err;
915 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 931 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
916 nd->intent.open.flags - 1,
917 nd->intent.open.file, 932 nd->intent.open.file,
918 open, cred); 933 open, cred);
919out: 934out:
@@ -932,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
932 * 947 *
933 * Note that this function destroys the original nameidata 948 * Note that this function destroys the original nameidata
934 */ 949 */
935struct file *nameidata_to_filp(struct nameidata *nd, int flags) 950struct file *nameidata_to_filp(struct nameidata *nd)
936{ 951{
937 const struct cred *cred = current_cred(); 952 const struct cred *cred = current_cred();
938 struct file *filp; 953 struct file *filp;
@@ -941,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
941 filp = nd->intent.open.file; 956 filp = nd->intent.open.file;
942 /* Has the filesystem initialised the file for us? */ 957 /* Has the filesystem initialised the file for us? */
943 if (filp->f_path.dentry == NULL) 958 if (filp->f_path.dentry == NULL)
944 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 959 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
945 NULL, cred); 960 NULL, cred);
946 else 961 else
947 path_put(&nd->path); 962 path_put(&nd->path);
@@ -980,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
980 return ERR_PTR(error); 995 return ERR_PTR(error);
981 } 996 }
982 997
983 return __dentry_open(dentry, mnt, flags, f, NULL, cred); 998 f->f_flags = flags;
999 return __dentry_open(dentry, mnt, f, NULL, cred);
984} 1000}
985EXPORT_SYMBOL(dentry_open); 1001EXPORT_SYMBOL(dentry_open);
986 1002
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e10cba..64bc8998ac9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev,
226 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 226 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
227} 227}
228 228
229ssize_t part_discard_alignment_show(struct device *dev,
230 struct device_attribute *attr, char *buf)
231{
232 struct hd_struct *p = dev_to_part(dev);
233 return sprintf(buf, "%u\n", p->discard_alignment);
234}
235
229ssize_t part_stat_show(struct device *dev, 236ssize_t part_stat_show(struct device *dev,
230 struct device_attribute *attr, char *buf) 237 struct device_attribute *attr, char *buf)
231{ 238{
@@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
288static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 295static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
289static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 296static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
290static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 297static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
298static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
299 NULL);
291static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 300static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
292static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 301static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
293#ifdef CONFIG_FAIL_MAKE_REQUEST 302#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = {
300 &dev_attr_start.attr, 309 &dev_attr_start.attr,
301 &dev_attr_size.attr, 310 &dev_attr_size.attr,
302 &dev_attr_alignment_offset.attr, 311 &dev_attr_alignment_offset.attr,
312 &dev_attr_discard_alignment.attr,
303 &dev_attr_stat.attr, 313 &dev_attr_stat.attr,
304 &dev_attr_inflight.attr, 314 &dev_attr_inflight.attr,
305#ifdef CONFIG_FAIL_MAKE_REQUEST 315#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
403 413
404 p->start_sect = start; 414 p->start_sect = start;
405 p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); 415 p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
416 p->discard_alignment = queue_sector_discard_alignment(disk->queue,
417 start);
406 p->nr_sects = len; 418 p->nr_sects = len;
407 p->partno = partno; 419 p->partno = partno;
408 p->policy = get_disk_ro(disk); 420 p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 038a6022152..49cfd5f5423 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -1,7 +1,9 @@
1/************************************************************ 1/************************************************************
2 * EFI GUID Partition Table handling 2 * EFI GUID Partition Table handling
3 * Per Intel EFI Specification v1.02 3 *
4 * http://developer.intel.com/technology/efi/efi.htm 4 * http://www.uefi.org/specs/
5 * http://www.intel.com/technology/efi/
6 *
5 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> 7 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
6 * Copyright 2000,2001,2002,2004 Dell Inc. 8 * Copyright 2000,2001,2002,2004 Dell Inc.
7 * 9 *
@@ -92,6 +94,7 @@
92 * 94 *
93 ************************************************************/ 95 ************************************************************/
94#include <linux/crc32.h> 96#include <linux/crc32.h>
97#include <linux/math64.h>
95#include "check.h" 98#include "check.h"
96#include "efi.h" 99#include "efi.h"
97 100
@@ -141,7 +144,8 @@ last_lba(struct block_device *bdev)
141{ 144{
142 if (!bdev || !bdev->bd_inode) 145 if (!bdev || !bdev->bd_inode)
143 return 0; 146 return 0;
144 return (bdev->bd_inode->i_size >> 9) - 1ULL; 147 return div_u64(bdev->bd_inode->i_size,
148 bdev_logical_block_size(bdev)) - 1ULL;
145} 149}
146 150
147static inline int 151static inline int
@@ -188,6 +192,7 @@ static size_t
188read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) 192read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
189{ 193{
190 size_t totalreadcount = 0; 194 size_t totalreadcount = 0;
195 sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
191 196
192 if (!bdev || !buffer || lba > last_lba(bdev)) 197 if (!bdev || !buffer || lba > last_lba(bdev))
193 return 0; 198 return 0;
@@ -195,7 +200,7 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
195 while (count) { 200 while (count) {
196 int copied = 512; 201 int copied = 512;
197 Sector sect; 202 Sector sect;
198 unsigned char *data = read_dev_sector(bdev, lba++, &sect); 203 unsigned char *data = read_dev_sector(bdev, n++, &sect);
199 if (!data) 204 if (!data)
200 break; 205 break;
201 if (copied > count) 206 if (copied > count)
@@ -257,15 +262,16 @@ static gpt_header *
257alloc_read_gpt_header(struct block_device *bdev, u64 lba) 262alloc_read_gpt_header(struct block_device *bdev, u64 lba)
258{ 263{
259 gpt_header *gpt; 264 gpt_header *gpt;
265 unsigned ssz = bdev_logical_block_size(bdev);
266
260 if (!bdev) 267 if (!bdev)
261 return NULL; 268 return NULL;
262 269
263 gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL); 270 gpt = kzalloc(ssz, GFP_KERNEL);
264 if (!gpt) 271 if (!gpt)
265 return NULL; 272 return NULL;
266 273
267 if (read_lba(bdev, lba, (u8 *) gpt, 274 if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) {
268 sizeof (gpt_header)) < sizeof (gpt_header)) {
269 kfree(gpt); 275 kfree(gpt);
270 gpt=NULL; 276 gpt=NULL;
271 return NULL; 277 return NULL;
@@ -601,6 +607,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
601 gpt_header *gpt = NULL; 607 gpt_header *gpt = NULL;
602 gpt_entry *ptes = NULL; 608 gpt_entry *ptes = NULL;
603 u32 i; 609 u32 i;
610 unsigned ssz = bdev_logical_block_size(bdev) / 512;
604 611
605 if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { 612 if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) {
606 kfree(gpt); 613 kfree(gpt);
@@ -611,13 +618,14 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
611 pr_debug("GUID Partition Table is valid! Yea!\n"); 618 pr_debug("GUID Partition Table is valid! Yea!\n");
612 619
613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 620 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
621 u64 start = le64_to_cpu(ptes[i].starting_lba);
622 u64 size = le64_to_cpu(ptes[i].ending_lba) -
623 le64_to_cpu(ptes[i].starting_lba) + 1ULL;
624
614 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 625 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
615 continue; 626 continue;
616 627
617 put_partition(state, i+1, le64_to_cpu(ptes[i].starting_lba), 628 put_partition(state, i+1, start * ssz, size * ssz);
618 (le64_to_cpu(ptes[i].ending_lba) -
619 le64_to_cpu(ptes[i].starting_lba) +
620 1ULL));
621 629
622 /* If this is a RAID volume, tell md */ 630 /* If this is a RAID volume, tell md */
623 if (!efi_guidcmp(ptes[i].partition_type_guid, 631 if (!efi_guidcmp(ptes[i].partition_type_guid,
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index 2cc89d0475b..6998b589abf 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -37,7 +37,6 @@
37#define EFI_PMBR_OSTYPE_EFI 0xEF 37#define EFI_PMBR_OSTYPE_EFI 0xEF
38#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE 38#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
39 39
40#define GPT_BLOCK_SIZE 512
41#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL 40#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
42#define GPT_HEADER_REVISION_V1 0x00010000 41#define GPT_HEADER_REVISION_V1 0x00010000
43#define GPT_PRIMARY_PARTITION_TABLE_LBA 1 42#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
@@ -79,7 +78,12 @@ typedef struct _gpt_header {
79 __le32 num_partition_entries; 78 __le32 num_partition_entries;
80 __le32 sizeof_partition_entry; 79 __le32 sizeof_partition_entry;
81 __le32 partition_entry_array_crc32; 80 __le32 partition_entry_array_crc32;
82 u8 reserved2[GPT_BLOCK_SIZE - 92]; 81
82 /* The rest of the logical block is reserved by UEFI and must be zero.
83 * EFI standard handles this by:
84 *
85 * uint8_t reserved2[ BlockSize - 92 ];
86 */
83} __attribute__ ((packed)) gpt_header; 87} __attribute__ ((packed)) gpt_header;
84 88
85typedef struct _gpt_entry_attributes { 89typedef struct _gpt_entry_attributes {
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa..37ba29ff315 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
906} 906}
907 907
908static struct vfsmount *pipe_mnt __read_mostly; 908static struct vfsmount *pipe_mnt __read_mostly;
909static int pipefs_delete_dentry(struct dentry *dentry)
910{
911 /*
912 * At creation time, we pretended this dentry was hashed
913 * (by clearing DCACHE_UNHASHED bit in d_flags)
914 * At delete time, we restore the truth : not hashed.
915 * (so that dput() can proceed correctly)
916 */
917 dentry->d_flags |= DCACHE_UNHASHED;
918 return 0;
919}
920 909
921/* 910/*
922 * pipefs_dname() is called from d_path(). 911 * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
928} 917}
929 918
930static const struct dentry_operations pipefs_dentry_operations = { 919static const struct dentry_operations pipefs_dentry_operations = {
931 .d_delete = pipefs_delete_dentry,
932 .d_dname = pipefs_dname, 920 .d_dname = pipefs_dname,
933}; 921};
934 922
@@ -974,7 +962,7 @@ struct file *create_write_pipe(int flags)
974 int err; 962 int err;
975 struct inode *inode; 963 struct inode *inode;
976 struct file *f; 964 struct file *f;
977 struct dentry *dentry; 965 struct path path;
978 struct qstr name = { .name = "" }; 966 struct qstr name = { .name = "" };
979 967
980 err = -ENFILE; 968 err = -ENFILE;
@@ -983,21 +971,16 @@ struct file *create_write_pipe(int flags)
983 goto err; 971 goto err;
984 972
985 err = -ENOMEM; 973 err = -ENOMEM;
986 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 974 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
987 if (!dentry) 975 if (!path.dentry)
988 goto err_inode; 976 goto err_inode;
977 path.mnt = mntget(pipe_mnt);
989 978
990 dentry->d_op = &pipefs_dentry_operations; 979 path.dentry->d_op = &pipefs_dentry_operations;
991 /* 980 d_instantiate(path.dentry, inode);
992 * We dont want to publish this dentry into global dentry hash table.
993 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
994 * This permits a working /proc/$pid/fd/XXX on pipes
995 */
996 dentry->d_flags &= ~DCACHE_UNHASHED;
997 d_instantiate(dentry, inode);
998 981
999 err = -ENFILE; 982 err = -ENFILE;
1000 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 983 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1001 if (!f) 984 if (!f)
1002 goto err_dentry; 985 goto err_dentry;
1003 f->f_mapping = inode->i_mapping; 986 f->f_mapping = inode->i_mapping;
@@ -1009,7 +992,7 @@ struct file *create_write_pipe(int flags)
1009 992
1010 err_dentry: 993 err_dentry:
1011 free_pipe_info(inode); 994 free_pipe_info(inode);
1012 dput(dentry); 995 path_put(&path);
1013 return ERR_PTR(err); 996 return ERR_PTR(err);
1014 997
1015 err_inode: 998 err_inode:
@@ -1028,20 +1011,14 @@ void free_write_pipe(struct file *f)
1028 1011
1029struct file *create_read_pipe(struct file *wrf, int flags) 1012struct file *create_read_pipe(struct file *wrf, int flags)
1030{ 1013{
1031 struct file *f = get_empty_filp(); 1014 /* Grab pipe from the writer */
1015 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1016 &read_pipefifo_fops);
1032 if (!f) 1017 if (!f)
1033 return ERR_PTR(-ENFILE); 1018 return ERR_PTR(-ENFILE);
1034 1019
1035 /* Grab pipe from the writer */
1036 f->f_path = wrf->f_path;
1037 path_get(&wrf->f_path); 1020 path_get(&wrf->f_path);
1038 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1039
1040 f->f_pos = 0;
1041 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1021 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1042 f->f_op = &read_pipefifo_fops;
1043 f->f_mode = FMODE_READ;
1044 f->f_version = 0;
1045 1022
1046 return f; 1023 return f;
1047} 1024}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 822c2d50651..13b5d070817 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
134 * simple bit tests. 134 * simple bit tests.
135 */ 135 */
136static const char *task_state_array[] = { 136static const char *task_state_array[] = {
137 "R (running)", /* 0 */ 137 "R (running)", /* 0 */
138 "S (sleeping)", /* 1 */ 138 "S (sleeping)", /* 1 */
139 "D (disk sleep)", /* 2 */ 139 "D (disk sleep)", /* 2 */
140 "T (stopped)", /* 4 */ 140 "T (stopped)", /* 4 */
141 "T (tracing stop)", /* 8 */ 141 "t (tracing stop)", /* 8 */
142 "Z (zombie)", /* 16 */ 142 "Z (zombie)", /* 16 */
143 "X (dead)" /* 32 */ 143 "X (dead)", /* 32 */
144 "x (dead)", /* 64 */
145 "K (wakekill)", /* 128 */
146 "W (waking)", /* 256 */
144}; 147};
145 148
146static inline const char *get_task_state(struct task_struct *tsk) 149static inline const char *get_task_state(struct task_struct *tsk)
@@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
148 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 151 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
149 const char **p = &task_state_array[0]; 152 const char **p = &task_state_array[0];
150 153
154 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
155
151 while (state) { 156 while (state) {
152 p++; 157 p++;
153 state >>= 1; 158 state >>= 1;
@@ -322,93 +327,15 @@ static inline void task_context_switch_counts(struct seq_file *m,
322 p->nivcsw); 327 p->nivcsw);
323} 328}
324 329
325#ifdef CONFIG_MMU 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
326
327struct stack_stats {
328 struct vm_area_struct *vma;
329 unsigned long startpage;
330 unsigned long usage;
331};
332
333static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
334 unsigned long end, struct mm_walk *walk)
335{
336 struct stack_stats *ss = walk->private;
337 struct vm_area_struct *vma = ss->vma;
338 pte_t *pte, ptent;
339 spinlock_t *ptl;
340 int ret = 0;
341
342 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
343 for (; addr != end; pte++, addr += PAGE_SIZE) {
344 ptent = *pte;
345
346#ifdef CONFIG_STACK_GROWSUP
347 if (pte_present(ptent) || is_swap_pte(ptent))
348 ss->usage = addr - ss->startpage + PAGE_SIZE;
349#else
350 if (pte_present(ptent) || is_swap_pte(ptent)) {
351 ss->usage = ss->startpage - addr + PAGE_SIZE;
352 pte++;
353 ret = 1;
354 break;
355 }
356#endif
357 }
358 pte_unmap_unlock(pte - 1, ptl);
359 cond_resched();
360 return ret;
361}
362
363static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
364 struct task_struct *task)
365{
366 struct stack_stats ss;
367 struct mm_walk stack_walk = {
368 .pmd_entry = stack_usage_pte_range,
369 .mm = vma->vm_mm,
370 .private = &ss,
371 };
372
373 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
374 return 0;
375
376 ss.vma = vma;
377 ss.startpage = task->stack_start & PAGE_MASK;
378 ss.usage = 0;
379
380#ifdef CONFIG_STACK_GROWSUP
381 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
382 &stack_walk);
383#else
384 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
385 &stack_walk);
386#endif
387 return ss.usage;
388}
389
390static inline void task_show_stack_usage(struct seq_file *m,
391 struct task_struct *task)
392{
393 struct vm_area_struct *vma;
394 struct mm_struct *mm = get_task_mm(task);
395
396 if (mm) {
397 down_read(&mm->mmap_sem);
398 vma = find_vma(mm, task->stack_start);
399 if (vma)
400 seq_printf(m, "Stack usage:\t%lu kB\n",
401 get_stack_usage_in_bytes(vma, task) >> 10);
402
403 up_read(&mm->mmap_sem);
404 mmput(mm);
405 }
406}
407#else
408static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
409{ 331{
332 seq_printf(m, "Cpus_allowed:\t");
333 seq_cpumask(m, &task->cpus_allowed);
334 seq_printf(m, "\n");
335 seq_printf(m, "Cpus_allowed_list:\t");
336 seq_cpumask_list(m, &task->cpus_allowed);
337 seq_printf(m, "\n");
410} 338}
411#endif /* CONFIG_MMU */
412 339
413int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 340int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
414 struct pid *pid, struct task_struct *task) 341 struct pid *pid, struct task_struct *task)
@@ -424,12 +351,12 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
424 } 351 }
425 task_sig(m, task); 352 task_sig(m, task);
426 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task);
427 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
428#if defined(CONFIG_S390) 356#if defined(CONFIG_S390)
429 task_show_regs(m, task); 357 task_show_regs(m, task);
430#endif 358#endif
431 task_context_switch_counts(m, task); 359 task_context_switch_counts(m, task);
432 task_show_stack_usage(m, task);
433 return 0; 360 return 0;
434} 361}
435 362
@@ -495,20 +422,17 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
495 422
496 /* add up live thread stats at the group level */ 423 /* add up live thread stats at the group level */
497 if (whole) { 424 if (whole) {
498 struct task_cputime cputime;
499 struct task_struct *t = task; 425 struct task_struct *t = task;
500 do { 426 do {
501 min_flt += t->min_flt; 427 min_flt += t->min_flt;
502 maj_flt += t->maj_flt; 428 maj_flt += t->maj_flt;
503 gtime = cputime_add(gtime, task_gtime(t)); 429 gtime = cputime_add(gtime, t->gtime);
504 t = next_thread(t); 430 t = next_thread(t);
505 } while (t != task); 431 } while (t != task);
506 432
507 min_flt += sig->min_flt; 433 min_flt += sig->min_flt;
508 maj_flt += sig->maj_flt; 434 maj_flt += sig->maj_flt;
509 thread_group_cputime(task, &cputime); 435 thread_group_times(task, &utime, &stime);
510 utime = cputime.utime;
511 stime = cputime.stime;
512 gtime = cputime_add(gtime, sig->gtime); 436 gtime = cputime_add(gtime, sig->gtime);
513 } 437 }
514 438
@@ -524,9 +448,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
524 if (!whole) { 448 if (!whole) {
525 min_flt = task->min_flt; 449 min_flt = task->min_flt;
526 maj_flt = task->maj_flt; 450 maj_flt = task->maj_flt;
527 utime = task_utime(task); 451 task_times(task, &utime, &stime);
528 stime = task_stime(task); 452 gtime = task->gtime;
529 gtime = task_gtime(task);
530 } 453 }
531 454
532 /* scale priority and nice values from timeslices to -20..20 */ 455 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index af643b5aefe..e42bbd843ed 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1265,6 +1265,72 @@ static const struct file_operations proc_pid_sched_operations = {
1265 1265
1266#endif 1266#endif
1267 1267
1268static ssize_t comm_write(struct file *file, const char __user *buf,
1269 size_t count, loff_t *offset)
1270{
1271 struct inode *inode = file->f_path.dentry->d_inode;
1272 struct task_struct *p;
1273 char buffer[TASK_COMM_LEN];
1274
1275 memset(buffer, 0, sizeof(buffer));
1276 if (count > sizeof(buffer) - 1)
1277 count = sizeof(buffer) - 1;
1278 if (copy_from_user(buffer, buf, count))
1279 return -EFAULT;
1280
1281 p = get_proc_task(inode);
1282 if (!p)
1283 return -ESRCH;
1284
1285 if (same_thread_group(current, p))
1286 set_task_comm(p, buffer);
1287 else
1288 count = -EINVAL;
1289
1290 put_task_struct(p);
1291
1292 return count;
1293}
1294
1295static int comm_show(struct seq_file *m, void *v)
1296{
1297 struct inode *inode = m->private;
1298 struct task_struct *p;
1299
1300 p = get_proc_task(inode);
1301 if (!p)
1302 return -ESRCH;
1303
1304 task_lock(p);
1305 seq_printf(m, "%s\n", p->comm);
1306 task_unlock(p);
1307
1308 put_task_struct(p);
1309
1310 return 0;
1311}
1312
1313static int comm_open(struct inode *inode, struct file *filp)
1314{
1315 int ret;
1316
1317 ret = single_open(filp, comm_show, NULL);
1318 if (!ret) {
1319 struct seq_file *m = filp->private_data;
1320
1321 m->private = inode;
1322 }
1323 return ret;
1324}
1325
1326static const struct file_operations proc_pid_set_comm_operations = {
1327 .open = comm_open,
1328 .read = seq_read,
1329 .write = comm_write,
1330 .llseek = seq_lseek,
1331 .release = single_release,
1332};
1333
1268/* 1334/*
1269 * We added or removed a vma mapping the executable. The vmas are only mapped 1335 * We added or removed a vma mapping the executable. The vmas are only mapped
1270 * during exec and are not mapped with the mmap system call. 1336 * during exec and are not mapped with the mmap system call.
@@ -1353,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1353 goto out; 1419 goto out;
1354 1420
1355 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1356 nd->last_type = LAST_BIND;
1357out: 1422out:
1358 return ERR_PTR(error); 1423 return ERR_PTR(error);
1359} 1424}
@@ -2200,7 +2265,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
2200 2265
2201#endif 2266#endif
2202 2267
2203#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2268#ifdef CONFIG_ELF_CORE
2204static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2269static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2205 size_t count, loff_t *ppos) 2270 size_t count, loff_t *ppos)
2206{ 2271{
@@ -2504,6 +2569,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2504#ifdef CONFIG_SCHED_DEBUG 2569#ifdef CONFIG_SCHED_DEBUG
2505 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2570 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2506#endif 2571#endif
2572 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2507#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2573#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2508 INF("syscall", S_IRUSR, proc_pid_syscall), 2574 INF("syscall", S_IRUSR, proc_pid_syscall),
2509#endif 2575#endif
@@ -2556,7 +2622,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2556#ifdef CONFIG_FAULT_INJECTION 2622#ifdef CONFIG_FAULT_INJECTION
2557 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2623 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2558#endif 2624#endif
2559#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2625#ifdef CONFIG_ELF_CORE
2560 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2626 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2561#endif 2627#endif
2562#ifdef CONFIG_TASK_IO_ACCOUNTING 2628#ifdef CONFIG_TASK_IO_ACCOUNTING
@@ -2838,6 +2904,7 @@ static const struct pid_entry tid_base_stuff[] = {
2838#ifdef CONFIG_SCHED_DEBUG 2904#ifdef CONFIG_SCHED_DEBUG
2839 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2905 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2840#endif 2906#endif
2907 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2841#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2908#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2842 INF("syscall", S_IRUSR, proc_pid_syscall), 2909 INF("syscall", S_IRUSR, proc_pid_syscall),
2843#endif 2910#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index fa678abc9db..480cb1065ee 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -429,7 +429,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
429 unsigned int ino; 429 unsigned int ino;
430 430
431 ino = de->low_ino; 431 ino = de->low_ino;
432 de_get(de); 432 pde_get(de);
433 spin_unlock(&proc_subdir_lock); 433 spin_unlock(&proc_subdir_lock);
434 error = -EINVAL; 434 error = -EINVAL;
435 inode = proc_get_inode(dir->i_sb, ino, de); 435 inode = proc_get_inode(dir->i_sb, ino, de);
@@ -445,7 +445,7 @@ out_unlock:
445 return NULL; 445 return NULL;
446 } 446 }
447 if (de) 447 if (de)
448 de_put(de); 448 pde_put(de);
449 return ERR_PTR(error); 449 return ERR_PTR(error);
450} 450}
451 451
@@ -509,17 +509,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
509 struct proc_dir_entry *next; 509 struct proc_dir_entry *next;
510 510
511 /* filldir passes info to user space */ 511 /* filldir passes info to user space */
512 de_get(de); 512 pde_get(de);
513 spin_unlock(&proc_subdir_lock); 513 spin_unlock(&proc_subdir_lock);
514 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 514 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
515 de->low_ino, de->mode >> 12) < 0) { 515 de->low_ino, de->mode >> 12) < 0) {
516 de_put(de); 516 pde_put(de);
517 goto out; 517 goto out;
518 } 518 }
519 spin_lock(&proc_subdir_lock); 519 spin_lock(&proc_subdir_lock);
520 filp->f_pos++; 520 filp->f_pos++;
521 next = de->next; 521 next = de->next;
522 de_put(de); 522 pde_put(de);
523 de = next; 523 de = next;
524 } while (de); 524 } while (de);
525 spin_unlock(&proc_subdir_lock); 525 spin_unlock(&proc_subdir_lock);
@@ -763,7 +763,7 @@ out:
763 return NULL; 763 return NULL;
764} 764}
765 765
766void free_proc_entry(struct proc_dir_entry *de) 766static void free_proc_entry(struct proc_dir_entry *de)
767{ 767{
768 unsigned int ino = de->low_ino; 768 unsigned int ino = de->low_ino;
769 769
@@ -777,6 +777,12 @@ void free_proc_entry(struct proc_dir_entry *de)
777 kfree(de); 777 kfree(de);
778} 778}
779 779
780void pde_put(struct proc_dir_entry *pde)
781{
782 if (atomic_dec_and_test(&pde->count))
783 free_proc_entry(pde);
784}
785
780/* 786/*
781 * Remove a /proc entry and free it if it's not currently in use. 787 * Remove a /proc entry and free it if it's not currently in use.
782 */ 788 */
@@ -845,6 +851,5 @@ continue_removing:
845 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " 851 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
846 "'%s/%s', leaking at least '%s'\n", __func__, 852 "'%s/%s', leaking at least '%s'\n", __func__,
847 de->parent->name, de->name, de->subdir->name); 853 de->parent->name, de->name, de->subdir->name);
848 if (atomic_dec_and_test(&de->count)) 854 pde_put(de);
849 free_proc_entry(de);
850} 855}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d78ade30554..445a02bcaab 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,29 +24,6 @@
24 24
25#include "internal.h" 25#include "internal.h"
26 26
27struct proc_dir_entry *de_get(struct proc_dir_entry *de)
28{
29 atomic_inc(&de->count);
30 return de;
31}
32
33/*
34 * Decrements the use count and checks for deferred deletion.
35 */
36void de_put(struct proc_dir_entry *de)
37{
38 if (!atomic_read(&de->count)) {
39 printk("de_put: entry %s already free!\n", de->name);
40 return;
41 }
42
43 if (atomic_dec_and_test(&de->count))
44 free_proc_entry(de);
45}
46
47/*
48 * Decrement the use count of the proc_dir_entry.
49 */
50static void proc_delete_inode(struct inode *inode) 27static void proc_delete_inode(struct inode *inode)
51{ 28{
52 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
@@ -59,7 +36,7 @@ static void proc_delete_inode(struct inode *inode)
59 /* Let go of any associated proc directory entry */ 36 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 37 de = PROC_I(inode)->pde;
61 if (de) 38 if (de)
62 de_put(de); 39 pde_put(de);
63 if (PROC_I(inode)->sysctl) 40 if (PROC_I(inode)->sysctl)
64 sysctl_head_put(PROC_I(inode)->sysctl); 41 sysctl_head_put(PROC_I(inode)->sysctl);
65 clear_inode(inode); 42 clear_inode(inode);
@@ -480,7 +457,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
480 } 457 }
481 unlock_new_inode(inode); 458 unlock_new_inode(inode);
482 } else 459 } else
483 de_put(de); 460 pde_put(de);
484 return inode; 461 return inode;
485} 462}
486 463
@@ -495,7 +472,7 @@ int proc_fill_super(struct super_block *s)
495 s->s_op = &proc_sops; 472 s->s_op = &proc_sops;
496 s->s_time_gran = 1; 473 s->s_time_gran = 1;
497 474
498 de_get(&proc_root); 475 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 476 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
500 if (!root_inode) 477 if (!root_inode)
501 goto out_no_root; 478 goto out_no_root;
@@ -509,6 +486,6 @@ int proc_fill_super(struct super_block *s)
509out_no_root: 486out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 487 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode); 488 iput(root_inode);
512 de_put(&proc_root); 489 pde_put(&proc_root);
513 return -ENOMEM; 490 return -ENOMEM;
514} 491}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 753ca37002c..1f24a3eddd1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,8 +61,6 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64void free_proc_entry(struct proc_dir_entry *de);
65
66void proc_init_inodecache(void); 64void proc_init_inodecache(void);
67 65
68static inline struct pid *proc_pid(struct inode *inode) 66static inline struct pid *proc_pid(struct inode *inode)
@@ -101,8 +99,12 @@ unsigned long task_vsize(struct mm_struct *);
101int task_statm(struct mm_struct *, int *, int *, int *, int *); 99int task_statm(struct mm_struct *, int *, int *, int *, int *);
102void task_mem(struct seq_file *, struct mm_struct *); 100void task_mem(struct seq_file *, struct mm_struct *);
103 101
104struct proc_dir_entry *de_get(struct proc_dir_entry *de); 102static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
105void de_put(struct proc_dir_entry *de); 103{
104 atomic_inc(&pde->count);
105 return pde;
106}
107void pde_put(struct proc_dir_entry *pde);
106 108
107extern struct vfsmount *proc_mnt; 109extern struct vfsmount *proc_mnt;
108int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254..180cf5a0bd6 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/kernel-page-flags.h>
11#include <asm/uaccess.h> 12#include <asm/uaccess.h>
12#include "internal.h" 13#include "internal.h"
13 14
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
71 * physical page flags. 72 * physical page flags.
72 */ 73 */
73 74
74/* These macros are used to decouple internal flags from exported ones */
75
76#define KPF_LOCKED 0
77#define KPF_ERROR 1
78#define KPF_REFERENCED 2
79#define KPF_UPTODATE 3
80#define KPF_DIRTY 4
81#define KPF_LRU 5
82#define KPF_ACTIVE 6
83#define KPF_SLAB 7
84#define KPF_WRITEBACK 8
85#define KPF_RECLAIM 9
86#define KPF_BUDDY 10
87
88/* 11-20: new additions in 2.6.31 */
89#define KPF_MMAP 11
90#define KPF_ANON 12
91#define KPF_SWAPCACHE 13
92#define KPF_SWAPBACKED 14
93#define KPF_COMPOUND_HEAD 15
94#define KPF_COMPOUND_TAIL 16
95#define KPF_HUGE 17
96#define KPF_UNEVICTABLE 18
97#define KPF_HWPOISON 19
98#define KPF_NOPAGE 20
99
100#define KPF_KSM 21
101
102/* kernel hacking assistances
103 * WARNING: subject to change, never rely on them!
104 */
105#define KPF_RESERVED 32
106#define KPF_MLOCKED 33
107#define KPF_MAPPEDTODISK 34
108#define KPF_PRIVATE 35
109#define KPF_PRIVATE_2 36
110#define KPF_OWNER_PRIVATE 37
111#define KPF_ARCH 38
112#define KPF_UNCACHED 39
113
114static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) 75static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
115{ 76{
116 return ((kflags >> kbit) & 1) << ubit; 77 return ((kflags >> kbit) & 1) << ubit;
117} 78}
118 79
119static u64 get_uflags(struct page *page) 80u64 stable_page_flags(struct page *page)
120{ 81{
121 u64 k; 82 u64 k;
122 u64 u; 83 u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
219 else 180 else
220 ppage = NULL; 181 ppage = NULL;
221 182
222 if (put_user(get_uflags(ppage), out)) { 183 if (put_user(stable_page_flags(ppage), out)) {
223 ret = -EFAULT; 184 ret = -EFAULT;
224 break; 185 break;
225 } 186 }
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 7ba79a54948..123257bb356 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
10#include <linux/seq_file.h>
10#include <linux/stat.h> 11#include <linux/stat.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <asm/prom.h> 13#include <asm/prom.h>
@@ -25,26 +26,27 @@ static struct proc_dir_entry *proc_device_tree;
25/* 26/*
26 * Supply data on a read from /proc/device-tree/node/property. 27 * Supply data on a read from /proc/device-tree/node/property.
27 */ 28 */
28static int property_read_proc(char *page, char **start, off_t off, 29static int property_proc_show(struct seq_file *m, void *v)
29 int count, int *eof, void *data)
30{ 30{
31 struct property *pp = data; 31 struct property *pp = m->private;
32 int n;
33 32
34 if (off >= pp->length) { 33 seq_write(m, pp->value, pp->length);
35 *eof = 1; 34 return 0;
36 return 0; 35}
37 } 36
38 n = pp->length - off; 37static int property_proc_open(struct inode *inode, struct file *file)
39 if (n > count) 38{
40 n = count; 39 return single_open(file, property_proc_show, PDE(inode)->data);
41 else
42 *eof = 1;
43 memcpy(page, (char *)pp->value + off, n);
44 *start = page;
45 return n;
46} 40}
47 41
42static const struct file_operations property_proc_fops = {
43 .owner = THIS_MODULE,
44 .open = property_proc_open,
45 .read = seq_read,
46 .llseek = seq_lseek,
47 .release = single_release,
48};
49
48/* 50/*
49 * For a node with a name like "gc@10", we make symlinks called "gc" 51 * For a node with a name like "gc@10", we make symlinks called "gc"
50 * and "@10" to it. 52 * and "@10" to it.
@@ -63,10 +65,9 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
63 * Unfortunately proc_register puts each new entry 65 * Unfortunately proc_register puts each new entry
64 * at the beginning of the list. So we rearrange them. 66 * at the beginning of the list. So we rearrange them.
65 */ 67 */
66 ent = create_proc_read_entry(name, 68 ent = proc_create_data(name,
67 strncmp(name, "security-", 9) 69 strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR,
68 ? S_IRUGO : S_IRUSR, de, 70 de, &property_proc_fops, pp);
69 property_read_proc, pp);
70 if (ent == NULL) 71 if (ent == NULL)
71 return NULL; 72 return NULL;
72 73
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f667e8aeabd..6ff9981f0a1 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -48,7 +48,7 @@ out:
48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
49{ 49{
50 int len; 50 int len;
51 for ( ; p->ctl_name || p->procname; p++) { 51 for ( ; p->procname; p++) {
52 52
53 if (!p->procname) 53 if (!p->procname)
54 continue; 54 continue;
@@ -218,7 +218,7 @@ static int scan(struct ctl_table_header *head, ctl_table *table,
218 void *dirent, filldir_t filldir) 218 void *dirent, filldir_t filldir)
219{ 219{
220 220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) { 221 for (; table->procname; table++, (*pos)++) {
222 int res; 222 int res;
223 223
224 /* Can't do anything without a proc name */ 224 /* Can't do anything without a proc name */
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c6d70..b9b7aad2003 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
27 int i, j; 27 int i, j;
28 unsigned long jif; 28 unsigned long jif;
29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
30 cputime64_t guest; 30 cputime64_t guest, guest_nice;
31 u64 sum = 0; 31 u64 sum = 0;
32 u64 sum_softirq = 0; 32 u64 sum_softirq = 0;
33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
36 36
37 user = nice = system = idle = iowait = 37 user = nice = system = idle = iowait =
38 irq = softirq = steal = cputime64_zero; 38 irq = softirq = steal = cputime64_zero;
39 guest = cputime64_zero; 39 guest = guest_nice = cputime64_zero;
40 getboottime(&boottime); 40 getboottime(&boottime);
41 jif = boottime.tv_sec; 41 jif = boottime.tv_sec;
42 42
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
51 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 51 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
52 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 52 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
53 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 53 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
54 guest_nice = cputime64_add(guest_nice,
55 kstat_cpu(i).cpustat.guest_nice);
54 for_each_irq_nr(j) { 56 for_each_irq_nr(j) {
55 sum += kstat_irqs_cpu(j, i); 57 sum += kstat_irqs_cpu(j, i);
56 } 58 }
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
65 } 67 }
66 sum += arch_irq_stat(); 68 sum += arch_irq_stat();
67 69
68 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 70 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu "
71 "%llu\n",
69 (unsigned long long)cputime64_to_clock_t(user), 72 (unsigned long long)cputime64_to_clock_t(user),
70 (unsigned long long)cputime64_to_clock_t(nice), 73 (unsigned long long)cputime64_to_clock_t(nice),
71 (unsigned long long)cputime64_to_clock_t(system), 74 (unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
74 (unsigned long long)cputime64_to_clock_t(irq), 77 (unsigned long long)cputime64_to_clock_t(irq),
75 (unsigned long long)cputime64_to_clock_t(softirq), 78 (unsigned long long)cputime64_to_clock_t(softirq),
76 (unsigned long long)cputime64_to_clock_t(steal), 79 (unsigned long long)cputime64_to_clock_t(steal),
77 (unsigned long long)cputime64_to_clock_t(guest)); 80 (unsigned long long)cputime64_to_clock_t(guest),
81 (unsigned long long)cputime64_to_clock_t(guest_nice));
78 for_each_online_cpu(i) { 82 for_each_online_cpu(i) {
79 83
80 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 84 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
88 softirq = kstat_cpu(i).cpustat.softirq; 92 softirq = kstat_cpu(i).cpustat.softirq;
89 steal = kstat_cpu(i).cpustat.steal; 93 steal = kstat_cpu(i).cpustat.steal;
90 guest = kstat_cpu(i).cpustat.guest; 94 guest = kstat_cpu(i).cpustat.guest;
95 guest_nice = kstat_cpu(i).cpustat.guest_nice;
91 seq_printf(p, 96 seq_printf(p,
92 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 97 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
98 "%llu\n",
93 i, 99 i,
94 (unsigned long long)cputime64_to_clock_t(user), 100 (unsigned long long)cputime64_to_clock_t(user),
95 (unsigned long long)cputime64_to_clock_t(nice), 101 (unsigned long long)cputime64_to_clock_t(nice),
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
99 (unsigned long long)cputime64_to_clock_t(irq), 105 (unsigned long long)cputime64_to_clock_t(irq),
100 (unsigned long long)cputime64_to_clock_t(softirq), 106 (unsigned long long)cputime64_to_clock_t(softirq),
101 (unsigned long long)cputime64_to_clock_t(steal), 107 (unsigned long long)cputime64_to_clock_t(steal),
102 (unsigned long long)cputime64_to_clock_t(guest)); 108 (unsigned long long)cputime64_to_clock_t(guest),
109 (unsigned long long)cputime64_to_clock_t(guest_nice));
103 } 110 }
104 seq_printf(p, "intr %llu", (unsigned long long)sum); 111 seq_printf(p, "intr %llu", (unsigned long long)sum);
105 112
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2a1bef9203c..f277c4a111c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
361 if (!pte_present(ptent)) 361 if (!pte_present(ptent))
362 continue; 362 continue;
363 363
364 mss->resident += PAGE_SIZE;
365
366 page = vm_normal_page(vma, addr, ptent); 364 page = vm_normal_page(vma, addr, ptent);
367 if (!page) 365 if (!page)
368 continue; 366 continue;
369 367
368 mss->resident += PAGE_SIZE;
370 /* Accumulate the size in pages that have been accessed. */ 369 /* Accumulate the size in pages that have been accessed. */
371 if (pte_young(ptent) || PageReferenced(page)) 370 if (pte_young(ptent) || PageReferenced(page))
372 mss->referenced += PAGE_SIZE; 371 mss->referenced += PAGE_SIZE;
@@ -650,6 +649,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
650 return err; 649 return err;
651} 650}
652 651
652static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
653{
654 u64 pme = 0;
655 if (pte_present(pte))
656 pme = PM_PFRAME(pte_pfn(pte) + offset)
657 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
658 return pme;
659}
660
661static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
662 unsigned long end, struct mm_walk *walk)
663{
664 struct vm_area_struct *vma;
665 struct pagemapread *pm = walk->private;
666 struct hstate *hs = NULL;
667 int err = 0;
668
669 vma = find_vma(walk->mm, addr);
670 if (vma)
671 hs = hstate_vma(vma);
672 for (; addr != end; addr += PAGE_SIZE) {
673 u64 pfn = PM_NOT_PRESENT;
674
675 if (vma && (addr >= vma->vm_end)) {
676 vma = find_vma(walk->mm, addr);
677 if (vma)
678 hs = hstate_vma(vma);
679 }
680
681 if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
682 /* calculate pfn of the "raw" page in the hugepage. */
683 int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
684 pfn = huge_pte_to_pagemap_entry(*pte, offset);
685 }
686 err = add_to_pagemap(addr, pfn, pm);
687 if (err)
688 return err;
689 }
690
691 cond_resched();
692
693 return err;
694}
695
653/* 696/*
654 * /proc/pid/pagemap - an array mapping virtual pages to pfns 697 * /proc/pid/pagemap - an array mapping virtual pages to pfns
655 * 698 *
@@ -742,6 +785,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 785
743 pagemap_walk.pmd_entry = pagemap_pte_range; 786 pagemap_walk.pmd_entry = pagemap_pte_range;
744 pagemap_walk.pte_hole = pagemap_pte_hole; 787 pagemap_walk.pte_hole = pagemap_pte_hole;
788 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
745 pagemap_walk.mm = mm; 789 pagemap_walk.mm = mm;
746 pagemap_walk.private = &pm; 790 pagemap_walk.private = &pm;
747 791
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f5c05d3dbd..5d9fd64ef81 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -110,9 +110,13 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
110 } 110 }
111 } 111 }
112 112
113 size += (*text = mm->end_code - mm->start_code); 113 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
114 size += (*data = mm->start_stack - mm->start_data); 114 >> PAGE_SHIFT;
115 *data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK))
116 >> PAGE_SHIFT;
115 up_read(&mm->mmap_sem); 117 up_read(&mm->mmap_sem);
118 size >>= PAGE_SHIFT;
119 size += *text + *data;
116 *resident = size; 120 *resident = size;
117 return size; 121 return size;
118} 122}
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 0afba069d56..22e0d60e53e 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -17,13 +17,6 @@
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include "qnx4.h" 18#include "qnx4.h"
19 19
20#if 0
21int qnx4_new_block(struct super_block *sb)
22{
23 return 0;
24}
25#endif /* 0 */
26
27static void count_bits(register const char *bmPart, register int size, 20static void count_bits(register const char *bmPart, register int size,
28 int *const tf) 21 int *const tf)
29{ 22{
@@ -35,22 +28,7 @@ static void count_bits(register const char *bmPart, register int size,
35 } 28 }
36 do { 29 do {
37 b = *bmPart++; 30 b = *bmPart++;
38 if ((b & 1) == 0) 31 tot += 8 - hweight8(b);
39 tot++;
40 if ((b & 2) == 0)
41 tot++;
42 if ((b & 4) == 0)
43 tot++;
44 if ((b & 8) == 0)
45 tot++;
46 if ((b & 16) == 0)
47 tot++;
48 if ((b & 32) == 0)
49 tot++;
50 if ((b & 64) == 0)
51 tot++;
52 if ((b & 128) == 0)
53 tot++;
54 size--; 32 size--;
55 } while (size != 0); 33 } while (size != 0);
56 *tf = tot; 34 *tf = tot;
@@ -67,7 +45,7 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb)
67 45
68 while (total < size) { 46 while (total < size) {
69 if ((bh = sb_bread(sb, start + offset)) == NULL) { 47 if ((bh = sb_bread(sb, start + offset)) == NULL) {
70 printk("qnx4: I/O error in counting free blocks\n"); 48 printk(KERN_ERR "qnx4: I/O error in counting free blocks\n");
71 break; 49 break;
72 } 50 }
73 count_bits(bh->b_data, size - total, &total_free); 51 count_bits(bh->b_data, size - total, &total_free);
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 86cc39cb139..6f30c3d5bcb 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -26,8 +26,8 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
26 int ix, ino; 26 int ix, ino;
27 int size; 27 int size;
28 28
29 QNX4DEBUG(("qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); 29 QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size));
30 QNX4DEBUG(("filp->f_pos = %ld\n", (long) filp->f_pos)); 30 QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos));
31 31
32 lock_kernel(); 32 lock_kernel();
33 33
@@ -50,7 +50,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
50 size = QNX4_NAME_MAX; 50 size = QNX4_NAME_MAX;
51 51
52 if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) { 52 if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) {
53 QNX4DEBUG(("qnx4_readdir:%.*s\n", size, de->di_fname)); 53 QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname));
54 if ( ( de->di_status & QNX4_FILE_LINK ) == 0 ) 54 if ( ( de->di_status & QNX4_FILE_LINK ) == 0 )
55 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; 55 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
56 else { 56 else {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index d2cd1798d8c..ebf3440d28c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -64,25 +64,7 @@ static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
64 result = sb_getblk(inode->i_sb, nr); 64 result = sb_getblk(inode->i_sb, nr);
65 return result; 65 return result;
66 } 66 }
67 if (!create) { 67 return NULL;
68 return NULL;
69 }
70#if 0
71 tmp = qnx4_new_block(inode->i_sb);
72 if (!tmp) {
73 return NULL;
74 }
75 result = sb_getblk(inode->i_sb, tmp);
76 if (tst) {
77 qnx4_free_block(inode->i_sb, tmp);
78 brelse(result);
79 goto repeat;
80 }
81 tst = tmp;
82#endif
83 inode->i_ctime = CURRENT_TIME_SEC;
84 mark_inode_dirty(inode);
85 return result;
86} 68}
87 69
88struct buffer_head *qnx4_bread(struct inode *inode, int block, int create) 70struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
@@ -107,14 +89,12 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
107{ 89{
108 unsigned long phys; 90 unsigned long phys;
109 91
110 QNX4DEBUG(("qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock)); 92 QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock));
111 93
112 phys = qnx4_block_map( inode, iblock ); 94 phys = qnx4_block_map( inode, iblock );
113 if ( phys ) { 95 if ( phys ) {
114 // logical block is before EOF 96 // logical block is before EOF
115 map_bh(bh, inode->i_sb, phys); 97 map_bh(bh, inode->i_sb, phys);
116 } else if ( create ) {
117 // to be done.
118 } 98 }
119 return 0; 99 return 0;
120} 100}
@@ -142,12 +122,12 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
142 // read next xtnt block. 122 // read next xtnt block.
143 bh = sb_bread(inode->i_sb, i_xblk - 1); 123 bh = sb_bread(inode->i_sb, i_xblk - 1);
144 if ( !bh ) { 124 if ( !bh ) {
145 QNX4DEBUG(("qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1)); 125 QNX4DEBUG((KERN_ERR "qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1));
146 return -EIO; 126 return -EIO;
147 } 127 }
148 xblk = (struct qnx4_xblk*)bh->b_data; 128 xblk = (struct qnx4_xblk*)bh->b_data;
149 if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) { 129 if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) {
150 QNX4DEBUG(("qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk)); 130 QNX4DEBUG((KERN_ERR "qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk));
151 return -EIO; 131 return -EIO;
152 } 132 }
153 } 133 }
@@ -168,7 +148,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
168 brelse( bh ); 148 brelse( bh );
169 } 149 }
170 150
171 QNX4DEBUG(("qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block)); 151 QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block));
172 return block; 152 return block;
173} 153}
174 154
@@ -209,7 +189,7 @@ static const char *qnx4_checkroot(struct super_block *sb)
209 if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') { 189 if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') {
210 return "no qnx4 filesystem (no root dir)."; 190 return "no qnx4 filesystem (no root dir).";
211 } else { 191 } else {
212 QNX4DEBUG(("QNX4 filesystem found on dev %s.\n", sb->s_id)); 192 QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id));
213 rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1; 193 rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1;
214 rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size); 194 rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size);
215 for (j = 0; j < rl; j++) { 195 for (j = 0; j < rl; j++) {
@@ -220,7 +200,7 @@ static const char *qnx4_checkroot(struct super_block *sb)
220 for (i = 0; i < QNX4_INODES_PER_BLOCK; i++) { 200 for (i = 0; i < QNX4_INODES_PER_BLOCK; i++) {
221 rootdir = (struct qnx4_inode_entry *) (bh->b_data + i * QNX4_DIR_ENTRY_SIZE); 201 rootdir = (struct qnx4_inode_entry *) (bh->b_data + i * QNX4_DIR_ENTRY_SIZE);
222 if (rootdir->di_fname != NULL) { 202 if (rootdir->di_fname != NULL) {
223 QNX4DEBUG(("Rootdir entry found : [%s]\n", rootdir->di_fname)); 203 QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname));
224 if (!strncmp(rootdir->di_fname, QNX4_BMNAME, sizeof QNX4_BMNAME)) { 204 if (!strncmp(rootdir->di_fname, QNX4_BMNAME, sizeof QNX4_BMNAME)) {
225 found = 1; 205 found = 1;
226 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL ); 206 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL );
@@ -265,12 +245,12 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
265 if we don't belong here... */ 245 if we don't belong here... */
266 bh = sb_bread(s, 1); 246 bh = sb_bread(s, 1);
267 if (!bh) { 247 if (!bh) {
268 printk("qnx4: unable to read the superblock\n"); 248 printk(KERN_ERR "qnx4: unable to read the superblock\n");
269 goto outnobh; 249 goto outnobh;
270 } 250 }
271 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) { 251 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
272 if (!silent) 252 if (!silent)
273 printk("qnx4: wrong fsid in superblock.\n"); 253 printk(KERN_ERR "qnx4: wrong fsid in superblock.\n");
274 goto out; 254 goto out;
275 } 255 }
276 s->s_op = &qnx4_sops; 256 s->s_op = &qnx4_sops;
@@ -284,14 +264,14 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
284 errmsg = qnx4_checkroot(s); 264 errmsg = qnx4_checkroot(s);
285 if (errmsg != NULL) { 265 if (errmsg != NULL) {
286 if (!silent) 266 if (!silent)
287 printk("qnx4: %s\n", errmsg); 267 printk(KERN_ERR "qnx4: %s\n", errmsg);
288 goto out; 268 goto out;
289 } 269 }
290 270
291 /* does root not have inode number QNX4_ROOT_INO ?? */ 271 /* does root not have inode number QNX4_ROOT_INO ?? */
292 root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); 272 root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK);
293 if (IS_ERR(root)) { 273 if (IS_ERR(root)) {
294 printk("qnx4: get inode failed\n"); 274 printk(KERN_ERR "qnx4: get inode failed\n");
295 ret = PTR_ERR(root); 275 ret = PTR_ERR(root);
296 goto out; 276 goto out;
297 } 277 }
@@ -374,7 +354,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
374 qnx4_inode = qnx4_raw_inode(inode); 354 qnx4_inode = qnx4_raw_inode(inode);
375 inode->i_mode = 0; 355 inode->i_mode = 0;
376 356
377 QNX4DEBUG(("Reading inode : [%d]\n", ino)); 357 QNX4DEBUG((KERN_INFO "reading inode : [%d]\n", ino));
378 if (!ino) { 358 if (!ino) {
379 printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " 359 printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is "
380 "out of range\n", 360 "out of range\n",
@@ -385,7 +365,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
385 block = ino / QNX4_INODES_PER_BLOCK; 365 block = ino / QNX4_INODES_PER_BLOCK;
386 366
387 if (!(bh = sb_bread(sb, block))) { 367 if (!(bh = sb_bread(sb, block))) {
388 printk("qnx4: major problem: unable to read inode from dev " 368 printk(KERN_ERR "qnx4: major problem: unable to read inode from dev "
389 "%s\n", sb->s_id); 369 "%s\n", sb->s_id);
390 iget_failed(inode); 370 iget_failed(inode);
391 return ERR_PTR(-EIO); 371 return ERR_PTR(-EIO);
@@ -499,7 +479,7 @@ static int __init init_qnx4_fs(void)
499 return err; 479 return err;
500 } 480 }
501 481
502 printk("QNX4 filesystem 0.2.3 registered.\n"); 482 printk(KERN_INFO "QNX4 filesystem 0.2.3 registered.\n");
503 return 0; 483 return 0;
504} 484}
505 485
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index ae1e7edbacd..58703ebba87 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -30,7 +30,7 @@ static int qnx4_match(int len, const char *name,
30 int namelen, thislen; 30 int namelen, thislen;
31 31
32 if (bh == NULL) { 32 if (bh == NULL) {
33 printk("qnx4: matching unassigned buffer !\n"); 33 printk(KERN_WARNING "qnx4: matching unassigned buffer !\n");
34 return 0; 34 return 0;
35 } 35 }
36 de = (struct qnx4_inode_entry *) (bh->b_data + *offset); 36 de = (struct qnx4_inode_entry *) (bh->b_data + *offset);
@@ -66,7 +66,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
66 66
67 *res_dir = NULL; 67 *res_dir = NULL;
68 if (!dir->i_sb) { 68 if (!dir->i_sb) {
69 printk("qnx4: no superblock on dir.\n"); 69 printk(KERN_WARNING "qnx4: no superblock on dir.\n");
70 return NULL; 70 return NULL;
71 } 71 }
72 bh = NULL; 72 bh = NULL;
@@ -124,7 +124,7 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam
124 foundinode = qnx4_iget(dir->i_sb, ino); 124 foundinode = qnx4_iget(dir->i_sb, ino);
125 if (IS_ERR(foundinode)) { 125 if (IS_ERR(foundinode)) {
126 unlock_kernel(); 126 unlock_kernel();
127 QNX4DEBUG(("qnx4: lookup->iget -> error %ld\n", 127 QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n",
128 PTR_ERR(foundinode))); 128 PTR_ERR(foundinode)));
129 return ERR_CAST(foundinode); 129 return ERR_CAST(foundinode);
130 } 130 }
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 8047e01ef46..efc02ebb8c7 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -17,7 +17,7 @@ config QUOTA
17 17
18config QUOTA_NETLINK_INTERFACE 18config QUOTA_NETLINK_INTERFACE
19 bool "Report quota messages through netlink interface" 19 bool "Report quota messages through netlink interface"
20 depends on QUOTA && NET 20 depends on QUOTACTL && NET
21 help 21 help
22 If you say Y here, quota warnings (about exceeding softlimit, reaching 22 If you say Y here, quota warnings (about exceeding softlimit, reaching
23 hardlimit, etc.) will be reported through netlink interface. If unsure, 23 hardlimit, etc.) will be reported through netlink interface. If unsure,
@@ -46,12 +46,14 @@ config QFMT_V1
46 format say Y here. 46 format say Y here.
47 47
48config QFMT_V2 48config QFMT_V2
49 tristate "Quota format v2 support" 49 tristate "Quota format vfsv0 and vfsv1 support"
50 depends on QUOTA 50 depends on QUOTA
51 select QUOTA_TREE 51 select QUOTA_TREE
52 help 52 help
53 This quota format allows using quotas with 32-bit UIDs/GIDs. If you 53 This config option enables kernel support for vfsv0 and vfsv1 quota
54 need this functionality say Y here. 54 formats. Both these formats support 32-bit UIDs/GIDs and vfsv1 format
55 also supports 64-bit inode and block quota limits. If you need this
56 functionality say Y here.
55 57
56config QUOTACTL 58config QUOTACTL
57 bool 59 bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39b49c42a7e..3fc62b097be 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -77,10 +77,6 @@
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
80#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
81#include <net/netlink.h>
82#include <net/genetlink.h>
83#endif
84 80
85#include <asm/uaccess.h> 81#include <asm/uaccess.h>
86 82
@@ -327,6 +323,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
327} 323}
328EXPORT_SYMBOL(dquot_mark_dquot_dirty); 324EXPORT_SYMBOL(dquot_mark_dquot_dirty);
329 325
326/* Dirtify all the dquots - this can block when journalling */
327static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
328{
329 int ret, err, cnt;
330
331 ret = err = 0;
332 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
333 if (dquot[cnt])
334 /* Even in case of error we have to continue */
335 ret = mark_dquot_dirty(dquot[cnt]);
336 if (!err)
337 err = ret;
338 }
339 return err;
340}
341
342static inline void dqput_all(struct dquot **dquot)
343{
344 unsigned int cnt;
345
346 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
347 dqput(dquot[cnt]);
348}
349
330/* This function needs dq_list_lock */ 350/* This function needs dq_list_lock */
331static inline int clear_dquot_dirty(struct dquot *dquot) 351static inline int clear_dquot_dirty(struct dquot *dquot)
332{ 352{
@@ -1071,73 +1091,6 @@ static void print_warning(struct dquot *dquot, const int warntype)
1071} 1091}
1072#endif 1092#endif
1073 1093
1074#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
1075
1076/* Netlink family structure for quota */
1077static struct genl_family quota_genl_family = {
1078 .id = GENL_ID_GENERATE,
1079 .hdrsize = 0,
1080 .name = "VFS_DQUOT",
1081 .version = 1,
1082 .maxattr = QUOTA_NL_A_MAX,
1083};
1084
1085/* Send warning to userspace about user which exceeded quota */
1086static void send_warning(const struct dquot *dquot, const char warntype)
1087{
1088 static atomic_t seq;
1089 struct sk_buff *skb;
1090 void *msg_head;
1091 int ret;
1092 int msg_size = 4 * nla_total_size(sizeof(u32)) +
1093 2 * nla_total_size(sizeof(u64));
1094
1095 /* We have to allocate using GFP_NOFS as we are called from a
1096 * filesystem performing write and thus further recursion into
1097 * the fs to free some data could cause deadlocks. */
1098 skb = genlmsg_new(msg_size, GFP_NOFS);
1099 if (!skb) {
1100 printk(KERN_ERR
1101 "VFS: Not enough memory to send quota warning.\n");
1102 return;
1103 }
1104 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
1105 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
1106 if (!msg_head) {
1107 printk(KERN_ERR
1108 "VFS: Cannot store netlink header in quota warning.\n");
1109 goto err_out;
1110 }
1111 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
1112 if (ret)
1113 goto attr_err_out;
1114 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
1115 if (ret)
1116 goto attr_err_out;
1117 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
1118 if (ret)
1119 goto attr_err_out;
1120 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
1121 MAJOR(dquot->dq_sb->s_dev));
1122 if (ret)
1123 goto attr_err_out;
1124 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
1125 MINOR(dquot->dq_sb->s_dev));
1126 if (ret)
1127 goto attr_err_out;
1128 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
1129 if (ret)
1130 goto attr_err_out;
1131 genlmsg_end(skb, msg_head);
1132
1133 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
1134 return;
1135attr_err_out:
1136 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
1137err_out:
1138 kfree_skb(skb);
1139}
1140#endif
1141/* 1094/*
1142 * Write warnings to the console and send warning messages over netlink. 1095 * Write warnings to the console and send warning messages over netlink.
1143 * 1096 *
@@ -1145,18 +1098,20 @@ err_out:
1145 */ 1098 */
1146static void flush_warnings(struct dquot *const *dquots, char *warntype) 1099static void flush_warnings(struct dquot *const *dquots, char *warntype)
1147{ 1100{
1101 struct dquot *dq;
1148 int i; 1102 int i;
1149 1103
1150 for (i = 0; i < MAXQUOTAS; i++) 1104 for (i = 0; i < MAXQUOTAS; i++) {
1151 if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && 1105 dq = dquots[i];
1152 !warning_issued(dquots[i], warntype[i])) { 1106 if (dq && warntype[i] != QUOTA_NL_NOWARN &&
1107 !warning_issued(dq, warntype[i])) {
1153#ifdef CONFIG_PRINT_QUOTA_WARNING 1108#ifdef CONFIG_PRINT_QUOTA_WARNING
1154 print_warning(dquots[i], warntype[i]); 1109 print_warning(dq, warntype[i]);
1155#endif
1156#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
1157 send_warning(dquots[i], warntype[i]);
1158#endif 1110#endif
1111 quota_send_warning(dq->dq_type, dq->dq_id,
1112 dq->dq_sb->s_dev, warntype[i]);
1159 } 1113 }
1114 }
1160} 1115}
1161 1116
1162static int ignore_hardlimit(struct dquot *dquot) 1117static int ignore_hardlimit(struct dquot *dquot)
@@ -1337,8 +1292,7 @@ int dquot_initialize(struct inode *inode, int type)
1337out_err: 1292out_err:
1338 up_write(&sb_dqopt(sb)->dqptr_sem); 1293 up_write(&sb_dqopt(sb)->dqptr_sem);
1339 /* Drop unused references */ 1294 /* Drop unused references */
1340 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1295 dqput_all(got);
1341 dqput(got[cnt]);
1342 return ret; 1296 return ret;
1343} 1297}
1344EXPORT_SYMBOL(dquot_initialize); 1298EXPORT_SYMBOL(dquot_initialize);
@@ -1357,9 +1311,7 @@ int dquot_drop(struct inode *inode)
1357 inode->i_dquot[cnt] = NULL; 1311 inode->i_dquot[cnt] = NULL;
1358 } 1312 }
1359 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1313 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1360 1314 dqput_all(put);
1361 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1362 dqput(put[cnt]);
1363 return 0; 1315 return 0;
1364} 1316}
1365EXPORT_SYMBOL(dquot_drop); 1317EXPORT_SYMBOL(dquot_drop);
@@ -1388,6 +1340,70 @@ void vfs_dq_drop(struct inode *inode)
1388EXPORT_SYMBOL(vfs_dq_drop); 1340EXPORT_SYMBOL(vfs_dq_drop);
1389 1341
1390/* 1342/*
1343 * inode_reserved_space is managed internally by quota, and protected by
1344 * i_lock similar to i_blocks+i_bytes.
1345 */
1346static qsize_t *inode_reserved_space(struct inode * inode)
1347{
1348 /* Filesystem must explicitly define it's own method in order to use
1349 * quota reservation interface */
1350 BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
1351 return inode->i_sb->dq_op->get_reserved_space(inode);
1352}
1353
1354static void inode_add_rsv_space(struct inode *inode, qsize_t number)
1355{
1356 spin_lock(&inode->i_lock);
1357 *inode_reserved_space(inode) += number;
1358 spin_unlock(&inode->i_lock);
1359}
1360
1361
1362static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1363{
1364 spin_lock(&inode->i_lock);
1365 *inode_reserved_space(inode) -= number;
1366 __inode_add_bytes(inode, number);
1367 spin_unlock(&inode->i_lock);
1368}
1369
1370static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1371{
1372 spin_lock(&inode->i_lock);
1373 *inode_reserved_space(inode) -= number;
1374 spin_unlock(&inode->i_lock);
1375}
1376
1377static qsize_t inode_get_rsv_space(struct inode *inode)
1378{
1379 qsize_t ret;
1380
1381 if (!inode->i_sb->dq_op->get_reserved_space)
1382 return 0;
1383 spin_lock(&inode->i_lock);
1384 ret = *inode_reserved_space(inode);
1385 spin_unlock(&inode->i_lock);
1386 return ret;
1387}
1388
1389static void inode_incr_space(struct inode *inode, qsize_t number,
1390 int reserve)
1391{
1392 if (reserve)
1393 inode_add_rsv_space(inode, number);
1394 else
1395 inode_add_bytes(inode, number);
1396}
1397
1398static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
1399{
1400 if (reserve)
1401 inode_sub_rsv_space(inode, number);
1402 else
1403 inode_sub_bytes(inode, number);
1404}
1405
1406/*
1391 * Following four functions update i_blocks+i_bytes fields and 1407 * Following four functions update i_blocks+i_bytes fields and
1392 * quota information (together with appropriate checks) 1408 * quota information (together with appropriate checks)
1393 * NOTE: We absolutely rely on the fact that caller dirties 1409 * NOTE: We absolutely rely on the fact that caller dirties
@@ -1405,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1405 int cnt, ret = QUOTA_OK; 1421 int cnt, ret = QUOTA_OK;
1406 char warntype[MAXQUOTAS]; 1422 char warntype[MAXQUOTAS];
1407 1423
1424 /*
1425 * First test before acquiring mutex - solves deadlocks when we
1426 * re-enter the quota code and are already holding the mutex
1427 */
1428 if (IS_NOQUOTA(inode)) {
1429 inode_incr_space(inode, number, reserve);
1430 goto out;
1431 }
1432
1433 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1434 if (IS_NOQUOTA(inode)) {
1435 inode_incr_space(inode, number, reserve);
1436 goto out_unlock;
1437 }
1438
1408 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1439 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1409 warntype[cnt] = QUOTA_NL_NOWARN; 1440 warntype[cnt] = QUOTA_NL_NOWARN;
1410 1441
@@ -1415,7 +1446,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1415 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) 1446 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
1416 == NO_QUOTA) { 1447 == NO_QUOTA) {
1417 ret = NO_QUOTA; 1448 ret = NO_QUOTA;
1418 goto out_unlock; 1449 spin_unlock(&dq_data_lock);
1450 goto out_flush_warn;
1419 } 1451 }
1420 } 1452 }
1421 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1453 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1426,64 +1458,29 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1426 else 1458 else
1427 dquot_incr_space(inode->i_dquot[cnt], number); 1459 dquot_incr_space(inode->i_dquot[cnt], number);
1428 } 1460 }
1429 if (!reserve) 1461 inode_incr_space(inode, number, reserve);
1430 inode_add_bytes(inode, number);
1431out_unlock:
1432 spin_unlock(&dq_data_lock); 1462 spin_unlock(&dq_data_lock);
1463
1464 if (reserve)
1465 goto out_flush_warn;
1466 mark_all_dquot_dirty(inode->i_dquot);
1467out_flush_warn:
1433 flush_warnings(inode->i_dquot, warntype); 1468 flush_warnings(inode->i_dquot, warntype);
1469out_unlock:
1470 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1471out:
1434 return ret; 1472 return ret;
1435} 1473}
1436 1474
1437int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) 1475int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
1438{ 1476{
1439 int cnt, ret = QUOTA_OK; 1477 return __dquot_alloc_space(inode, number, warn, 0);
1440
1441 /*
1442 * First test before acquiring mutex - solves deadlocks when we
1443 * re-enter the quota code and are already holding the mutex
1444 */
1445 if (IS_NOQUOTA(inode)) {
1446 inode_add_bytes(inode, number);
1447 goto out;
1448 }
1449
1450 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1451 if (IS_NOQUOTA(inode)) {
1452 inode_add_bytes(inode, number);
1453 goto out_unlock;
1454 }
1455
1456 ret = __dquot_alloc_space(inode, number, warn, 0);
1457 if (ret == NO_QUOTA)
1458 goto out_unlock;
1459
1460 /* Dirtify all the dquots - this can block when journalling */
1461 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1462 if (inode->i_dquot[cnt])
1463 mark_dquot_dirty(inode->i_dquot[cnt]);
1464out_unlock:
1465 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1466out:
1467 return ret;
1468} 1478}
1469EXPORT_SYMBOL(dquot_alloc_space); 1479EXPORT_SYMBOL(dquot_alloc_space);
1470 1480
1471int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) 1481int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
1472{ 1482{
1473 int ret = QUOTA_OK; 1483 return __dquot_alloc_space(inode, number, warn, 1);
1474
1475 if (IS_NOQUOTA(inode))
1476 goto out;
1477
1478 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1479 if (IS_NOQUOTA(inode))
1480 goto out_unlock;
1481
1482 ret = __dquot_alloc_space(inode, number, warn, 1);
1483out_unlock:
1484 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1485out:
1486 return ret;
1487} 1484}
1488EXPORT_SYMBOL(dquot_reserve_space); 1485EXPORT_SYMBOL(dquot_reserve_space);
1489 1486
@@ -1524,10 +1521,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
1524warn_put_all: 1521warn_put_all:
1525 spin_unlock(&dq_data_lock); 1522 spin_unlock(&dq_data_lock);
1526 if (ret == QUOTA_OK) 1523 if (ret == QUOTA_OK)
1527 /* Dirtify all the dquots - this can block when journalling */ 1524 mark_all_dquot_dirty(inode->i_dquot);
1528 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1529 if (inode->i_dquot[cnt])
1530 mark_dquot_dirty(inode->i_dquot[cnt]);
1531 flush_warnings(inode->i_dquot, warntype); 1525 flush_warnings(inode->i_dquot, warntype);
1532 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1526 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1533 return ret; 1527 return ret;
@@ -1540,14 +1534,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1540 int ret = QUOTA_OK; 1534 int ret = QUOTA_OK;
1541 1535
1542 if (IS_NOQUOTA(inode)) { 1536 if (IS_NOQUOTA(inode)) {
1543 inode_add_bytes(inode, number); 1537 inode_claim_rsv_space(inode, number);
1544 goto out; 1538 goto out;
1545 } 1539 }
1546 1540
1547 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1541 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1548 if (IS_NOQUOTA(inode)) { 1542 if (IS_NOQUOTA(inode)) {
1549 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1543 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1550 inode_add_bytes(inode, number); 1544 inode_claim_rsv_space(inode, number);
1551 goto out; 1545 goto out;
1552 } 1546 }
1553 1547
@@ -1559,12 +1553,9 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1559 number); 1553 number);
1560 } 1554 }
1561 /* Update inode bytes */ 1555 /* Update inode bytes */
1562 inode_add_bytes(inode, number); 1556 inode_claim_rsv_space(inode, number);
1563 spin_unlock(&dq_data_lock); 1557 spin_unlock(&dq_data_lock);
1564 /* Dirtify all the dquots - this can block when journalling */ 1558 mark_all_dquot_dirty(inode->i_dquot);
1565 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1566 if (inode->i_dquot[cnt])
1567 mark_dquot_dirty(inode->i_dquot[cnt]);
1568 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1559 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1569out: 1560out:
1570 return ret; 1561 return ret;
@@ -1572,38 +1563,9 @@ out:
1572EXPORT_SYMBOL(dquot_claim_space); 1563EXPORT_SYMBOL(dquot_claim_space);
1573 1564
1574/* 1565/*
1575 * Release reserved quota space
1576 */
1577void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1578{
1579 int cnt;
1580
1581 if (IS_NOQUOTA(inode))
1582 goto out;
1583
1584 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1585 if (IS_NOQUOTA(inode))
1586 goto out_unlock;
1587
1588 spin_lock(&dq_data_lock);
1589 /* Release reserved dquots */
1590 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1591 if (inode->i_dquot[cnt])
1592 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1593 }
1594 spin_unlock(&dq_data_lock);
1595
1596out_unlock:
1597 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1598out:
1599 return;
1600}
1601EXPORT_SYMBOL(dquot_release_reserved_space);
1602
1603/*
1604 * This operation can block, but only after everything is updated 1566 * This operation can block, but only after everything is updated
1605 */ 1567 */
1606int dquot_free_space(struct inode *inode, qsize_t number) 1568int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
1607{ 1569{
1608 unsigned int cnt; 1570 unsigned int cnt;
1609 char warntype[MAXQUOTAS]; 1571 char warntype[MAXQUOTAS];
@@ -1612,7 +1574,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
1612 * re-enter the quota code and are already holding the mutex */ 1574 * re-enter the quota code and are already holding the mutex */
1613 if (IS_NOQUOTA(inode)) { 1575 if (IS_NOQUOTA(inode)) {
1614out_sub: 1576out_sub:
1615 inode_sub_bytes(inode, number); 1577 inode_decr_space(inode, number, reserve);
1616 return QUOTA_OK; 1578 return QUOTA_OK;
1617 } 1579 }
1618 1580
@@ -1627,21 +1589,40 @@ out_sub:
1627 if (!inode->i_dquot[cnt]) 1589 if (!inode->i_dquot[cnt])
1628 continue; 1590 continue;
1629 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); 1591 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1630 dquot_decr_space(inode->i_dquot[cnt], number); 1592 if (reserve)
1593 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1594 else
1595 dquot_decr_space(inode->i_dquot[cnt], number);
1631 } 1596 }
1632 inode_sub_bytes(inode, number); 1597 inode_decr_space(inode, number, reserve);
1633 spin_unlock(&dq_data_lock); 1598 spin_unlock(&dq_data_lock);
1634 /* Dirtify all the dquots - this can block when journalling */ 1599
1635 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1600 if (reserve)
1636 if (inode->i_dquot[cnt]) 1601 goto out_unlock;
1637 mark_dquot_dirty(inode->i_dquot[cnt]); 1602 mark_all_dquot_dirty(inode->i_dquot);
1603out_unlock:
1638 flush_warnings(inode->i_dquot, warntype); 1604 flush_warnings(inode->i_dquot, warntype);
1639 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1605 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1640 return QUOTA_OK; 1606 return QUOTA_OK;
1641} 1607}
1608
1609int dquot_free_space(struct inode *inode, qsize_t number)
1610{
1611 return __dquot_free_space(inode, number, 0);
1612}
1642EXPORT_SYMBOL(dquot_free_space); 1613EXPORT_SYMBOL(dquot_free_space);
1643 1614
1644/* 1615/*
1616 * Release reserved quota space
1617 */
1618void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1619{
1620 __dquot_free_space(inode, number, 1);
1621
1622}
1623EXPORT_SYMBOL(dquot_release_reserved_space);
1624
1625/*
1645 * This operation can block, but only after everything is updated 1626 * This operation can block, but only after everything is updated
1646 */ 1627 */
1647int dquot_free_inode(const struct inode *inode, qsize_t number) 1628int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1668,10 +1649,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1668 dquot_decr_inodes(inode->i_dquot[cnt], number); 1649 dquot_decr_inodes(inode->i_dquot[cnt], number);
1669 } 1650 }
1670 spin_unlock(&dq_data_lock); 1651 spin_unlock(&dq_data_lock);
1671 /* Dirtify all the dquots - this can block when journalling */ 1652 mark_all_dquot_dirty(inode->i_dquot);
1672 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1673 if (inode->i_dquot[cnt])
1674 mark_dquot_dirty(inode->i_dquot[cnt]);
1675 flush_warnings(inode->i_dquot, warntype); 1653 flush_warnings(inode->i_dquot, warntype);
1676 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1654 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1677 return QUOTA_OK; 1655 return QUOTA_OK;
@@ -1679,19 +1657,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1679EXPORT_SYMBOL(dquot_free_inode); 1657EXPORT_SYMBOL(dquot_free_inode);
1680 1658
1681/* 1659/*
1682 * call back function, get reserved quota space from underlying fs
1683 */
1684qsize_t dquot_get_reserved_space(struct inode *inode)
1685{
1686 qsize_t reserved_space = 0;
1687
1688 if (sb_any_quota_active(inode->i_sb) &&
1689 inode->i_sb->dq_op->get_reserved_space)
1690 reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
1691 return reserved_space;
1692}
1693
1694/*
1695 * Transfer the number of inode and blocks from one diskquota to an other. 1660 * Transfer the number of inode and blocks from one diskquota to an other.
1696 * 1661 *
1697 * This operation can block, but only after everything is updated 1662 * This operation can block, but only after everything is updated
@@ -1734,7 +1699,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1734 } 1699 }
1735 spin_lock(&dq_data_lock); 1700 spin_lock(&dq_data_lock);
1736 cur_space = inode_get_bytes(inode); 1701 cur_space = inode_get_bytes(inode);
1737 rsv_space = dquot_get_reserved_space(inode); 1702 rsv_space = inode_get_rsv_space(inode);
1738 space = cur_space + rsv_space; 1703 space = cur_space + rsv_space;
1739 /* Build the transfer_from list and check the limits */ 1704 /* Build the transfer_from list and check the limits */
1740 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1705 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1778,25 +1743,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1778 spin_unlock(&dq_data_lock); 1743 spin_unlock(&dq_data_lock);
1779 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1744 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1780 1745
1781 /* Dirtify all the dquots - this can block when journalling */ 1746 mark_all_dquot_dirty(transfer_from);
1782 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1747 mark_all_dquot_dirty(transfer_to);
1783 if (transfer_from[cnt]) 1748 /* The reference we got is transferred to the inode */
1784 mark_dquot_dirty(transfer_from[cnt]); 1749 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1785 if (transfer_to[cnt]) { 1750 transfer_to[cnt] = NULL;
1786 mark_dquot_dirty(transfer_to[cnt]);
1787 /* The reference we got is transferred to the inode */
1788 transfer_to[cnt] = NULL;
1789 }
1790 }
1791warn_put_all: 1751warn_put_all:
1792 flush_warnings(transfer_to, warntype_to); 1752 flush_warnings(transfer_to, warntype_to);
1793 flush_warnings(transfer_from, warntype_from_inodes); 1753 flush_warnings(transfer_from, warntype_from_inodes);
1794 flush_warnings(transfer_from, warntype_from_space); 1754 flush_warnings(transfer_from, warntype_from_space);
1795put_all: 1755put_all:
1796 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1756 dqput_all(transfer_from);
1797 dqput(transfer_from[cnt]); 1757 dqput_all(transfer_to);
1798 dqput(transfer_to[cnt]);
1799 }
1800 return ret; 1758 return ret;
1801over_quota: 1759over_quota:
1802 spin_unlock(&dq_data_lock); 1760 spin_unlock(&dq_data_lock);
@@ -2233,7 +2191,9 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
2233 struct dentry *dentry; 2191 struct dentry *dentry;
2234 int error; 2192 int error;
2235 2193
2194 mutex_lock(&sb->s_root->d_inode->i_mutex);
2236 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); 2195 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
2196 mutex_unlock(&sb->s_root->d_inode->i_mutex);
2237 if (IS_ERR(dentry)) 2197 if (IS_ERR(dentry))
2238 return PTR_ERR(dentry); 2198 return PTR_ERR(dentry);
2239 2199
@@ -2473,100 +2433,89 @@ const struct quotactl_ops vfs_quotactl_ops = {
2473 2433
2474static ctl_table fs_dqstats_table[] = { 2434static ctl_table fs_dqstats_table[] = {
2475 { 2435 {
2476 .ctl_name = FS_DQ_LOOKUPS,
2477 .procname = "lookups", 2436 .procname = "lookups",
2478 .data = &dqstats.lookups, 2437 .data = &dqstats.lookups,
2479 .maxlen = sizeof(int), 2438 .maxlen = sizeof(int),
2480 .mode = 0444, 2439 .mode = 0444,
2481 .proc_handler = &proc_dointvec, 2440 .proc_handler = proc_dointvec,
2482 }, 2441 },
2483 { 2442 {
2484 .ctl_name = FS_DQ_DROPS,
2485 .procname = "drops", 2443 .procname = "drops",
2486 .data = &dqstats.drops, 2444 .data = &dqstats.drops,
2487 .maxlen = sizeof(int), 2445 .maxlen = sizeof(int),
2488 .mode = 0444, 2446 .mode = 0444,
2489 .proc_handler = &proc_dointvec, 2447 .proc_handler = proc_dointvec,
2490 }, 2448 },
2491 { 2449 {
2492 .ctl_name = FS_DQ_READS,
2493 .procname = "reads", 2450 .procname = "reads",
2494 .data = &dqstats.reads, 2451 .data = &dqstats.reads,
2495 .maxlen = sizeof(int), 2452 .maxlen = sizeof(int),
2496 .mode = 0444, 2453 .mode = 0444,
2497 .proc_handler = &proc_dointvec, 2454 .proc_handler = proc_dointvec,
2498 }, 2455 },
2499 { 2456 {
2500 .ctl_name = FS_DQ_WRITES,
2501 .procname = "writes", 2457 .procname = "writes",
2502 .data = &dqstats.writes, 2458 .data = &dqstats.writes,
2503 .maxlen = sizeof(int), 2459 .maxlen = sizeof(int),
2504 .mode = 0444, 2460 .mode = 0444,
2505 .proc_handler = &proc_dointvec, 2461 .proc_handler = proc_dointvec,
2506 }, 2462 },
2507 { 2463 {
2508 .ctl_name = FS_DQ_CACHE_HITS,
2509 .procname = "cache_hits", 2464 .procname = "cache_hits",
2510 .data = &dqstats.cache_hits, 2465 .data = &dqstats.cache_hits,
2511 .maxlen = sizeof(int), 2466 .maxlen = sizeof(int),
2512 .mode = 0444, 2467 .mode = 0444,
2513 .proc_handler = &proc_dointvec, 2468 .proc_handler = proc_dointvec,
2514 }, 2469 },
2515 { 2470 {
2516 .ctl_name = FS_DQ_ALLOCATED,
2517 .procname = "allocated_dquots", 2471 .procname = "allocated_dquots",
2518 .data = &dqstats.allocated_dquots, 2472 .data = &dqstats.allocated_dquots,
2519 .maxlen = sizeof(int), 2473 .maxlen = sizeof(int),
2520 .mode = 0444, 2474 .mode = 0444,
2521 .proc_handler = &proc_dointvec, 2475 .proc_handler = proc_dointvec,
2522 }, 2476 },
2523 { 2477 {
2524 .ctl_name = FS_DQ_FREE,
2525 .procname = "free_dquots", 2478 .procname = "free_dquots",
2526 .data = &dqstats.free_dquots, 2479 .data = &dqstats.free_dquots,
2527 .maxlen = sizeof(int), 2480 .maxlen = sizeof(int),
2528 .mode = 0444, 2481 .mode = 0444,
2529 .proc_handler = &proc_dointvec, 2482 .proc_handler = proc_dointvec,
2530 }, 2483 },
2531 { 2484 {
2532 .ctl_name = FS_DQ_SYNCS,
2533 .procname = "syncs", 2485 .procname = "syncs",
2534 .data = &dqstats.syncs, 2486 .data = &dqstats.syncs,
2535 .maxlen = sizeof(int), 2487 .maxlen = sizeof(int),
2536 .mode = 0444, 2488 .mode = 0444,
2537 .proc_handler = &proc_dointvec, 2489 .proc_handler = proc_dointvec,
2538 }, 2490 },
2539#ifdef CONFIG_PRINT_QUOTA_WARNING 2491#ifdef CONFIG_PRINT_QUOTA_WARNING
2540 { 2492 {
2541 .ctl_name = FS_DQ_WARNINGS,
2542 .procname = "warnings", 2493 .procname = "warnings",
2543 .data = &flag_print_warnings, 2494 .data = &flag_print_warnings,
2544 .maxlen = sizeof(int), 2495 .maxlen = sizeof(int),
2545 .mode = 0644, 2496 .mode = 0644,
2546 .proc_handler = &proc_dointvec, 2497 .proc_handler = proc_dointvec,
2547 }, 2498 },
2548#endif 2499#endif
2549 { .ctl_name = 0 }, 2500 { },
2550}; 2501};
2551 2502
2552static ctl_table fs_table[] = { 2503static ctl_table fs_table[] = {
2553 { 2504 {
2554 .ctl_name = FS_DQSTATS,
2555 .procname = "quota", 2505 .procname = "quota",
2556 .mode = 0555, 2506 .mode = 0555,
2557 .child = fs_dqstats_table, 2507 .child = fs_dqstats_table,
2558 }, 2508 },
2559 { .ctl_name = 0 }, 2509 { },
2560}; 2510};
2561 2511
2562static ctl_table sys_table[] = { 2512static ctl_table sys_table[] = {
2563 { 2513 {
2564 .ctl_name = CTL_FS,
2565 .procname = "fs", 2514 .procname = "fs",
2566 .mode = 0555, 2515 .mode = 0555,
2567 .child = fs_table, 2516 .child = fs_table,
2568 }, 2517 },
2569 { .ctl_name = 0 }, 2518 { },
2570}; 2519};
2571 2520
2572static int __init dquot_init(void) 2521static int __init dquot_init(void)
@@ -2607,12 +2556,6 @@ static int __init dquot_init(void)
2607 2556
2608 register_shrinker(&dqcache_shrinker); 2557 register_shrinker(&dqcache_shrinker);
2609 2558
2610#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
2611 if (genl_register_family(&quota_genl_family) != 0)
2612 printk(KERN_ERR
2613 "VFS: Failed to create quota netlink interface.\n");
2614#endif
2615
2616 return 0; 2559 return 0;
2617} 2560}
2618module_init(dquot_init); 2561module_init(dquot_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95c5b42384b..ee91e275695 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,8 @@
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <net/netlink.h>
22#include <net/genetlink.h>
21 23
22/* Check validity of generic quotactl commands */ 24/* Check validity of generic quotactl commands */
23static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, 25static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
@@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
525 return ret; 527 return ret;
526} 528}
527#endif 529#endif
530
531
532#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
533
534/* Netlink family structure for quota */
535static struct genl_family quota_genl_family = {
536 .id = GENL_ID_GENERATE,
537 .hdrsize = 0,
538 .name = "VFS_DQUOT",
539 .version = 1,
540 .maxattr = QUOTA_NL_A_MAX,
541};
542
543/**
544 * quota_send_warning - Send warning to userspace about exceeded quota
545 * @type: The quota type: USRQQUOTA, GRPQUOTA,...
546 * @id: The user or group id of the quota that was exceeded
547 * @dev: The device on which the fs is mounted (sb->s_dev)
548 * @warntype: The type of the warning: QUOTA_NL_...
549 *
550 * This can be used by filesystems (including those which don't use
551 * dquot) to send a message to userspace relating to quota limits.
552 *
553 */
554
555void quota_send_warning(short type, unsigned int id, dev_t dev,
556 const char warntype)
557{
558 static atomic_t seq;
559 struct sk_buff *skb;
560 void *msg_head;
561 int ret;
562 int msg_size = 4 * nla_total_size(sizeof(u32)) +
563 2 * nla_total_size(sizeof(u64));
564
565 /* We have to allocate using GFP_NOFS as we are called from a
566 * filesystem performing write and thus further recursion into
567 * the fs to free some data could cause deadlocks. */
568 skb = genlmsg_new(msg_size, GFP_NOFS);
569 if (!skb) {
570 printk(KERN_ERR
571 "VFS: Not enough memory to send quota warning.\n");
572 return;
573 }
574 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
575 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
576 if (!msg_head) {
577 printk(KERN_ERR
578 "VFS: Cannot store netlink header in quota warning.\n");
579 goto err_out;
580 }
581 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
582 if (ret)
583 goto attr_err_out;
584 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
585 if (ret)
586 goto attr_err_out;
587 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
588 if (ret)
589 goto attr_err_out;
590 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
591 if (ret)
592 goto attr_err_out;
593 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
594 if (ret)
595 goto attr_err_out;
596 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
597 if (ret)
598 goto attr_err_out;
599 genlmsg_end(skb, msg_head);
600
601 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
602 return;
603attr_err_out:
604 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
605err_out:
606 kfree_skb(skb);
607}
608EXPORT_SYMBOL(quota_send_warning);
609
610static int __init quota_init(void)
611{
612 if (genl_register_family(&quota_genl_family) != 0)
613 printk(KERN_ERR
614 "VFS: Failed to create quota netlink interface.\n");
615 return 0;
616};
617
618module_init(quota_init);
619#endif
620
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 0edcf42b177..2ae757e9c00 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -204,7 +204,7 @@ out:
204 return ret; 204 return ret;
205} 205}
206 206
207static struct quota_format_ops v1_format_ops = { 207static const struct quota_format_ops v1_format_ops = {
208 .check_quota_file = v1_check_quota_file, 208 .check_quota_file = v1_check_quota_file,
209 .read_file_info = v1_read_file_info, 209 .read_file_info = v1_read_file_info,
210 .write_file_info = v1_write_file_info, 210 .write_file_info = v1_write_file_info,
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a5475fb1ae4..e3da02f4986 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -23,14 +23,23 @@ MODULE_LICENSE("GPL");
23 23
24#define __QUOTA_V2_PARANOIA 24#define __QUOTA_V2_PARANOIA
25 25
26static void v2_mem2diskdqb(void *dp, struct dquot *dquot); 26static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot);
27static void v2_disk2memdqb(struct dquot *dquot, void *dp); 27static void v2r0_disk2memdqb(struct dquot *dquot, void *dp);
28static int v2_is_id(void *dp, struct dquot *dquot); 28static int v2r0_is_id(void *dp, struct dquot *dquot);
29 29static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
30static struct qtree_fmt_operations v2_qtree_ops = { 30static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
31 .mem2disk_dqblk = v2_mem2diskdqb, 31static int v2r1_is_id(void *dp, struct dquot *dquot);
32 .disk2mem_dqblk = v2_disk2memdqb, 32
33 .is_id = v2_is_id, 33static struct qtree_fmt_operations v2r0_qtree_ops = {
34 .mem2disk_dqblk = v2r0_mem2diskdqb,
35 .disk2mem_dqblk = v2r0_disk2memdqb,
36 .is_id = v2r0_is_id,
37};
38
39static struct qtree_fmt_operations v2r1_qtree_ops = {
40 .mem2disk_dqblk = v2r1_mem2diskdqb,
41 .disk2mem_dqblk = v2r1_disk2memdqb,
42 .is_id = v2r1_is_id,
34}; 43};
35 44
36#define QUOTABLOCK_BITS 10 45#define QUOTABLOCK_BITS 10
@@ -46,23 +55,33 @@ static inline qsize_t v2_qbtos(qsize_t blocks)
46 return blocks << QUOTABLOCK_BITS; 55 return blocks << QUOTABLOCK_BITS;
47} 56}
48 57
58static int v2_read_header(struct super_block *sb, int type,
59 struct v2_disk_dqheader *dqhead)
60{
61 ssize_t size;
62
63 size = sb->s_op->quota_read(sb, type, (char *)dqhead,
64 sizeof(struct v2_disk_dqheader), 0);
65 if (size != sizeof(struct v2_disk_dqheader)) {
66 printk(KERN_WARNING "quota_v2: Failed header read:"
67 " expected=%zd got=%zd\n",
68 sizeof(struct v2_disk_dqheader), size);
69 return 0;
70 }
71 return 1;
72}
73
49/* Check whether given file is really vfsv0 quotafile */ 74/* Check whether given file is really vfsv0 quotafile */
50static int v2_check_quota_file(struct super_block *sb, int type) 75static int v2_check_quota_file(struct super_block *sb, int type)
51{ 76{
52 struct v2_disk_dqheader dqhead; 77 struct v2_disk_dqheader dqhead;
53 ssize_t size;
54 static const uint quota_magics[] = V2_INITQMAGICS; 78 static const uint quota_magics[] = V2_INITQMAGICS;
55 static const uint quota_versions[] = V2_INITQVERSIONS; 79 static const uint quota_versions[] = V2_INITQVERSIONS;
56 80
57 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 81 if (!v2_read_header(sb, type, &dqhead))
58 sizeof(struct v2_disk_dqheader), 0);
59 if (size != sizeof(struct v2_disk_dqheader)) {
60 printk("quota_v2: failed read expected=%zd got=%zd\n",
61 sizeof(struct v2_disk_dqheader), size);
62 return 0; 82 return 0;
63 }
64 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || 83 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
65 le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) 84 le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
66 return 0; 85 return 0;
67 return 1; 86 return 1;
68} 87}
@@ -71,14 +90,23 @@ static int v2_check_quota_file(struct super_block *sb, int type)
71static int v2_read_file_info(struct super_block *sb, int type) 90static int v2_read_file_info(struct super_block *sb, int type)
72{ 91{
73 struct v2_disk_dqinfo dinfo; 92 struct v2_disk_dqinfo dinfo;
93 struct v2_disk_dqheader dqhead;
74 struct mem_dqinfo *info = sb_dqinfo(sb, type); 94 struct mem_dqinfo *info = sb_dqinfo(sb, type);
75 struct qtree_mem_dqinfo *qinfo; 95 struct qtree_mem_dqinfo *qinfo;
76 ssize_t size; 96 ssize_t size;
97 unsigned int version;
98
99 if (!v2_read_header(sb, type, &dqhead))
100 return -1;
101 version = le32_to_cpu(dqhead.dqh_version);
102 if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
103 (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
104 return -1;
77 105
78 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
79 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
80 if (size != sizeof(struct v2_disk_dqinfo)) { 108 if (size != sizeof(struct v2_disk_dqinfo)) {
81 printk(KERN_WARNING "Can't read info structure on device %s.\n", 109 printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
82 sb->s_id); 110 sb->s_id);
83 return -1; 111 return -1;
84 } 112 }
@@ -89,9 +117,15 @@ static int v2_read_file_info(struct super_block *sb, int type)
89 return -1; 117 return -1;
90 } 118 }
91 qinfo = info->dqi_priv; 119 qinfo = info->dqi_priv;
92 /* limits are stored as unsigned 32-bit data */ 120 if (version == 0) {
93 info->dqi_maxblimit = 0xffffffff; 121 /* limits are stored as unsigned 32-bit data */
94 info->dqi_maxilimit = 0xffffffff; 122 info->dqi_maxblimit = 0xffffffff;
123 info->dqi_maxilimit = 0xffffffff;
124 } else {
125 /* used space is stored as unsigned 64-bit value */
126 info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */
127 info->dqi_maxilimit = 0xffffffffffffffffULL;
128 }
95 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 129 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
96 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 130 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
97 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); 131 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -103,8 +137,13 @@ static int v2_read_file_info(struct super_block *sb, int type)
103 qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS; 137 qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
104 qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS; 138 qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
105 qinfo->dqi_qtree_depth = qtree_depth(qinfo); 139 qinfo->dqi_qtree_depth = qtree_depth(qinfo);
106 qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk); 140 if (version == 0) {
107 qinfo->dqi_ops = &v2_qtree_ops; 141 qinfo->dqi_entry_size = sizeof(struct v2r0_disk_dqblk);
142 qinfo->dqi_ops = &v2r0_qtree_ops;
143 } else {
144 qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
145 qinfo->dqi_ops = &v2r1_qtree_ops;
146 }
108 return 0; 147 return 0;
109} 148}
110 149
@@ -135,9 +174,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
135 return 0; 174 return 0;
136} 175}
137 176
138static void v2_disk2memdqb(struct dquot *dquot, void *dp) 177static void v2r0_disk2memdqb(struct dquot *dquot, void *dp)
139{ 178{
140 struct v2_disk_dqblk *d = dp, empty; 179 struct v2r0_disk_dqblk *d = dp, empty;
141 struct mem_dqblk *m = &dquot->dq_dqb; 180 struct mem_dqblk *m = &dquot->dq_dqb;
142 181
143 m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); 182 m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
@@ -149,15 +188,15 @@ static void v2_disk2memdqb(struct dquot *dquot, void *dp)
149 m->dqb_curspace = le64_to_cpu(d->dqb_curspace); 188 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
150 m->dqb_btime = le64_to_cpu(d->dqb_btime); 189 m->dqb_btime = le64_to_cpu(d->dqb_btime);
151 /* We need to escape back all-zero structure */ 190 /* We need to escape back all-zero structure */
152 memset(&empty, 0, sizeof(struct v2_disk_dqblk)); 191 memset(&empty, 0, sizeof(struct v2r0_disk_dqblk));
153 empty.dqb_itime = cpu_to_le64(1); 192 empty.dqb_itime = cpu_to_le64(1);
154 if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk))) 193 if (!memcmp(&empty, dp, sizeof(struct v2r0_disk_dqblk)))
155 m->dqb_itime = 0; 194 m->dqb_itime = 0;
156} 195}
157 196
158static void v2_mem2diskdqb(void *dp, struct dquot *dquot) 197static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
159{ 198{
160 struct v2_disk_dqblk *d = dp; 199 struct v2r0_disk_dqblk *d = dp;
161 struct mem_dqblk *m = &dquot->dq_dqb; 200 struct mem_dqblk *m = &dquot->dq_dqb;
162 struct qtree_mem_dqinfo *info = 201 struct qtree_mem_dqinfo *info =
163 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 202 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
@@ -175,9 +214,60 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
175 d->dqb_itime = cpu_to_le64(1); 214 d->dqb_itime = cpu_to_le64(1);
176} 215}
177 216
178static int v2_is_id(void *dp, struct dquot *dquot) 217static int v2r0_is_id(void *dp, struct dquot *dquot)
179{ 218{
180 struct v2_disk_dqblk *d = dp; 219 struct v2r0_disk_dqblk *d = dp;
220 struct qtree_mem_dqinfo *info =
221 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
222
223 if (qtree_entry_unused(info, dp))
224 return 0;
225 return le32_to_cpu(d->dqb_id) == dquot->dq_id;
226}
227
228static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
229{
230 struct v2r1_disk_dqblk *d = dp, empty;
231 struct mem_dqblk *m = &dquot->dq_dqb;
232
233 m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
234 m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
235 m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
236 m->dqb_itime = le64_to_cpu(d->dqb_itime);
237 m->dqb_bhardlimit = v2_qbtos(le64_to_cpu(d->dqb_bhardlimit));
238 m->dqb_bsoftlimit = v2_qbtos(le64_to_cpu(d->dqb_bsoftlimit));
239 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
240 m->dqb_btime = le64_to_cpu(d->dqb_btime);
241 /* We need to escape back all-zero structure */
242 memset(&empty, 0, sizeof(struct v2r1_disk_dqblk));
243 empty.dqb_itime = cpu_to_le64(1);
244 if (!memcmp(&empty, dp, sizeof(struct v2r1_disk_dqblk)))
245 m->dqb_itime = 0;
246}
247
248static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
249{
250 struct v2r1_disk_dqblk *d = dp;
251 struct mem_dqblk *m = &dquot->dq_dqb;
252 struct qtree_mem_dqinfo *info =
253 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
254
255 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
256 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
257 d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
258 d->dqb_itime = cpu_to_le64(m->dqb_itime);
259 d->dqb_bhardlimit = cpu_to_le64(v2_stoqb(m->dqb_bhardlimit));
260 d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
261 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
262 d->dqb_btime = cpu_to_le64(m->dqb_btime);
263 d->dqb_id = cpu_to_le32(dquot->dq_id);
264 if (qtree_entry_unused(info, dp))
265 d->dqb_itime = cpu_to_le64(1);
266}
267
268static int v2r1_is_id(void *dp, struct dquot *dquot)
269{
270 struct v2r1_disk_dqblk *d = dp;
181 struct qtree_mem_dqinfo *info = 271 struct qtree_mem_dqinfo *info =
182 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 272 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
183 273
@@ -207,7 +297,7 @@ static int v2_free_file_info(struct super_block *sb, int type)
207 return 0; 297 return 0;
208} 298}
209 299
210static struct quota_format_ops v2_format_ops = { 300static const struct quota_format_ops v2_format_ops = {
211 .check_quota_file = v2_check_quota_file, 301 .check_quota_file = v2_check_quota_file,
212 .read_file_info = v2_read_file_info, 302 .read_file_info = v2_read_file_info,
213 .write_file_info = v2_write_file_info, 303 .write_file_info = v2_write_file_info,
@@ -217,20 +307,32 @@ static struct quota_format_ops v2_format_ops = {
217 .release_dqblk = v2_release_dquot, 307 .release_dqblk = v2_release_dquot,
218}; 308};
219 309
220static struct quota_format_type v2_quota_format = { 310static struct quota_format_type v2r0_quota_format = {
221 .qf_fmt_id = QFMT_VFS_V0, 311 .qf_fmt_id = QFMT_VFS_V0,
222 .qf_ops = &v2_format_ops, 312 .qf_ops = &v2_format_ops,
223 .qf_owner = THIS_MODULE 313 .qf_owner = THIS_MODULE
224}; 314};
225 315
316static struct quota_format_type v2r1_quota_format = {
317 .qf_fmt_id = QFMT_VFS_V1,
318 .qf_ops = &v2_format_ops,
319 .qf_owner = THIS_MODULE
320};
321
226static int __init init_v2_quota_format(void) 322static int __init init_v2_quota_format(void)
227{ 323{
228 return register_quota_format(&v2_quota_format); 324 int ret;
325
326 ret = register_quota_format(&v2r0_quota_format);
327 if (ret)
328 return ret;
329 return register_quota_format(&v2r1_quota_format);
229} 330}
230 331
231static void __exit exit_v2_quota_format(void) 332static void __exit exit_v2_quota_format(void)
232{ 333{
233 unregister_quota_format(&v2_quota_format); 334 unregister_quota_format(&v2r0_quota_format);
335 unregister_quota_format(&v2r1_quota_format);
234} 336}
235 337
236module_init(init_v2_quota_format); 338module_init(init_v2_quota_format);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index 530fe580685..f1966b42c2f 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -17,8 +17,8 @@
17} 17}
18 18
19#define V2_INITQVERSIONS {\ 19#define V2_INITQVERSIONS {\
20 0, /* USRQUOTA */\ 20 1, /* USRQUOTA */\
21 0 /* GRPQUOTA */\ 21 1 /* GRPQUOTA */\
22} 22}
23 23
24/* First generic header */ 24/* First generic header */
@@ -32,7 +32,7 @@ struct v2_disk_dqheader {
32 * (as it appears on disk) - the file is a radix tree whose leaves point 32 * (as it appears on disk) - the file is a radix tree whose leaves point
33 * to blocks of these structures. 33 * to blocks of these structures.
34 */ 34 */
35struct v2_disk_dqblk { 35struct v2r0_disk_dqblk {
36 __le32 dqb_id; /* id this quota applies to */ 36 __le32 dqb_id; /* id this quota applies to */
37 __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */ 37 __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
38 __le32 dqb_isoftlimit; /* preferred inode limit */ 38 __le32 dqb_isoftlimit; /* preferred inode limit */
@@ -44,6 +44,19 @@ struct v2_disk_dqblk {
44 __le64 dqb_itime; /* time limit for excessive inode use */ 44 __le64 dqb_itime; /* time limit for excessive inode use */
45}; 45};
46 46
47struct v2r1_disk_dqblk {
48 __le32 dqb_id; /* id this quota applies to */
49 __le32 dqb_pad;
50 __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
51 __le64 dqb_isoftlimit; /* preferred inode limit */
52 __le64 dqb_curinodes; /* current # allocated inodes */
53 __le64 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
54 __le64 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
55 __le64 dqb_curspace; /* current space occupied (in bytes) */
56 __le64 dqb_btime; /* time limit for excessive disk use */
57 __le64 dqb_itime; /* time limit for excessive inode use */
58};
59
47/* Header with type and version specific information */ 60/* Header with type and version specific information */
48struct v2_disk_dqinfo { 61struct v2_disk_dqinfo {
49 __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */ 62 __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040eb..1739a4aba25 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
60 */ 60 */
61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 unsigned long npages, xpages, loop, limit; 63 unsigned long npages, xpages, loop;
64 struct page *pages; 64 struct page *pages;
65 unsigned order; 65 unsigned order;
66 void *data; 66 void *data;
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/read_write.c b/fs/read_write.c
index 3ac28987f22..b7f4a1f94d4 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -826,8 +826,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
826 if (!(out_file->f_mode & FMODE_WRITE)) 826 if (!(out_file->f_mode & FMODE_WRITE))
827 goto fput_out; 827 goto fput_out;
828 retval = -EINVAL; 828 retval = -EINVAL;
829 if (!out_file->f_op || !out_file->f_op->sendpage)
830 goto fput_out;
831 in_inode = in_file->f_path.dentry->d_inode; 829 in_inode = in_file->f_path.dentry->d_inode;
832 out_inode = out_file->f_path.dentry->d_inode; 830 out_inode = out_file->f_path.dentry->d_inode;
833 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 831 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 7c5ab6330dd..792b3cb2cd1 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,11 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o xattr.o 10 item_ops.o ioctl.o xattr.o lock.o
11
12ifeq ($(CONFIG_REISERFS_PROC_INFO),y)
13reiserfs-objs += procfs.o
14endif
11 15
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 16ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr_user.o xattr_trusted.o 17reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e716161ab32..65c87276117 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1249 else if (bitmap == 0) 1249 else if (bitmap == 0)
1250 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; 1250 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1251 1251
1252 reiserfs_write_unlock(sb);
1252 bh = sb_bread(sb, block); 1253 bh = sb_bread(sb, block);
1254 reiserfs_write_lock(sb);
1253 if (bh == NULL) 1255 if (bh == NULL)
1254 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1255 "reading failed", __func__, block); 1257 "reading failed", __func__, block);
1256 else { 1258 else {
1257 if (buffer_locked(bh)) { 1259 if (buffer_locked(bh)) {
1258 PROC_INFO_INC(sb, scan_bitmap.wait); 1260 PROC_INFO_INC(sb, scan_bitmap.wait);
1261 reiserfs_write_unlock(sb);
1259 __wait_on_buffer(bh); 1262 __wait_on_buffer(bh);
1263 reiserfs_write_lock(sb);
1260 } 1264 }
1261 BUG_ON(!buffer_uptodate(bh)); 1265 BUG_ON(!buffer_uptodate(bh));
1262 BUG_ON(atomic_read(&bh->b_count) == 0); 1266 BUG_ON(atomic_read(&bh->b_count) == 0);
@@ -1273,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1273 struct reiserfs_bitmap_info *bitmap; 1277 struct reiserfs_bitmap_info *bitmap;
1274 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1278 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1275 1279
1280 /* Avoid lock recursion in fault case */
1281 reiserfs_write_unlock(sb);
1276 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1282 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1283 reiserfs_write_lock(sb);
1277 if (bitmap == NULL) 1284 if (bitmap == NULL)
1278 return -ENOMEM; 1285 return -ENOMEM;
1279 1286
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6d2668fdc38..c094f58c744 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = {
20 .read = generic_read_dir, 20 .read = generic_read_dir,
21 .readdir = reiserfs_readdir, 21 .readdir = reiserfs_readdir,
22 .fsync = reiserfs_dir_fsync, 22 .fsync = reiserfs_dir_fsync,
23 .ioctl = reiserfs_ioctl, 23 .unlocked_ioctl = reiserfs_ioctl,
24#ifdef CONFIG_COMPAT 24#ifdef CONFIG_COMPAT
25 .compat_ioctl = reiserfs_compat_ioctl, 25 .compat_ioctl = reiserfs_compat_ioctl,
26#endif 26#endif
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
174 // user space buffer is swapped out. At that time 174 // user space buffer is swapped out. At that time
175 // entry can move to somewhere else 175 // entry can move to somewhere else
176 memcpy(local_buf, d_name, d_reclen); 176 memcpy(local_buf, d_name, d_reclen);
177
178 /*
179 * Since filldir might sleep, we can release
180 * the write lock here for other waiters
181 */
182 reiserfs_write_unlock(inode->i_sb);
177 if (filldir 183 if (filldir
178 (dirent, local_buf, d_reclen, d_off, d_ino, 184 (dirent, local_buf, d_reclen, d_off, d_ino,
179 DT_UNKNOWN) < 0) { 185 DT_UNKNOWN) < 0) {
186 reiserfs_write_lock(inode->i_sb);
180 if (local_buf != small_buf) { 187 if (local_buf != small_buf) {
181 kfree(local_buf); 188 kfree(local_buf);
182 } 189 }
183 goto end; 190 goto end;
184 } 191 }
192 reiserfs_write_lock(inode->i_sb);
185 if (local_buf != small_buf) { 193 if (local_buf != small_buf) {
186 kfree(local_buf); 194 kfree(local_buf);
187 } 195 }
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 128d3f7c8aa..60c08044066 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,14 +21,6 @@
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23 23
24#ifdef CONFIG_REISERFS_CHECK
25
26struct tree_balance *cur_tb = NULL; /* detects whether more than one
27 copy of tb exists as a means
28 of checking whether schedule
29 is interrupting do_balance */
30#endif
31
32static inline void buffer_info_init_left(struct tree_balance *tb, 24static inline void buffer_info_init_left(struct tree_balance *tb,
33 struct buffer_info *bi) 25 struct buffer_info *bi)
34{ 26{
@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb)
1840{ 1832{
1841 int retval = 0; 1833 int retval = 0;
1842 1834
1843 if (cur_tb) { 1835 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1844 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " 1836 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1845 "occurred based on cur_tb not being null at " 1837 "occurred based on cur_tb not being null at "
1846 "this point in code. do_balance cannot properly " 1838 "this point in code. do_balance cannot properly "
1847 "handle schedule occurring while it runs."); 1839 "handle concurrent tree accesses on a same "
1840 "mount point.");
1848 } 1841 }
1849 1842
1850 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have 1843 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
1986 "check");*/ 1979 "check");*/
1987 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); 1980 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1988#ifdef CONFIG_REISERFS_CHECK 1981#ifdef CONFIG_REISERFS_CHECK
1989 cur_tb = tb; 1982 REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1990#endif 1983#endif
1991} 1984}
1992 1985
@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb)
1996#ifdef CONFIG_REISERFS_CHECK 1989#ifdef CONFIG_REISERFS_CHECK
1997 check_leaf_level(tb); 1990 check_leaf_level(tb);
1998 check_internal_levels(tb); 1991 check_internal_levels(tb);
1999 cur_tb = NULL; 1992 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
2000#endif 1993#endif
2001 1994
2002 /* reiserfs_free_block is no longer schedule safe. So, we need to 1995 /* reiserfs_free_block is no longer schedule safe. So, we need to
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9f436668b7f..da2dba082e2 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
284const struct file_operations reiserfs_file_operations = { 284const struct file_operations reiserfs_file_operations = {
285 .read = do_sync_read, 285 .read = do_sync_read,
286 .write = reiserfs_file_write, 286 .write = reiserfs_file_write,
287 .ioctl = reiserfs_ioctl, 287 .unlocked_ioctl = reiserfs_ioctl,
288#ifdef CONFIG_COMPAT 288#ifdef CONFIG_COMPAT
289 .compat_ioctl = reiserfs_compat_ioctl, 289 .compat_ioctl = reiserfs_compat_ioctl,
290#endif 290#endif
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5e5a4e6fbaf..6591cb21edf 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
563 return needed_nodes; 563 return needed_nodes;
564} 564}
565 565
566#ifdef CONFIG_REISERFS_CHECK
567extern struct tree_balance *cur_tb;
568#endif
569 566
570/* Set parameters for balancing. 567/* Set parameters for balancing.
571 * Performs write of results of analysis of balancing into structure tb, 568 * Performs write of results of analysis of balancing into structure tb,
@@ -834,7 +831,7 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
834 RFALSE(buffer_dirty(new_bh) || 831 RFALSE(buffer_dirty(new_bh) ||
835 buffer_journaled(new_bh) || 832 buffer_journaled(new_bh) ||
836 buffer_journal_dirty(new_bh), 833 buffer_journal_dirty(new_bh),
837 "PAP-8140: journlaled or dirty buffer %b for the new block", 834 "PAP-8140: journaled or dirty buffer %b for the new block",
838 new_bh); 835 new_bh);
839 836
840 /* Put empty buffers into the array. */ 837 /* Put empty buffers into the array. */
@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb,
1022 /* Check whether the common parent is locked. */ 1019 /* Check whether the common parent is locked. */
1023 1020
1024 if (buffer_locked(*pcom_father)) { 1021 if (buffer_locked(*pcom_father)) {
1022
1023 /* Release the write lock while the buffer is busy */
1024 reiserfs_write_unlock(tb->tb_sb);
1025 __wait_on_buffer(*pcom_father); 1025 __wait_on_buffer(*pcom_father);
1026 reiserfs_write_lock(tb->tb_sb);
1026 if (FILESYSTEM_CHANGED_TB(tb)) { 1027 if (FILESYSTEM_CHANGED_TB(tb)) {
1027 brelse(*pcom_father); 1028 brelse(*pcom_father);
1028 return REPEAT_SEARCH; 1029 return REPEAT_SEARCH;
@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1927 return REPEAT_SEARCH; 1928 return REPEAT_SEARCH;
1928 1929
1929 if (buffer_locked(bh)) { 1930 if (buffer_locked(bh)) {
1931 reiserfs_write_unlock(tb->tb_sb);
1930 __wait_on_buffer(bh); 1932 __wait_on_buffer(bh);
1933 reiserfs_write_lock(tb->tb_sb);
1931 if (FILESYSTEM_CHANGED_TB(tb)) 1934 if (FILESYSTEM_CHANGED_TB(tb))
1932 return REPEAT_SEARCH; 1935 return REPEAT_SEARCH;
1933 } 1936 }
@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
1965 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> 1968 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
1966 FL[h]); 1969 FL[h]);
1967 son_number = B_N_CHILD_NUM(tb->FL[h], child_position); 1970 son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1971 reiserfs_write_unlock(sb);
1968 bh = sb_bread(sb, son_number); 1972 bh = sb_bread(sb, son_number);
1973 reiserfs_write_lock(sb);
1969 if (!bh) 1974 if (!bh)
1970 return IO_ERROR; 1975 return IO_ERROR;
1971 if (FILESYSTEM_CHANGED_TB(tb)) { 1976 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
2003 child_position = 2008 child_position =
2004 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; 2009 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
2005 son_number = B_N_CHILD_NUM(tb->FR[h], child_position); 2010 son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2011 reiserfs_write_unlock(sb);
2006 bh = sb_bread(sb, son_number); 2012 bh = sb_bread(sb, son_number);
2013 reiserfs_write_lock(sb);
2007 if (!bh) 2014 if (!bh)
2008 return IO_ERROR; 2015 return IO_ERROR;
2009 if (FILESYSTEM_CHANGED_TB(tb)) { 2016 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2278 REPEAT_SEARCH : CARRY_ON; 2285 REPEAT_SEARCH : CARRY_ON;
2279 } 2286 }
2280#endif 2287#endif
2288 reiserfs_write_unlock(tb->tb_sb);
2281 __wait_on_buffer(locked); 2289 __wait_on_buffer(locked);
2290 reiserfs_write_lock(tb->tb_sb);
2282 if (FILESYSTEM_CHANGED_TB(tb)) 2291 if (FILESYSTEM_CHANGED_TB(tb))
2283 return REPEAT_SEARCH; 2292 return REPEAT_SEARCH;
2284 } 2293 }
@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2349 2358
2350 /* if it possible in indirect_to_direct conversion */ 2359 /* if it possible in indirect_to_direct conversion */
2351 if (buffer_locked(tbS0)) { 2360 if (buffer_locked(tbS0)) {
2361 reiserfs_write_unlock(tb->tb_sb);
2352 __wait_on_buffer(tbS0); 2362 __wait_on_buffer(tbS0);
2363 reiserfs_write_lock(tb->tb_sb);
2353 if (FILESYSTEM_CHANGED_TB(tb)) 2364 if (FILESYSTEM_CHANGED_TB(tb))
2354 return REPEAT_SEARCH; 2365 return REPEAT_SEARCH;
2355 } 2366 }
2356#ifdef CONFIG_REISERFS_CHECK 2367#ifdef CONFIG_REISERFS_CHECK
2357 if (cur_tb) { 2368 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
2358 print_cur_tb("fix_nodes"); 2369 print_cur_tb("fix_nodes");
2359 reiserfs_panic(tb->tb_sb, "PAP-8305", 2370 reiserfs_panic(tb->tb_sb, "PAP-8305",
2360 "there is pending do_balance"); 2371 "there is pending do_balance");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a14d6cd9eed..9087b10209e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
31 JOURNAL_PER_BALANCE_CNT * 2 + 31 JOURNAL_PER_BALANCE_CNT * 2 +
32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 struct reiserfs_transaction_handle th; 33 struct reiserfs_transaction_handle th;
34 int depth;
34 int err; 35 int err;
35 36
36 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
37 38
38 reiserfs_write_lock(inode->i_sb); 39 depth = reiserfs_write_lock_once(inode->i_sb);
39 40
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
74 out: 75 out:
75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 76 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
76 inode->i_blocks = 0; 77 inode->i_blocks = 0;
77 reiserfs_write_unlock(inode->i_sb); 78 reiserfs_write_unlock_once(inode->i_sb, depth);
78} 79}
79 80
80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 81static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -251,7 +252,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
251 struct cpu_key key; 252 struct cpu_key key;
252 struct buffer_head *bh; 253 struct buffer_head *bh;
253 struct item_head *ih, tmp_ih; 254 struct item_head *ih, tmp_ih;
254 int fs_gen;
255 b_blocknr_t blocknr; 255 b_blocknr_t blocknr;
256 char *p = NULL; 256 char *p = NULL;
257 int chars; 257 int chars;
@@ -265,7 +265,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
265 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 265 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
266 3); 266 3);
267 267
268 research:
269 result = search_for_position_by_key(inode->i_sb, &key, &path); 268 result = search_for_position_by_key(inode->i_sb, &key, &path);
270 if (result != POSITION_FOUND) { 269 if (result != POSITION_FOUND) {
271 pathrelse(&path); 270 pathrelse(&path);
@@ -340,7 +339,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
340 } 339 }
341 // read file tail into part of page 340 // read file tail into part of page
342 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); 341 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
343 fs_gen = get_generation(inode->i_sb);
344 copy_item_head(&tmp_ih, ih); 342 copy_item_head(&tmp_ih, ih);
345 343
346 /* we only want to kmap if we are reading the tail into the page. 344 /* we only want to kmap if we are reading the tail into the page.
@@ -348,13 +346,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
348 ** sure we need to. But, this means the item might move if 346 ** sure we need to. But, this means the item might move if
349 ** kmap schedules 347 ** kmap schedules
350 */ 348 */
351 if (!p) { 349 if (!p)
352 p = (char *)kmap(bh_result->b_page); 350 p = (char *)kmap(bh_result->b_page);
353 if (fs_changed(fs_gen, inode->i_sb) 351
354 && item_moved(&tmp_ih, &path)) {
355 goto research;
356 }
357 }
358 p += offset; 352 p += offset;
359 memset(p, 0, inode->i_sb->s_blocksize); 353 memset(p, 0, inode->i_sb->s_blocksize);
360 do { 354 do {
@@ -489,10 +483,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
489 disappeared */ 483 disappeared */
490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { 484 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
491 int err; 485 int err;
492 lock_kernel(); 486
487 reiserfs_write_lock(inode->i_sb);
488
493 err = reiserfs_commit_for_inode(inode); 489 err = reiserfs_commit_for_inode(inode);
494 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 490 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
495 unlock_kernel(); 491
492 reiserfs_write_unlock(inode->i_sb);
493
496 if (err < 0) 494 if (err < 0)
497 ret = err; 495 ret = err;
498 } 496 }
@@ -601,6 +599,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
601 __le32 *item; 599 __le32 *item;
602 int done; 600 int done;
603 int fs_gen; 601 int fs_gen;
602 int lock_depth;
604 struct reiserfs_transaction_handle *th = NULL; 603 struct reiserfs_transaction_handle *th = NULL;
605 /* space reserved in transaction batch: 604 /* space reserved in transaction batch:
606 . 3 balancings in direct->indirect conversion 605 . 3 balancings in direct->indirect conversion
@@ -616,12 +615,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
616 loff_t new_offset = 615 loff_t new_offset =
617 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; 616 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
618 617
619 /* bad.... */ 618 lock_depth = reiserfs_write_lock_once(inode->i_sb);
620 reiserfs_write_lock(inode->i_sb);
621 version = get_inode_item_key_version(inode); 619 version = get_inode_item_key_version(inode);
622 620
623 if (!file_capable(inode, block)) { 621 if (!file_capable(inode, block)) {
624 reiserfs_write_unlock(inode->i_sb); 622 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
625 return -EFBIG; 623 return -EFBIG;
626 } 624 }
627 625
@@ -633,7 +631,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
633 /* find number of block-th logical block of the file */ 631 /* find number of block-th logical block of the file */
634 ret = _get_block_create_0(inode, block, bh_result, 632 ret = _get_block_create_0(inode, block, bh_result,
635 create | GET_BLOCK_READ_DIRECT); 633 create | GET_BLOCK_READ_DIRECT);
636 reiserfs_write_unlock(inode->i_sb); 634 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
637 return ret; 635 return ret;
638 } 636 }
639 /* 637 /*
@@ -751,7 +749,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
751 if (!dangle && th) 749 if (!dangle && th)
752 retval = reiserfs_end_persistent_transaction(th); 750 retval = reiserfs_end_persistent_transaction(th);
753 751
754 reiserfs_write_unlock(inode->i_sb); 752 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
755 753
756 /* the item was found, so new blocks were not added to the file 754 /* the item was found, so new blocks were not added to the file
757 ** there is no need to make sure the inode is updated with this 755 ** there is no need to make sure the inode is updated with this
@@ -935,7 +933,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
935 if (blocks_needed == 1) { 933 if (blocks_needed == 1) {
936 un = &unf_single; 934 un = &unf_single;
937 } else { 935 } else {
938 un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. 936 un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
939 if (!un) { 937 if (!un) {
940 un = &unf_single; 938 un = &unf_single;
941 blocks_needed = 1; 939 blocks_needed = 1;
@@ -997,10 +995,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
997 if (retval) 995 if (retval)
998 goto failure; 996 goto failure;
999 } 997 }
1000 /* inserting indirect pointers for a hole can take a 998 /*
1001 ** long time. reschedule if needed 999 * inserting indirect pointers for a hole can take a
1000 * long time. reschedule if needed and also release the write
1001 * lock for others.
1002 */ 1002 */
1003 cond_resched(); 1003 if (need_resched()) {
1004 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1005 schedule();
1006 lock_depth = reiserfs_write_lock_once(inode->i_sb);
1007 }
1004 1008
1005 retval = search_for_position_by_key(inode->i_sb, &key, &path); 1009 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1006 if (retval == IO_ERROR) { 1010 if (retval == IO_ERROR) {
@@ -1035,7 +1039,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
1035 retval = err; 1039 retval = err;
1036 } 1040 }
1037 1041
1038 reiserfs_write_unlock(inode->i_sb); 1042 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1039 reiserfs_check_path(&path); 1043 reiserfs_check_path(&path);
1040 return retval; 1044 return retval;
1041} 1045}
@@ -2072,8 +2076,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2072 int error; 2076 int error;
2073 struct buffer_head *bh = NULL; 2077 struct buffer_head *bh = NULL;
2074 int err2; 2078 int err2;
2079 int lock_depth;
2075 2080
2076 reiserfs_write_lock(inode->i_sb); 2081 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2077 2082
2078 if (inode->i_size > 0) { 2083 if (inode->i_size > 0) {
2079 error = grab_tail_page(inode, &page, &bh); 2084 error = grab_tail_page(inode, &page, &bh);
@@ -2142,14 +2147,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2142 page_cache_release(page); 2147 page_cache_release(page);
2143 } 2148 }
2144 2149
2145 reiserfs_write_unlock(inode->i_sb); 2150 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2151
2146 return 0; 2152 return 0;
2147 out: 2153 out:
2148 if (page) { 2154 if (page) {
2149 unlock_page(page); 2155 unlock_page(page);
2150 page_cache_release(page); 2156 page_cache_release(page);
2151 } 2157 }
2152 reiserfs_write_unlock(inode->i_sb); 2158
2159 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2160
2153 return error; 2161 return error;
2154} 2162}
2155 2163
@@ -2531,6 +2539,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2531 return reiserfs_write_full_page(page, wbc); 2539 return reiserfs_write_full_page(page, wbc);
2532} 2540}
2533 2541
2542static void reiserfs_truncate_failed_write(struct inode *inode)
2543{
2544 truncate_inode_pages(inode->i_mapping, inode->i_size);
2545 reiserfs_truncate_file(inode, 0);
2546}
2547
2534static int reiserfs_write_begin(struct file *file, 2548static int reiserfs_write_begin(struct file *file,
2535 struct address_space *mapping, 2549 struct address_space *mapping,
2536 loff_t pos, unsigned len, unsigned flags, 2550 loff_t pos, unsigned len, unsigned flags,
@@ -2597,6 +2611,8 @@ static int reiserfs_write_begin(struct file *file,
2597 if (ret) { 2611 if (ret) {
2598 unlock_page(page); 2612 unlock_page(page);
2599 page_cache_release(page); 2613 page_cache_release(page);
2614 /* Truncate allocated blocks */
2615 reiserfs_truncate_failed_write(inode);
2600 } 2616 }
2601 return ret; 2617 return ret;
2602} 2618}
@@ -2608,7 +2624,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
2608 int ret; 2624 int ret;
2609 int old_ref = 0; 2625 int old_ref = 0;
2610 2626
2627 reiserfs_write_unlock(inode->i_sb);
2611 reiserfs_wait_on_write_block(inode->i_sb); 2628 reiserfs_wait_on_write_block(inode->i_sb);
2629 reiserfs_write_lock(inode->i_sb);
2630
2612 fix_tail_page_for_writing(page); 2631 fix_tail_page_for_writing(page);
2613 if (reiserfs_transaction_running(inode->i_sb)) { 2632 if (reiserfs_transaction_running(inode->i_sb)) {
2614 struct reiserfs_transaction_handle *th; 2633 struct reiserfs_transaction_handle *th;
@@ -2664,6 +2683,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2664 int update_sd = 0; 2683 int update_sd = 0;
2665 struct reiserfs_transaction_handle *th; 2684 struct reiserfs_transaction_handle *th;
2666 unsigned start; 2685 unsigned start;
2686 int lock_depth = 0;
2687 bool locked = false;
2667 2688
2668 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) 2689 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2669 pos ++; 2690 pos ++;
@@ -2689,10 +2710,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2689 ** transaction tracking stuff when the size changes. So, we have 2710 ** transaction tracking stuff when the size changes. So, we have
2690 ** to do the i_size updates here. 2711 ** to do the i_size updates here.
2691 */ 2712 */
2692 pos += copied; 2713 if (pos + copied > inode->i_size) {
2693 if (pos > inode->i_size) {
2694 struct reiserfs_transaction_handle myth; 2714 struct reiserfs_transaction_handle myth;
2695 reiserfs_write_lock(inode->i_sb); 2715 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2716 locked = true;
2696 /* If the file have grown beyond the border where it 2717 /* If the file have grown beyond the border where it
2697 can have a tail, unmark it as needing a tail 2718 can have a tail, unmark it as needing a tail
2698 packing */ 2719 packing */
@@ -2703,12 +2724,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2703 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 2724 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2704 2725
2705 ret = journal_begin(&myth, inode->i_sb, 1); 2726 ret = journal_begin(&myth, inode->i_sb, 1);
2706 if (ret) { 2727 if (ret)
2707 reiserfs_write_unlock(inode->i_sb);
2708 goto journal_error; 2728 goto journal_error;
2709 } 2729
2710 reiserfs_update_inode_transaction(inode); 2730 reiserfs_update_inode_transaction(inode);
2711 inode->i_size = pos; 2731 inode->i_size = pos + copied;
2712 /* 2732 /*
2713 * this will just nest into our transaction. It's important 2733 * this will just nest into our transaction. It's important
2714 * to use mark_inode_dirty so the inode gets pushed around on the 2734 * to use mark_inode_dirty so the inode gets pushed around on the
@@ -2718,34 +2738,40 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2718 reiserfs_update_sd(&myth, inode); 2738 reiserfs_update_sd(&myth, inode);
2719 update_sd = 1; 2739 update_sd = 1;
2720 ret = journal_end(&myth, inode->i_sb, 1); 2740 ret = journal_end(&myth, inode->i_sb, 1);
2721 reiserfs_write_unlock(inode->i_sb);
2722 if (ret) 2741 if (ret)
2723 goto journal_error; 2742 goto journal_error;
2724 } 2743 }
2725 if (th) { 2744 if (th) {
2726 reiserfs_write_lock(inode->i_sb); 2745 if (!locked) {
2746 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2747 locked = true;
2748 }
2727 if (!update_sd) 2749 if (!update_sd)
2728 mark_inode_dirty(inode); 2750 mark_inode_dirty(inode);
2729 ret = reiserfs_end_persistent_transaction(th); 2751 ret = reiserfs_end_persistent_transaction(th);
2730 reiserfs_write_unlock(inode->i_sb);
2731 if (ret) 2752 if (ret)
2732 goto out; 2753 goto out;
2733 } 2754 }
2734 2755
2735 out: 2756 out:
2757 if (locked)
2758 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2736 unlock_page(page); 2759 unlock_page(page);
2737 page_cache_release(page); 2760 page_cache_release(page);
2761
2762 if (pos + len > inode->i_size)
2763 reiserfs_truncate_failed_write(inode);
2764
2738 return ret == 0 ? copied : ret; 2765 return ret == 0 ? copied : ret;
2739 2766
2740 journal_error: 2767 journal_error:
2768 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2769 locked = false;
2741 if (th) { 2770 if (th) {
2742 reiserfs_write_lock(inode->i_sb);
2743 if (!update_sd) 2771 if (!update_sd)
2744 reiserfs_update_sd(th, inode); 2772 reiserfs_update_sd(th, inode);
2745 ret = reiserfs_end_persistent_transaction(th); 2773 ret = reiserfs_end_persistent_transaction(th);
2746 reiserfs_write_unlock(inode->i_sb);
2747 } 2774 }
2748
2749 goto out; 2775 goto out;
2750} 2776}
2751 2777
@@ -2758,7 +2784,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2758 int update_sd = 0; 2784 int update_sd = 0;
2759 struct reiserfs_transaction_handle *th = NULL; 2785 struct reiserfs_transaction_handle *th = NULL;
2760 2786
2787 reiserfs_write_unlock(inode->i_sb);
2761 reiserfs_wait_on_write_block(inode->i_sb); 2788 reiserfs_wait_on_write_block(inode->i_sb);
2789 reiserfs_write_lock(inode->i_sb);
2790
2762 if (reiserfs_transaction_running(inode->i_sb)) { 2791 if (reiserfs_transaction_running(inode->i_sb)) {
2763 th = current->journal_info; 2792 th = current->journal_info;
2764 } 2793 }
@@ -2770,7 +2799,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2770 */ 2799 */
2771 if (pos > inode->i_size) { 2800 if (pos > inode->i_size) {
2772 struct reiserfs_transaction_handle myth; 2801 struct reiserfs_transaction_handle myth;
2773 reiserfs_write_lock(inode->i_sb);
2774 /* If the file have grown beyond the border where it 2802 /* If the file have grown beyond the border where it
2775 can have a tail, unmark it as needing a tail 2803 can have a tail, unmark it as needing a tail
2776 packing */ 2804 packing */
@@ -2781,10 +2809,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2781 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 2809 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2782 2810
2783 ret = journal_begin(&myth, inode->i_sb, 1); 2811 ret = journal_begin(&myth, inode->i_sb, 1);
2784 if (ret) { 2812 if (ret)
2785 reiserfs_write_unlock(inode->i_sb);
2786 goto journal_error; 2813 goto journal_error;
2787 } 2814
2788 reiserfs_update_inode_transaction(inode); 2815 reiserfs_update_inode_transaction(inode);
2789 inode->i_size = pos; 2816 inode->i_size = pos;
2790 /* 2817 /*
@@ -2796,16 +2823,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2796 reiserfs_update_sd(&myth, inode); 2823 reiserfs_update_sd(&myth, inode);
2797 update_sd = 1; 2824 update_sd = 1;
2798 ret = journal_end(&myth, inode->i_sb, 1); 2825 ret = journal_end(&myth, inode->i_sb, 1);
2799 reiserfs_write_unlock(inode->i_sb);
2800 if (ret) 2826 if (ret)
2801 goto journal_error; 2827 goto journal_error;
2802 } 2828 }
2803 if (th) { 2829 if (th) {
2804 reiserfs_write_lock(inode->i_sb);
2805 if (!update_sd) 2830 if (!update_sd)
2806 mark_inode_dirty(inode); 2831 mark_inode_dirty(inode);
2807 ret = reiserfs_end_persistent_transaction(th); 2832 ret = reiserfs_end_persistent_transaction(th);
2808 reiserfs_write_unlock(inode->i_sb);
2809 if (ret) 2833 if (ret)
2810 goto out; 2834 goto out;
2811 } 2835 }
@@ -2815,11 +2839,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2815 2839
2816 journal_error: 2840 journal_error:
2817 if (th) { 2841 if (th) {
2818 reiserfs_write_lock(inode->i_sb);
2819 if (!update_sd) 2842 if (!update_sd)
2820 reiserfs_update_sd(th, inode); 2843 reiserfs_update_sd(th, inode);
2821 ret = reiserfs_end_persistent_transaction(th); 2844 ret = reiserfs_end_persistent_transaction(th);
2822 reiserfs_write_unlock(inode->i_sb);
2823 } 2845 }
2824 2846
2825 return ret; 2847 return ret;
@@ -3040,13 +3062,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3040int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3062int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3041{ 3063{
3042 struct inode *inode = dentry->d_inode; 3064 struct inode *inode = dentry->d_inode;
3043 int error;
3044 unsigned int ia_valid; 3065 unsigned int ia_valid;
3066 int depth;
3067 int error;
3045 3068
3046 /* must be turned off for recursive notify_change calls */ 3069 /* must be turned off for recursive notify_change calls */
3047 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3070 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3048 3071
3049 reiserfs_write_lock(inode->i_sb); 3072 depth = reiserfs_write_lock_once(inode->i_sb);
3050 if (attr->ia_valid & ATTR_SIZE) { 3073 if (attr->ia_valid & ATTR_SIZE) {
3051 /* version 2 items will be caught by the s_maxbytes check 3074 /* version 2 items will be caught by the s_maxbytes check
3052 ** done for us in vmtruncate 3075 ** done for us in vmtruncate
@@ -3127,8 +3150,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3127 journal_end(&th, inode->i_sb, jbegin_count); 3150 journal_end(&th, inode->i_sb, jbegin_count);
3128 } 3151 }
3129 } 3152 }
3130 if (!error) 3153 if (!error) {
3154 /*
3155 * Relax the lock here, as it might truncate the
3156 * inode pages and wait for inode pages locks.
3157 * To release such page lock, the owner needs the
3158 * reiserfs lock
3159 */
3160 reiserfs_write_unlock_once(inode->i_sb, depth);
3131 error = inode_setattr(inode, attr); 3161 error = inode_setattr(inode, attr);
3162 depth = reiserfs_write_lock_once(inode->i_sb);
3163 }
3132 } 3164 }
3133 3165
3134 if (!error && reiserfs_posixacl(inode->i_sb)) { 3166 if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3137,7 +3169,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3137 } 3169 }
3138 3170
3139 out: 3171 out:
3140 reiserfs_write_unlock(inode->i_sb); 3172 reiserfs_write_unlock_once(inode->i_sb, depth);
3173
3141 return error; 3174 return error;
3142} 3175}
3143 3176
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0ccc3fdda7b..f53505de071 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -13,44 +13,52 @@
13#include <linux/compat.h> 13#include <linux/compat.h>
14 14
15/* 15/*
16** reiserfs_ioctl - handler for ioctl for inode 16 * reiserfs_ioctl - handler for ioctl for inode
17** supported commands: 17 * supported commands:
18** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect 18 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
19** and prevent packing file (argument arg has to be non-zero) 19 * and prevent packing file (argument arg has to be non-zero)
20** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 20 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
21** 3) That's all for a while ... 21 * 3) That's all for a while ...
22*/ 22 */
23int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 23long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24 unsigned long arg)
25{ 24{
25 struct inode *inode = filp->f_path.dentry->d_inode;
26 unsigned int flags; 26 unsigned int flags;
27 int err = 0; 27 int err = 0;
28 28
29 reiserfs_write_lock(inode->i_sb);
30
29 switch (cmd) { 31 switch (cmd) {
30 case REISERFS_IOC_UNPACK: 32 case REISERFS_IOC_UNPACK:
31 if (S_ISREG(inode->i_mode)) { 33 if (S_ISREG(inode->i_mode)) {
32 if (arg) 34 if (arg)
33 return reiserfs_unpack(inode, filp); 35 err = reiserfs_unpack(inode, filp);
34 else
35 return 0;
36 } else 36 } else
37 return -ENOTTY; 37 err = -ENOTTY;
38 /* following two cases are taken from fs/ext2/ioctl.c by Remy 38 break;
39 Card (card@masi.ibp.fr) */ 39 /*
40 * following two cases are taken from fs/ext2/ioctl.c by Remy
41 * Card (card@masi.ibp.fr)
42 */
40 case REISERFS_IOC_GETFLAGS: 43 case REISERFS_IOC_GETFLAGS:
41 if (!reiserfs_attrs(inode->i_sb)) 44 if (!reiserfs_attrs(inode->i_sb)) {
42 return -ENOTTY; 45 err = -ENOTTY;
46 break;
47 }
43 48
44 flags = REISERFS_I(inode)->i_attrs; 49 flags = REISERFS_I(inode)->i_attrs;
45 i_attrs_to_sd_attrs(inode, (__u16 *) & flags); 50 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
46 return put_user(flags, (int __user *)arg); 51 err = put_user(flags, (int __user *)arg);
52 break;
47 case REISERFS_IOC_SETFLAGS:{ 53 case REISERFS_IOC_SETFLAGS:{
48 if (!reiserfs_attrs(inode->i_sb)) 54 if (!reiserfs_attrs(inode->i_sb)) {
49 return -ENOTTY; 55 err = -ENOTTY;
56 break;
57 }
50 58
51 err = mnt_want_write(filp->f_path.mnt); 59 err = mnt_want_write(filp->f_path.mnt);
52 if (err) 60 if (err)
53 return err; 61 break;
54 62
55 if (!is_owner_or_cap(inode)) { 63 if (!is_owner_or_cap(inode)) {
56 err = -EPERM; 64 err = -EPERM;
@@ -90,16 +98,19 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
90 mark_inode_dirty(inode); 98 mark_inode_dirty(inode);
91setflags_out: 99setflags_out:
92 mnt_drop_write(filp->f_path.mnt); 100 mnt_drop_write(filp->f_path.mnt);
93 return err; 101 break;
94 } 102 }
95 case REISERFS_IOC_GETVERSION: 103 case REISERFS_IOC_GETVERSION:
96 return put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break;
97 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
98 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode)) {
99 return -EPERM; 108 err = -EPERM;
109 break;
110 }
100 err = mnt_want_write(filp->f_path.mnt); 111 err = mnt_want_write(filp->f_path.mnt);
101 if (err) 112 if (err)
102 return err; 113 break;
103 if (get_user(inode->i_generation, (int __user *)arg)) { 114 if (get_user(inode->i_generation, (int __user *)arg)) {
104 err = -EFAULT; 115 err = -EFAULT;
105 goto setversion_out; 116 goto setversion_out;
@@ -108,19 +119,20 @@ setflags_out:
108 mark_inode_dirty(inode); 119 mark_inode_dirty(inode);
109setversion_out: 120setversion_out:
110 mnt_drop_write(filp->f_path.mnt); 121 mnt_drop_write(filp->f_path.mnt);
111 return err; 122 break;
112 default: 123 default:
113 return -ENOTTY; 124 err = -ENOTTY;
114 } 125 }
126
127 reiserfs_write_unlock(inode->i_sb);
128
129 return err;
115} 130}
116 131
117#ifdef CONFIG_COMPAT 132#ifdef CONFIG_COMPAT
118long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, 133long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
119 unsigned long arg) 134 unsigned long arg)
120{ 135{
121 struct inode *inode = file->f_path.dentry->d_inode;
122 int ret;
123
124 /* These are just misnamed, they actually get/put from/to user an int */ 136 /* These are just misnamed, they actually get/put from/to user an int */
125 switch (cmd) { 137 switch (cmd) {
126 case REISERFS_IOC32_UNPACK: 138 case REISERFS_IOC32_UNPACK:
@@ -141,10 +153,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
141 default: 153 default:
142 return -ENOIOCTLCMD; 154 return -ENOIOCTLCMD;
143 } 155 }
144 lock_kernel(); 156
145 ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); 157 return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
146 unlock_kernel();
147 return ret;
148} 158}
149#endif 159#endif
150 160
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39..83ac4d3b3cb 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
429 clear_buffer_journal_restore_dirty(bh); 429 clear_buffer_journal_restore_dirty(bh);
430} 430}
431 431
432/* utility function to force a BUG if it is called without the big
433** kernel lock held. caller is the string printed just before calling BUG()
434*/
435void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
436{
437#ifdef CONFIG_SMP
438 if (current->lock_depth < 0) {
439 reiserfs_panic(sb, "journal-1", "%s called without kernel "
440 "lock held", caller);
441 }
442#else
443 ;
444#endif
445}
446
447/* return a cnode with same dev, block number and size in table, or null if not found */ 432/* return a cnode with same dev, block number and size in table, or null if not found */
448static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 433static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
449 super_block 434 super_block
@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
556static inline void lock_journal(struct super_block *sb) 541static inline void lock_journal(struct super_block *sb)
557{ 542{
558 PROC_INFO_INC(sb, journal.lock_journal); 543 PROC_INFO_INC(sb, journal.lock_journal);
559 mutex_lock(&SB_JOURNAL(sb)->j_mutex); 544
545 reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
560} 546}
561 547
562/* unlock the current transaction */ 548/* unlock the current transaction */
@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s,
708 disable_barrier(s); 694 disable_barrier(s);
709 set_buffer_uptodate(bh); 695 set_buffer_uptodate(bh);
710 set_buffer_dirty(bh); 696 set_buffer_dirty(bh);
697 reiserfs_write_unlock(s);
711 sync_dirty_buffer(bh); 698 sync_dirty_buffer(bh);
699 reiserfs_write_lock(s);
712 } 700 }
713} 701}
714 702
@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
996{ 984{
997 DEFINE_WAIT(wait); 985 DEFINE_WAIT(wait);
998 struct reiserfs_journal *j = SB_JOURNAL(s); 986 struct reiserfs_journal *j = SB_JOURNAL(s);
999 if (atomic_read(&j->j_async_throttle)) 987
988 if (atomic_read(&j->j_async_throttle)) {
989 reiserfs_write_unlock(s);
1000 congestion_wait(BLK_RW_ASYNC, HZ / 10); 990 congestion_wait(BLK_RW_ASYNC, HZ / 10);
991 reiserfs_write_lock(s);
992 }
993
1001 return 0; 994 return 0;
1002} 995}
1003 996
@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s,
1043 } 1036 }
1044 1037
1045 /* make sure nobody is trying to flush this one at the same time */ 1038 /* make sure nobody is trying to flush this one at the same time */
1046 mutex_lock(&jl->j_commit_mutex); 1039 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
1040
1047 if (!journal_list_still_alive(s, trans_id)) { 1041 if (!journal_list_still_alive(s, trans_id)) {
1048 mutex_unlock(&jl->j_commit_mutex); 1042 mutex_unlock(&jl->j_commit_mutex);
1049 goto put_jl; 1043 goto put_jl;
@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s,
1061 1055
1062 if (!list_empty(&jl->j_bh_list)) { 1056 if (!list_empty(&jl->j_bh_list)) {
1063 int ret; 1057 int ret;
1064 unlock_kernel(); 1058
1059 /*
1060 * We might sleep in numerous places inside
1061 * write_ordered_buffers. Relax the write lock.
1062 */
1063 reiserfs_write_unlock(s);
1065 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, 1064 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
1066 journal, jl, &jl->j_bh_list); 1065 journal, jl, &jl->j_bh_list);
1067 if (ret < 0 && retval == 0) 1066 if (ret < 0 && retval == 0)
1068 retval = ret; 1067 retval = ret;
1069 lock_kernel(); 1068 reiserfs_write_lock(s);
1070 } 1069 }
1071 BUG_ON(!list_empty(&jl->j_bh_list)); 1070 BUG_ON(!list_empty(&jl->j_bh_list));
1072 /* 1071 /*
@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s,
1085 SB_ONDISK_JOURNAL_SIZE(s); 1084 SB_ONDISK_JOURNAL_SIZE(s);
1086 tbh = journal_find_get_block(s, bn); 1085 tbh = journal_find_get_block(s, bn);
1087 if (tbh) { 1086 if (tbh) {
1088 if (buffer_dirty(tbh)) 1087 if (buffer_dirty(tbh)) {
1089 ll_rw_block(WRITE, 1, &tbh) ; 1088 reiserfs_write_unlock(s);
1089 ll_rw_block(WRITE, 1, &tbh);
1090 reiserfs_write_lock(s);
1091 }
1090 put_bh(tbh) ; 1092 put_bh(tbh) ;
1091 } 1093 }
1092 } 1094 }
@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s,
1114 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1116 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
1115 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 1117 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
1116 tbh = journal_find_get_block(s, bn); 1118 tbh = journal_find_get_block(s, bn);
1119
1120 reiserfs_write_unlock(s);
1117 wait_on_buffer(tbh); 1121 wait_on_buffer(tbh);
1122 reiserfs_write_lock(s);
1118 // since we're using ll_rw_blk above, it might have skipped over 1123 // since we're using ll_rw_blk above, it might have skipped over
1119 // a locked buffer. Double check here 1124 // a locked buffer. Double check here
1120 // 1125 //
1121 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1126 /* redundant, sync_dirty_buffer() checks */
1127 if (buffer_dirty(tbh)) {
1128 reiserfs_write_unlock(s);
1122 sync_dirty_buffer(tbh); 1129 sync_dirty_buffer(tbh);
1130 reiserfs_write_lock(s);
1131 }
1123 if (unlikely(!buffer_uptodate(tbh))) { 1132 if (unlikely(!buffer_uptodate(tbh))) {
1124#ifdef CONFIG_REISERFS_CHECK 1133#ifdef CONFIG_REISERFS_CHECK
1125 reiserfs_warning(s, "journal-601", 1134 reiserfs_warning(s, "journal-601",
@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s,
1143 if (buffer_dirty(jl->j_commit_bh)) 1152 if (buffer_dirty(jl->j_commit_bh))
1144 BUG(); 1153 BUG();
1145 mark_buffer_dirty(jl->j_commit_bh) ; 1154 mark_buffer_dirty(jl->j_commit_bh) ;
1155 reiserfs_write_unlock(s);
1146 sync_dirty_buffer(jl->j_commit_bh) ; 1156 sync_dirty_buffer(jl->j_commit_bh) ;
1157 reiserfs_write_lock(s);
1147 } 1158 }
1148 } else 1159 } else {
1160 reiserfs_write_unlock(s);
1149 wait_on_buffer(jl->j_commit_bh); 1161 wait_on_buffer(jl->j_commit_bh);
1162 reiserfs_write_lock(s);
1163 }
1150 1164
1151 check_barrier_completion(s, jl->j_commit_bh); 1165 check_barrier_completion(s, jl->j_commit_bh);
1152 1166
@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb,
1286 1300
1287 if (trans_id >= journal->j_last_flush_trans_id) { 1301 if (trans_id >= journal->j_last_flush_trans_id) {
1288 if (buffer_locked((journal->j_header_bh))) { 1302 if (buffer_locked((journal->j_header_bh))) {
1303 reiserfs_write_unlock(sb);
1289 wait_on_buffer((journal->j_header_bh)); 1304 wait_on_buffer((journal->j_header_bh));
1305 reiserfs_write_lock(sb);
1290 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1306 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1291#ifdef CONFIG_REISERFS_CHECK 1307#ifdef CONFIG_REISERFS_CHECK
1292 reiserfs_warning(sb, "journal-699", 1308 reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb,
1312 disable_barrier(sb); 1328 disable_barrier(sb);
1313 goto sync; 1329 goto sync;
1314 } 1330 }
1331 reiserfs_write_unlock(sb);
1315 wait_on_buffer(journal->j_header_bh); 1332 wait_on_buffer(journal->j_header_bh);
1333 reiserfs_write_lock(sb);
1316 check_barrier_completion(sb, journal->j_header_bh); 1334 check_barrier_completion(sb, journal->j_header_bh);
1317 } else { 1335 } else {
1318 sync: 1336 sync:
1319 set_buffer_dirty(journal->j_header_bh); 1337 set_buffer_dirty(journal->j_header_bh);
1338 reiserfs_write_unlock(sb);
1320 sync_dirty_buffer(journal->j_header_bh); 1339 sync_dirty_buffer(journal->j_header_bh);
1340 reiserfs_write_lock(sb);
1321 } 1341 }
1322 if (!buffer_uptodate(journal->j_header_bh)) { 1342 if (!buffer_uptodate(journal->j_header_bh)) {
1323 reiserfs_warning(sb, "journal-837", 1343 reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
1409 1429
1410 /* if flushall == 0, the lock is already held */ 1430 /* if flushall == 0, the lock is already held */
1411 if (flushall) { 1431 if (flushall) {
1412 mutex_lock(&journal->j_flush_mutex); 1432 reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1413 } else if (mutex_trylock(&journal->j_flush_mutex)) { 1433 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1414 BUG(); 1434 BUG();
1415 } 1435 }
@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s,
1553 reiserfs_panic(s, "journal-1011", 1573 reiserfs_panic(s, "journal-1011",
1554 "cn->bh is NULL"); 1574 "cn->bh is NULL");
1555 } 1575 }
1576
1577 reiserfs_write_unlock(s);
1556 wait_on_buffer(cn->bh); 1578 wait_on_buffer(cn->bh);
1579 reiserfs_write_lock(s);
1580
1557 if (!cn->bh) { 1581 if (!cn->bh) {
1558 reiserfs_panic(s, "journal-1012", 1582 reiserfs_panic(s, "journal-1012",
1559 "cn->bh is NULL"); 1583 "cn->bh is NULL");
@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s,
1769 struct reiserfs_journal *journal = SB_JOURNAL(s); 1793 struct reiserfs_journal *journal = SB_JOURNAL(s);
1770 chunk.nr = 0; 1794 chunk.nr = 0;
1771 1795
1772 mutex_lock(&journal->j_flush_mutex); 1796 reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1773 if (!journal_list_still_alive(s, orig_trans_id)) { 1797 if (!journal_list_still_alive(s, orig_trans_id)) {
1774 goto done; 1798 goto done;
1775 } 1799 }
@@ -1973,7 +1997,14 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1973 reiserfs_mounted_fs_count--; 1997 reiserfs_mounted_fs_count--;
1974 /* wait for all commits to finish */ 1998 /* wait for all commits to finish */
1975 cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 1999 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
2000
2001 /*
2002 * We must release the write lock here because
2003 * the workqueue job (flush_async_commit) needs this lock
2004 */
2005 reiserfs_write_unlock(sb);
1976 flush_workqueue(commit_wq); 2006 flush_workqueue(commit_wq);
2007
1977 if (!reiserfs_mounted_fs_count) { 2008 if (!reiserfs_mounted_fs_count) {
1978 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
1979 commit_wq = NULL; 2010 commit_wq = NULL;
@@ -1981,6 +2012,8 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1981 2012
1982 free_journal_ram(sb); 2013 free_journal_ram(sb);
1983 2014
2015 reiserfs_write_lock(sb);
2016
1984 return 0; 2017 return 0;
1985} 2018}
1986 2019
@@ -2243,7 +2276,11 @@ static int journal_read_transaction(struct super_block *sb,
2243 /* read in the log blocks, memcpy to the corresponding real block */ 2276 /* read in the log blocks, memcpy to the corresponding real block */
2244 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 2277 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
2245 for (i = 0; i < get_desc_trans_len(desc); i++) { 2278 for (i = 0; i < get_desc_trans_len(desc); i++) {
2279
2280 reiserfs_write_unlock(sb);
2246 wait_on_buffer(log_blocks[i]); 2281 wait_on_buffer(log_blocks[i]);
2282 reiserfs_write_lock(sb);
2283
2247 if (!buffer_uptodate(log_blocks[i])) { 2284 if (!buffer_uptodate(log_blocks[i])) {
2248 reiserfs_warning(sb, "journal-1212", 2285 reiserfs_warning(sb, "journal-1212",
2249 "REPLAY FAILURE fsck required! " 2286 "REPLAY FAILURE fsck required! "
@@ -2722,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2722 struct reiserfs_journal *journal; 2759 struct reiserfs_journal *journal;
2723 struct reiserfs_journal_list *jl; 2760 struct reiserfs_journal_list *jl;
2724 char b[BDEVNAME_SIZE]; 2761 char b[BDEVNAME_SIZE];
2762 int ret;
2725 2763
2764 /*
2765 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2766 * dependency inversion warnings.
2767 */
2768 reiserfs_write_unlock(sb);
2726 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2769 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2727 if (!journal) { 2770 if (!journal) {
2728 reiserfs_warning(sb, "journal-1256", 2771 reiserfs_warning(sb, "journal-1256",
2729 "unable to get memory for journal structure"); 2772 "unable to get memory for journal structure");
2773 reiserfs_write_lock(sb);
2730 return 1; 2774 return 1;
2731 } 2775 }
2732 memset(journal, 0, sizeof(struct reiserfs_journal)); 2776 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2735,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2735 INIT_LIST_HEAD(&journal->j_working_list); 2779 INIT_LIST_HEAD(&journal->j_working_list);
2736 INIT_LIST_HEAD(&journal->j_journal_list); 2780 INIT_LIST_HEAD(&journal->j_journal_list);
2737 journal->j_persistent_trans = 0; 2781 journal->j_persistent_trans = 0;
2738 if (reiserfs_allocate_list_bitmaps(sb, 2782 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2739 journal->j_list_bitmap, 2783 reiserfs_bmap_count(sb));
2740 reiserfs_bmap_count(sb))) 2784 reiserfs_write_lock(sb);
2785 if (ret)
2741 goto free_and_return; 2786 goto free_and_return;
2787
2742 allocate_bitmap_nodes(sb); 2788 allocate_bitmap_nodes(sb);
2743 2789
2744 /* reserved for journal area support */ 2790 /* reserved for journal area support */
@@ -2765,11 +2811,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2765 goto free_and_return; 2811 goto free_and_return;
2766 } 2812 }
2767 2813
2814 /*
2815 * We need to unlock here to avoid creating the following
2816 * dependency:
2817 * reiserfs_lock -> sysfs_mutex
2818 * Because the reiserfs mmap path creates the following dependency:
2819 * mm->mmap -> reiserfs_lock, hence we have
2820 * mm->mmap -> reiserfs_lock ->sysfs_mutex
 2821 * This would end up in a circular dependency with sysfs readdir path
2822 * which does sysfs_mutex -> mm->mmap_sem
2823 * This is fine because the reiserfs lock is useless in mount path,
2824 * at least until we call journal_begin. We keep it for paranoid
2825 * reasons.
2826 */
2827 reiserfs_write_unlock(sb);
2768 if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2828 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2829 reiserfs_write_lock(sb);
2769 reiserfs_warning(sb, "sh-462", 2830 reiserfs_warning(sb, "sh-462",
2770 "unable to initialize jornal device"); 2831 "unable to initialize jornal device");
2771 goto free_and_return; 2832 goto free_and_return;
2772 } 2833 }
2834 reiserfs_write_lock(sb);
2773 2835
2774 rs = SB_DISK_SUPER_BLOCK(sb); 2836 rs = SB_DISK_SUPER_BLOCK(sb);
2775 2837
@@ -2881,8 +2943,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2881 } 2943 }
2882 2944
2883 reiserfs_mounted_fs_count++; 2945 reiserfs_mounted_fs_count++;
2884 if (reiserfs_mounted_fs_count <= 1) 2946 if (reiserfs_mounted_fs_count <= 1) {
2947 reiserfs_write_unlock(sb);
2885 commit_wq = create_workqueue("reiserfs"); 2948 commit_wq = create_workqueue("reiserfs");
2949 reiserfs_write_lock(sb);
2950 }
2886 2951
2887 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2952 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2888 journal->j_work_sb = sb; 2953 journal->j_work_sb = sb;
@@ -2964,8 +3029,11 @@ static void queue_log_writer(struct super_block *s)
2964 init_waitqueue_entry(&wait, current); 3029 init_waitqueue_entry(&wait, current);
2965 add_wait_queue(&journal->j_join_wait, &wait); 3030 add_wait_queue(&journal->j_join_wait, &wait);
2966 set_current_state(TASK_UNINTERRUPTIBLE); 3031 set_current_state(TASK_UNINTERRUPTIBLE);
2967 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 3032 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
3033 reiserfs_write_unlock(s);
2968 schedule(); 3034 schedule();
3035 reiserfs_write_lock(s);
3036 }
2969 __set_current_state(TASK_RUNNING); 3037 __set_current_state(TASK_RUNNING);
2970 remove_wait_queue(&journal->j_join_wait, &wait); 3038 remove_wait_queue(&journal->j_join_wait, &wait);
2971} 3039}
@@ -2982,7 +3050,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2982 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3050 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2983 unsigned long bcount = journal->j_bcount; 3051 unsigned long bcount = journal->j_bcount;
2984 while (1) { 3052 while (1) {
3053 reiserfs_write_unlock(sb);
2985 schedule_timeout_uninterruptible(1); 3054 schedule_timeout_uninterruptible(1);
3055 reiserfs_write_lock(sb);
2986 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 3056 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2987 while ((atomic_read(&journal->j_wcount) > 0 || 3057 while ((atomic_read(&journal->j_wcount) > 0 ||
2988 atomic_read(&journal->j_jlock)) && 3058 atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3103,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3033 3103
3034 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3104 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3035 unlock_journal(sb); 3105 unlock_journal(sb);
3106 reiserfs_write_unlock(sb);
3036 reiserfs_wait_on_write_block(sb); 3107 reiserfs_wait_on_write_block(sb);
3108 reiserfs_write_lock(sb);
3037 PROC_INFO_INC(sb, journal.journal_relock_writers); 3109 PROC_INFO_INC(sb, journal.journal_relock_writers);
3038 goto relock; 3110 goto relock;
3039 } 3111 }
@@ -3506,14 +3578,14 @@ static void flush_async_commits(struct work_struct *work)
3506 struct reiserfs_journal_list *jl; 3578 struct reiserfs_journal_list *jl;
3507 struct list_head *entry; 3579 struct list_head *entry;
3508 3580
3509 lock_kernel(); 3581 reiserfs_write_lock(sb);
3510 if (!list_empty(&journal->j_journal_list)) { 3582 if (!list_empty(&journal->j_journal_list)) {
3511 /* last entry is the youngest, commit it and you get everything */ 3583 /* last entry is the youngest, commit it and you get everything */
3512 entry = journal->j_journal_list.prev; 3584 entry = journal->j_journal_list.prev;
3513 jl = JOURNAL_LIST_ENTRY(entry); 3585 jl = JOURNAL_LIST_ENTRY(entry);
3514 flush_commit_list(sb, jl, 1); 3586 flush_commit_list(sb, jl, 1);
3515 } 3587 }
3516 unlock_kernel(); 3588 reiserfs_write_unlock(sb);
3517} 3589}
3518 3590
3519/* 3591/*
@@ -4041,7 +4113,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4041 * the new transaction is fully setup, and we've already flushed the 4113 * the new transaction is fully setup, and we've already flushed the
4042 * ordered bh list 4114 * ordered bh list
4043 */ 4115 */
4044 mutex_lock(&jl->j_commit_mutex); 4116 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
4045 4117
4046 /* save the transaction id in case we need to commit it later */ 4118 /* save the transaction id in case we need to commit it later */
4047 commit_trans_id = jl->j_trans_id; 4119 commit_trans_id = jl->j_trans_id;
@@ -4156,7 +4228,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4156 next = cn->next; 4228 next = cn->next;
4157 free_cnode(sb, cn); 4229 free_cnode(sb, cn);
4158 cn = next; 4230 cn = next;
4231 reiserfs_write_unlock(sb);
4159 cond_resched(); 4232 cond_resched();
4233 reiserfs_write_lock(sb);
4160 } 4234 }
4161 4235
4162 /* we are done with both the c_bh and d_bh, but 4236 /* we are done with both the c_bh and d_bh, but
@@ -4203,10 +4277,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4203 * is lost. 4277 * is lost.
4204 */ 4278 */
4205 if (!list_empty(&jl->j_tail_bh_list)) { 4279 if (!list_empty(&jl->j_tail_bh_list)) {
4206 unlock_kernel(); 4280 reiserfs_write_unlock(sb);
4207 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4281 write_ordered_buffers(&journal->j_dirty_buffers_lock,
4208 journal, jl, &jl->j_tail_bh_list); 4282 journal, jl, &jl->j_tail_bh_list);
4209 lock_kernel(); 4283 reiserfs_write_lock(sb);
4210 } 4284 }
4211 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4285 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4212 mutex_unlock(&jl->j_commit_mutex); 4286 mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
new file mode 100644
index 00000000000..b87aa2c1afc
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,97 @@
1#include <linux/reiserfs_fs.h>
2#include <linux/mutex.h>
3
4/*
5 * The previous reiserfs locking scheme was heavily based on
6 * the tricky properties of the Bkl:
7 *
8 * - it was acquired recursively by a same task
 10 * - the performance relied on the release-while-schedule() property
10 *
11 * Now that we replace it by a mutex, we still want to keep the same
12 * recursive property to avoid big changes in the code structure.
13 * We use our own lock_owner here because the owner field on a mutex
14 * is only available in SMP or mutex debugging, also we only need this field
15 * for this mutex, no need for a system wide mutex facility.
16 *
17 * Also this lock is often released before a call that could block because
 18 * reiserfs performance was partially based on the release while schedule()
19 * property of the Bkl.
20 */
21void reiserfs_write_lock(struct super_block *s)
22{
23 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
24
25 if (sb_i->lock_owner != current) {
26 mutex_lock(&sb_i->lock);
27 sb_i->lock_owner = current;
28 }
29
30 /* No need to protect it, only the current task touches it */
31 sb_i->lock_depth++;
32}
33
34void reiserfs_write_unlock(struct super_block *s)
35{
36 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
37
38 /*
39 * Are we unlocking without even holding the lock?
40 * Such a situation must raise a BUG() if we don't want
41 * to corrupt the data.
42 */
43 BUG_ON(sb_i->lock_owner != current);
44
45 if (--sb_i->lock_depth == -1) {
46 sb_i->lock_owner = NULL;
47 mutex_unlock(&sb_i->lock);
48 }
49}
50
51/*
52 * If we already own the lock, just exit and don't increase the depth.
53 * Useful when we don't want to lock more than once.
54 *
55 * We always return the lock_depth we had before calling
56 * this function.
57 */
58int reiserfs_write_lock_once(struct super_block *s)
59{
60 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
61
62 if (sb_i->lock_owner != current) {
63 mutex_lock(&sb_i->lock);
64 sb_i->lock_owner = current;
65 return sb_i->lock_depth++;
66 }
67
68 return sb_i->lock_depth;
69}
70
71void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
72{
73 if (lock_depth == -1)
74 reiserfs_write_unlock(s);
75}
76
77/*
78 * Utility function to force a BUG if it is called without the superblock
79 * write lock held. caller is the string printed just before calling BUG()
80 */
81void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
82{
83 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
84
85 if (sb_i->lock_depth < 0)
86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller);
88}
89
90#ifdef CONFIG_REISERFS_CHECK
91void reiserfs_lock_check_recursive(struct super_block *sb)
92{
93 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
94
95 WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
96}
97#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 27157912863..9d4dcf0b07c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
324 struct nameidata *nd) 324 struct nameidata *nd)
325{ 325{
326 int retval; 326 int retval;
327 int lock_depth;
327 struct inode *inode = NULL; 328 struct inode *inode = NULL;
328 struct reiserfs_dir_entry de; 329 struct reiserfs_dir_entry de;
329 INITIALIZE_PATH(path_to_entry); 330 INITIALIZE_PATH(path_to_entry);
@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
331 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) 332 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
332 return ERR_PTR(-ENAMETOOLONG); 333 return ERR_PTR(-ENAMETOOLONG);
333 334
334 reiserfs_write_lock(dir->i_sb); 335 /*
336 * Might be called with or without the write lock, must be careful
337 * to not recursively hold it in case we want to release the lock
338 * before rescheduling.
339 */
340 lock_depth = reiserfs_write_lock_once(dir->i_sb);
341
335 de.de_gen_number_bit_string = NULL; 342 de.de_gen_number_bit_string = NULL;
336 retval = 343 retval =
337 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, 344 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
341 inode = reiserfs_iget(dir->i_sb, 348 inode = reiserfs_iget(dir->i_sb,
342 (struct cpu_key *)&(de.de_dir_id)); 349 (struct cpu_key *)&(de.de_dir_id));
343 if (!inode || IS_ERR(inode)) { 350 if (!inode || IS_ERR(inode)) {
344 reiserfs_write_unlock(dir->i_sb); 351 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
345 return ERR_PTR(-EACCES); 352 return ERR_PTR(-EACCES);
346 } 353 }
347 354
@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
350 if (IS_PRIVATE(dir)) 357 if (IS_PRIVATE(dir))
351 inode->i_flags |= S_PRIVATE; 358 inode->i_flags |= S_PRIVATE;
352 } 359 }
353 reiserfs_write_unlock(dir->i_sb); 360 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
354 if (retval == IO_ERROR) { 361 if (retval == IO_ERROR) {
355 return ERR_PTR(-EIO); 362 return ERR_PTR(-EIO);
356 } 363 }
@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725 struct inode *inode; 732 struct inode *inode;
726 struct reiserfs_transaction_handle th; 733 struct reiserfs_transaction_handle th;
727 struct reiserfs_security_handle security; 734 struct reiserfs_security_handle security;
735 int lock_depth;
728 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 736 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
729 int jbegin_count = 737 int jbegin_count =
730 JOURNAL_PER_BALANCE_CNT * 3 + 738 JOURNAL_PER_BALANCE_CNT * 3 +
@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
748 return retval; 756 return retval;
749 } 757 }
750 jbegin_count += retval; 758 jbegin_count += retval;
751 reiserfs_write_lock(dir->i_sb); 759 lock_depth = reiserfs_write_lock_once(dir->i_sb);
752 760
753 retval = journal_begin(&th, dir->i_sb, jbegin_count); 761 retval = journal_begin(&th, dir->i_sb, jbegin_count);
754 if (retval) { 762 if (retval) {
@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
798 d_instantiate(dentry, inode); 806 d_instantiate(dentry, inode);
799 unlock_new_inode(inode); 807 unlock_new_inode(inode);
800 retval = journal_end(&th, dir->i_sb, jbegin_count); 808 retval = journal_end(&th, dir->i_sb, jbegin_count);
801 out_failed: 809out_failed:
802 reiserfs_write_unlock(dir->i_sb); 810 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
803 return retval; 811 return retval;
804} 812}
805 813
@@ -913,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
913 struct reiserfs_transaction_handle th; 921 struct reiserfs_transaction_handle th;
914 int jbegin_count; 922 int jbegin_count;
915 unsigned long savelink; 923 unsigned long savelink;
924 int depth;
916 925
917 inode = dentry->d_inode; 926 inode = dentry->d_inode;
918 927
@@ -924,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
924 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 933 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
925 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 934 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
926 935
927 reiserfs_write_lock(dir->i_sb); 936 depth = reiserfs_write_lock_once(dir->i_sb);
928 retval = journal_begin(&th, dir->i_sb, jbegin_count); 937 retval = journal_begin(&th, dir->i_sb, jbegin_count);
929 if (retval) 938 if (retval)
930 goto out_unlink; 939 goto out_unlink;
@@ -985,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
985 994
986 retval = journal_end(&th, dir->i_sb, jbegin_count); 995 retval = journal_end(&th, dir->i_sb, jbegin_count);
987 reiserfs_check_path(&path); 996 reiserfs_check_path(&path);
988 reiserfs_write_unlock(dir->i_sb); 997 reiserfs_write_unlock_once(dir->i_sb, depth);
989 return retval; 998 return retval;
990 999
991 end_unlink: 1000 end_unlink:
@@ -995,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
995 if (err) 1004 if (err)
996 retval = err; 1005 retval = err;
997 out_unlink: 1006 out_unlink:
998 reiserfs_write_unlock(dir->i_sb); 1007 reiserfs_write_unlock_once(dir->i_sb, depth);
999 return retval; 1008 return retval;
1000} 1009}
1001 1010
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 536eacaeb71..adbc6f53851 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
349 349
350 . */ 350 . */
351 351
352#ifdef CONFIG_REISERFS_CHECK
353extern struct tree_balance *cur_tb;
354#endif
355
356void __reiserfs_panic(struct super_block *sb, const char *id, 352void __reiserfs_panic(struct super_block *sb, const char *id,
357 const char *function, const char *fmt, ...) 353 const char *function, const char *fmt, ...)
358{ 354{
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9229e5514a4..7a9981196c1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -17,8 +17,6 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19 19
20#ifdef CONFIG_REISERFS_PROC_INFO
21
22/* 20/*
23 * LOCKING: 21 * LOCKING:
24 * 22 *
@@ -48,14 +46,6 @@ static int show_version(struct seq_file *m, struct super_block *sb)
48 return 0; 46 return 0;
49} 47}
50 48
51int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
52 int count, int *eof, void *data)
53{
54 *start = buffer;
55 *eof = 1;
56 return 0;
57}
58
59#define SF( x ) ( r -> x ) 49#define SF( x ) ( r -> x )
60#define SFP( x ) SF( s_proc_info_data.x ) 50#define SFP( x ) SF( s_proc_info_data.x )
61#define SFPL( x ) SFP( x[ level ] ) 51#define SFPL( x ) SFP( x[ level ] )
@@ -538,19 +528,6 @@ int reiserfs_proc_info_done(struct super_block *sb)
538 return 0; 528 return 0;
539} 529}
540 530
541struct proc_dir_entry *reiserfs_proc_register_global(char *name,
542 read_proc_t * func)
543{
544 return (proc_info_root) ? create_proc_read_entry(name, 0,
545 proc_info_root,
546 func, NULL) : NULL;
547}
548
549void reiserfs_proc_unregister_global(const char *name)
550{
551 remove_proc_entry(name, proc_info_root);
552}
553
554int reiserfs_proc_info_global_init(void) 531int reiserfs_proc_info_global_init(void)
555{ 532{
556 if (proc_info_root == NULL) { 533 if (proc_info_root == NULL) {
@@ -572,48 +549,6 @@ int reiserfs_proc_info_global_done(void)
572 } 549 }
573 return 0; 550 return 0;
574} 551}
575
576/* REISERFS_PROC_INFO */
577#else
578
579int reiserfs_proc_info_init(struct super_block *sb)
580{
581 return 0;
582}
583int reiserfs_proc_info_done(struct super_block *sb)
584{
585 return 0;
586}
587
588struct proc_dir_entry *reiserfs_proc_register_global(char *name,
589 read_proc_t * func)
590{
591 return NULL;
592}
593
594void reiserfs_proc_unregister_global(const char *name)
595{;
596}
597
598int reiserfs_proc_info_global_init(void)
599{
600 return 0;
601}
602int reiserfs_proc_info_global_done(void)
603{
604 return 0;
605}
606
607int reiserfs_global_version_in_proc(char *buffer, char **start,
608 off_t offset,
609 int count, int *eof, void *data)
610{
611 return 0;
612}
613
614/* REISERFS_PROC_INFO */
615#endif
616
617/* 552/*
618 * Revision 1.1.8.2 2001/07/15 17:08:42 god 553 * Revision 1.1.8.2 2001/07/15 17:08:42 god
619 * . use get_super() in procfs.c 554 * . use get_super() in procfs.c
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 18b315d3d10..b3a94d20f0f 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
141 141
142 set_buffer_uptodate(bh); 142 set_buffer_uptodate(bh);
143 mark_buffer_dirty(bh); 143 mark_buffer_dirty(bh);
144 reiserfs_write_unlock(s);
144 sync_dirty_buffer(bh); 145 sync_dirty_buffer(bh);
146 reiserfs_write_lock(s);
145 // update bitmap_info stuff 147 // update bitmap_info stuff
146 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 148 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
147 brelse(bh); 149 brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d036ee5b1c8..5fa7118f04e 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */
222 return ITEM_NOT_FOUND; 222 return ITEM_NOT_FOUND;
223} 223}
224 224
225#ifdef CONFIG_REISERFS_CHECK
226extern struct tree_balance *cur_tb;
227#endif
228 225
229/* Minimal possible key. It is never in the tree. */ 226/* Minimal possible key. It is never in the tree. */
230const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; 227const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level)
519 516
520#define SEARCH_BY_KEY_READA 16 517#define SEARCH_BY_KEY_READA 16
521 518
522/* The function is NOT SCHEDULE-SAFE! */ 519/*
523static void search_by_key_reada(struct super_block *s, 520 * The function is NOT SCHEDULE-SAFE!
521 * It might unlock the write lock if we needed to wait for a block
522 * to be read. Note that in this case it won't recover the lock to avoid
 523 * high contention resulting from too many lock requests, especially
524 * the caller (search_by_key) will perform other schedule-unsafe
525 * operations just after calling this function.
526 *
527 * @return true if we have unlocked
528 */
529static bool search_by_key_reada(struct super_block *s,
524 struct buffer_head **bh, 530 struct buffer_head **bh,
525 b_blocknr_t *b, int num) 531 b_blocknr_t *b, int num)
526{ 532{
527 int i, j; 533 int i, j;
534 bool unlocked = false;
528 535
529 for (i = 0; i < num; i++) { 536 for (i = 0; i < num; i++) {
530 bh[i] = sb_getblk(s, b[i]); 537 bh[i] = sb_getblk(s, b[i]);
531 } 538 }
539 /*
540 * We are going to read some blocks on which we
541 * have a reference. It's safe, though we might be
542 * reading blocks concurrently changed if we release
543 * the lock. But it's still fine because we check later
544 * if the tree changed
545 */
532 for (j = 0; j < i; j++) { 546 for (j = 0; j < i; j++) {
533 /* 547 /*
534 * note, this needs attention if we are getting rid of the BKL 548 * note, this needs attention if we are getting rid of the BKL
535 * you have to make sure the prepared bit isn't set on this buffer 549 * you have to make sure the prepared bit isn't set on this buffer
536 */ 550 */
537 if (!buffer_uptodate(bh[j])) 551 if (!buffer_uptodate(bh[j])) {
552 if (!unlocked) {
553 reiserfs_write_unlock(s);
554 unlocked = true;
555 }
538 ll_rw_block(READA, 1, bh + j); 556 ll_rw_block(READA, 1, bh + j);
557 }
539 brelse(bh[j]); 558 brelse(bh[j]);
540 } 559 }
560 return unlocked;
541} 561}
542 562
543/************************************************************************** 563/**************************************************************************
@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
625 have a pointer to it. */ 645 have a pointer to it. */
626 if ((bh = last_element->pe_buffer = 646 if ((bh = last_element->pe_buffer =
627 sb_getblk(sb, block_number))) { 647 sb_getblk(sb, block_number))) {
648 bool unlocked = false;
649
628 if (!buffer_uptodate(bh) && reada_count > 1) 650 if (!buffer_uptodate(bh) && reada_count > 1)
629 search_by_key_reada(sb, reada_bh, 651 /* may unlock the write lock */
652 unlocked = search_by_key_reada(sb, reada_bh,
630 reada_blocks, reada_count); 653 reada_blocks, reada_count);
654 /*
655 * If we haven't already unlocked the write lock,
656 * then we need to do that here before reading
657 * the current block
658 */
659 if (!buffer_uptodate(bh) && !unlocked) {
660 reiserfs_write_unlock(sb);
661 unlocked = true;
662 }
631 ll_rw_block(READ, 1, &bh); 663 ll_rw_block(READ, 1, &bh);
632 wait_on_buffer(bh); 664 wait_on_buffer(bh);
665
666 if (unlocked)
667 reiserfs_write_lock(sb);
633 if (!buffer_uptodate(bh)) 668 if (!buffer_uptodate(bh))
634 goto io_error; 669 goto io_error;
635 } else { 670 } else {
@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
673 !key_in_buffer(search_path, key, sb), 708 !key_in_buffer(search_path, key, sb),
674 "PAP-5130: key is not in the buffer"); 709 "PAP-5130: key is not in the buffer");
675#ifdef CONFIG_REISERFS_CHECK 710#ifdef CONFIG_REISERFS_CHECK
676 if (cur_tb) { 711 if (REISERFS_SB(sb)->cur_tb) {
677 print_cur_tb("5140"); 712 print_cur_tb("5140");
678 reiserfs_panic(sb, "PAP-5140", 713 reiserfs_panic(sb, "PAP-5140",
679 "schedule occurred in do_balance!"); 714 "schedule occurred in do_balance!");
@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1024 reiserfs_free_block(th, inode, block, 1); 1059 reiserfs_free_block(th, inode, block, 1);
1025 } 1060 }
1026 1061
1062 reiserfs_write_unlock(sb);
1027 cond_resched(); 1063 cond_resched();
1064 reiserfs_write_lock(sb);
1028 1065
1029 if (item_moved (&s_ih, path)) { 1066 if (item_moved (&s_ih, path)) {
1030 need_re_search = 1; 1067 need_re_search = 1;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f0ad05f3802..b4a7dd03bdb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
465 struct reiserfs_transaction_handle th; 465 struct reiserfs_transaction_handle th;
466 th.t_trans_id = 0; 466 th.t_trans_id = 0;
467 467
468 lock_kernel(); 468 reiserfs_write_lock(s);
469 469
470 if (s->s_dirt) 470 if (s->s_dirt)
471 reiserfs_write_super(s); 471 reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
499 499
500 reiserfs_proc_info_done(s); 500 reiserfs_proc_info_done(s);
501 501
502 reiserfs_write_unlock(s);
503 mutex_destroy(&REISERFS_SB(s)->lock);
502 kfree(s->s_fs_info); 504 kfree(s->s_fs_info);
503 s->s_fs_info = NULL; 505 s->s_fs_info = NULL;
504
505 unlock_kernel();
506} 506}
507 507
508static struct kmem_cache *reiserfs_inode_cachep; 508static struct kmem_cache *reiserfs_inode_cachep;
@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode)
554 struct reiserfs_transaction_handle th; 554 struct reiserfs_transaction_handle th;
555 555
556 int err = 0; 556 int err = 0;
557 int lock_depth;
558
557 if (inode->i_sb->s_flags & MS_RDONLY) { 559 if (inode->i_sb->s_flags & MS_RDONLY) {
558 reiserfs_warning(inode->i_sb, "clm-6006", 560 reiserfs_warning(inode->i_sb, "clm-6006",
559 "writing inode %lu on readonly FS", 561 "writing inode %lu on readonly FS",
560 inode->i_ino); 562 inode->i_ino);
561 return; 563 return;
562 } 564 }
563 reiserfs_write_lock(inode->i_sb); 565 lock_depth = reiserfs_write_lock_once(inode->i_sb);
564 566
565 /* this is really only used for atime updates, so they don't have 567 /* this is really only used for atime updates, so they don't have
566 ** to be included in O_SYNC or fsync 568 ** to be included in O_SYNC or fsync
567 */ 569 */
568 err = journal_begin(&th, inode->i_sb, 1); 570 err = journal_begin(&th, inode->i_sb, 1);
569 if (err) { 571 if (err)
570 reiserfs_write_unlock(inode->i_sb); 572 goto out;
571 return; 573
572 }
573 reiserfs_update_sd(&th, inode); 574 reiserfs_update_sd(&th, inode);
574 journal_end(&th, inode->i_sb, 1); 575 journal_end(&th, inode->i_sb, 1);
575 reiserfs_write_unlock(inode->i_sb); 576
577out:
578 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
576} 579}
577 580
578#ifdef CONFIG_QUOTA 581#ifdef CONFIG_QUOTA
@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1168 unsigned int qfmt = 0; 1171 unsigned int qfmt = 0;
1169#ifdef CONFIG_QUOTA 1172#ifdef CONFIG_QUOTA
1170 int i; 1173 int i;
1174#endif
1175
1176 reiserfs_write_lock(s);
1171 1177
1178#ifdef CONFIG_QUOTA
1172 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); 1179 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1173#endif 1180#endif
1174 1181
1175 lock_kernel();
1176 rs = SB_DISK_SUPER_BLOCK(s); 1182 rs = SB_DISK_SUPER_BLOCK(s);
1177 1183
1178 if (!reiserfs_parse_options 1184 if (!reiserfs_parse_options
@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1295 1301
1296out_ok: 1302out_ok:
1297 replace_mount_options(s, new_opts); 1303 replace_mount_options(s, new_opts);
1298 unlock_kernel(); 1304 reiserfs_write_unlock(s);
1299 return 0; 1305 return 0;
1300 1306
1301out_err: 1307out_err:
1302 kfree(new_opts); 1308 kfree(new_opts);
1303 unlock_kernel(); 1309 reiserfs_write_unlock(s);
1304 return err; 1310 return err;
1305} 1311}
1306 1312
@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset)
1404static int reread_meta_blocks(struct super_block *s) 1410static int reread_meta_blocks(struct super_block *s)
1405{ 1411{
1406 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1412 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1413 reiserfs_write_unlock(s);
1407 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1414 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1415 reiserfs_write_lock(s);
1408 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1416 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1409 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1417 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1410 return 1; 1418 return 1;
@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1613 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1621 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1614 if (!sbi) { 1622 if (!sbi) {
1615 errval = -ENOMEM; 1623 errval = -ENOMEM;
1616 goto error; 1624 goto error_alloc;
1617 } 1625 }
1618 s->s_fs_info = sbi; 1626 s->s_fs_info = sbi;
1619 /* Set default values for options: non-aggressive tails, RO on errors */ 1627 /* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1627 /* setup default block allocator options */ 1635 /* setup default block allocator options */
1628 reiserfs_init_alloc_options(s); 1636 reiserfs_init_alloc_options(s);
1629 1637
1638 mutex_init(&REISERFS_SB(s)->lock);
1639 REISERFS_SB(s)->lock_depth = -1;
1640
1641 /*
1642 * This function is called with the bkl, which also was the old
1643 * locking used here.
1644 * do_journal_begin() will soon check if we hold the lock (ie: was the
 1645 * bkl). This is likely because do_journal_begin() has several other
 1646 * callers; at this time, it doesn't seem to be necessary to
1647 * protect against anything.
1648 * Anyway, let's be conservative and lock for now.
1649 */
1650 reiserfs_write_lock(s);
1651
1630 jdev_name = NULL; 1652 jdev_name = NULL;
1631 if (reiserfs_parse_options 1653 if (reiserfs_parse_options
1632 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1654 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1852 init_waitqueue_head(&(sbi->s_wait)); 1874 init_waitqueue_head(&(sbi->s_wait));
1853 spin_lock_init(&sbi->bitmap_lock); 1875 spin_lock_init(&sbi->bitmap_lock);
1854 1876
1877 reiserfs_write_unlock(s);
1878
1855 return (0); 1879 return (0);
1856 1880
1857error: 1881error:
1882 reiserfs_write_unlock(s);
1883error_alloc:
1858 if (jinit_done) { /* kill the commit thread, free journal ram */ 1884 if (jinit_done) { /* kill the commit thread, free journal ram */
1859 journal_release_error(NULL, s); 1885 journal_release_error(NULL, s);
1860 } 1886 }
@@ -2196,8 +2222,6 @@ static int __init init_reiserfs_fs(void)
2196 } 2222 }
2197 2223
2198 reiserfs_proc_info_global_init(); 2224 reiserfs_proc_info_global_init();
2199 reiserfs_proc_register_global("version",
2200 reiserfs_global_version_in_proc);
2201 2225
2202 ret = register_filesystem(&reiserfs_fs_type); 2226 ret = register_filesystem(&reiserfs_fs_type);
2203 2227
@@ -2205,7 +2229,6 @@ static int __init init_reiserfs_fs(void)
2205 return 0; 2229 return 0;
2206 } 2230 }
2207 2231
2208 reiserfs_proc_unregister_global("version");
2209 reiserfs_proc_info_global_done(); 2232 reiserfs_proc_info_global_done();
2210 destroy_inodecache(); 2233 destroy_inodecache();
2211 2234
@@ -2214,7 +2237,6 @@ static int __init init_reiserfs_fs(void)
2214 2237
2215static void __exit exit_reiserfs_fs(void) 2238static void __exit exit_reiserfs_fs(void)
2216{ 2239{
2217 reiserfs_proc_unregister_global("version");
2218 reiserfs_proc_info_global_done(); 2240 reiserfs_proc_info_global_done();
2219 unregister_filesystem(&reiserfs_fs_type); 2241 unregister_filesystem(&reiserfs_fs_type);
2220 destroy_inodecache(); 2242 destroy_inodecache();
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6925b835a43..81f09fab8ae 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/stat.h> 49#include <linux/stat.h>
50#include <linux/quotaops.h> 50#include <linux/quotaops.h>
51#include <linux/security.h>
51 52
52#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
53#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
@@ -82,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
82 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
83 vfs_dq_init(dir); 84 vfs_dq_init(dir);
84 85
85 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb);
86 error = dir->i_op->unlink(dir, dentry); 88 error = dir->i_op->unlink(dir, dentry);
87 mutex_unlock(&dentry->d_inode->i_mutex); 89 mutex_unlock(&dentry->d_inode->i_mutex);
88 90
@@ -97,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
97 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 99 BUG_ON(!mutex_is_locked(&dir->i_mutex));
98 vfs_dq_init(dir); 100 vfs_dq_init(dir);
99 101
100 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb);
101 dentry_unhash(dentry); 104 dentry_unhash(dentry);
102 error = dir->i_op->rmdir(dir, dentry); 105 error = dir->i_op->rmdir(dir, dentry);
103 if (!error) 106 if (!error)
@@ -234,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
234 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) 237 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
235 return 0; 238 return 0;
236 239
240 reiserfs_write_unlock(inode->i_sb);
237 dir = open_xa_dir(inode, XATTR_REPLACE); 241 dir = open_xa_dir(inode, XATTR_REPLACE);
238 if (IS_ERR(dir)) { 242 if (IS_ERR(dir)) {
239 err = PTR_ERR(dir); 243 err = PTR_ERR(dir);
244 reiserfs_write_lock(inode->i_sb);
240 goto out; 245 goto out;
241 } else if (!dir->d_inode) { 246 } else if (!dir->d_inode) {
242 err = 0; 247 err = 0;
248 reiserfs_write_lock(inode->i_sb);
243 goto out_dir; 249 goto out_dir;
244 } 250 }
245 251
246 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); 252 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
253
254 reiserfs_write_lock(inode->i_sb);
255
247 buf.xadir = dir; 256 buf.xadir = dir;
248 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); 257 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
249 while ((err == 0 || err == -ENOSPC) && buf.count) { 258 while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -282,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
282 err = journal_begin(&th, inode->i_sb, blocks); 291 err = journal_begin(&th, inode->i_sb, blocks);
283 if (!err) { 292 if (!err) {
284 int jerror; 293 int jerror;
285 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 294 reiserfs_mutex_lock_nested_safe(
286 I_MUTEX_XATTR); 295 &dir->d_parent->d_inode->i_mutex,
296 I_MUTEX_XATTR, inode->i_sb);
287 err = action(dir, data); 297 err = action(dir, data);
288 jerror = journal_end(&th, inode->i_sb, blocks); 298 jerror = journal_end(&th, inode->i_sb, blocks);
289 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 299 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -442,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
442 } 452 }
443 453
444 if (dentry->d_inode) { 454 if (dentry->d_inode) {
455 reiserfs_write_lock(inode->i_sb);
445 err = xattr_unlink(xadir->d_inode, dentry); 456 err = xattr_unlink(xadir->d_inode, dentry);
457 reiserfs_write_unlock(inode->i_sb);
446 update_ctime(inode); 458 update_ctime(inode);
447 } 459 }
448 460
@@ -476,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
476 if (get_inode_sd_version(inode) == STAT_DATA_V1) 488 if (get_inode_sd_version(inode) == STAT_DATA_V1)
477 return -EOPNOTSUPP; 489 return -EOPNOTSUPP;
478 490
479 if (!buffer) 491 reiserfs_write_unlock(inode->i_sb);
480 return lookup_and_delete_xattr(inode, name); 492
493 if (!buffer) {
494 err = lookup_and_delete_xattr(inode, name);
495 reiserfs_write_lock(inode->i_sb);
496 return err;
497 }
481 498
482 dentry = xattr_lookup(inode, name, flags); 499 dentry = xattr_lookup(inode, name, flags);
483 if (IS_ERR(dentry)) 500 if (IS_ERR(dentry)) {
501 reiserfs_write_lock(inode->i_sb);
484 return PTR_ERR(dentry); 502 return PTR_ERR(dentry);
503 }
485 504
486 down_write(&REISERFS_I(inode)->i_xattr_sem); 505 down_write(&REISERFS_I(inode)->i_xattr_sem);
487 506
507 reiserfs_write_lock(inode->i_sb);
508
488 xahash = xattr_hash(buffer, buffer_size); 509 xahash = xattr_hash(buffer, buffer_size);
489 while (buffer_pos < buffer_size || buffer_pos == 0) { 510 while (buffer_pos < buffer_size || buffer_pos == 0) {
490 size_t chunk; 511 size_t chunk;
@@ -539,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
539 .ia_size = buffer_size, 560 .ia_size = buffer_size,
540 .ia_valid = ATTR_SIZE | ATTR_CTIME, 561 .ia_valid = ATTR_SIZE | ATTR_CTIME,
541 }; 562 };
563
564 reiserfs_write_unlock(inode->i_sb);
542 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 565 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
543 down_write(&dentry->d_inode->i_alloc_sem); 566 down_write(&dentry->d_inode->i_alloc_sem);
567 reiserfs_write_lock(inode->i_sb);
568
544 err = reiserfs_setattr(dentry, &newattrs); 569 err = reiserfs_setattr(dentry, &newattrs);
545 up_write(&dentry->d_inode->i_alloc_sem); 570 up_write(&dentry->d_inode->i_alloc_sem);
546 mutex_unlock(&dentry->d_inode->i_mutex); 571 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -726,15 +751,14 @@ ssize_t
726reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 751reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
727 size_t size) 752 size_t size)
728{ 753{
729 struct inode *inode = dentry->d_inode;
730 struct xattr_handler *handler; 754 struct xattr_handler *handler;
731 755
732 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 756 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
733 757
734 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 758 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
735 return -EOPNOTSUPP; 759 return -EOPNOTSUPP;
736 760
737 return handler->get(inode, name, buffer, size); 761 return handler->get(dentry, name, buffer, size, handler->flags);
738} 762}
739 763
740/* 764/*
@@ -746,15 +770,14 @@ int
746reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 770reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
747 size_t size, int flags) 771 size_t size, int flags)
748{ 772{
749 struct inode *inode = dentry->d_inode;
750 struct xattr_handler *handler; 773 struct xattr_handler *handler;
751 774
752 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 775 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
753 776
754 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 777 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
755 return -EOPNOTSUPP; 778 return -EOPNOTSUPP;
756 779
757 return handler->set(inode, name, value, size, flags); 780 return handler->set(dentry, name, value, size, flags, handler->flags);
758} 781}
759 782
760/* 783/*
@@ -764,21 +787,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
764 */ 787 */
765int reiserfs_removexattr(struct dentry *dentry, const char *name) 788int reiserfs_removexattr(struct dentry *dentry, const char *name)
766{ 789{
767 struct inode *inode = dentry->d_inode;
768 struct xattr_handler *handler; 790 struct xattr_handler *handler;
769 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 791 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
770 792
771 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 793 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
772 return -EOPNOTSUPP; 794 return -EOPNOTSUPP;
773 795
774 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 796 return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
775} 797}
776 798
777struct listxattr_buf { 799struct listxattr_buf {
778 size_t size; 800 size_t size;
779 size_t pos; 801 size_t pos;
780 char *buf; 802 char *buf;
781 struct inode *inode; 803 struct dentry *dentry;
782}; 804};
783 805
784static int listxattr_filler(void *buf, const char *name, int namelen, 806static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +811,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
789 if (name[0] != '.' || 811 if (name[0] != '.' ||
790 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 812 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
791 struct xattr_handler *handler; 813 struct xattr_handler *handler;
792 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, 814 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
793 name); 815 name);
794 if (!handler) /* Unsupported xattr name */ 816 if (!handler) /* Unsupported xattr name */
795 return 0; 817 return 0;
796 if (b->buf) { 818 if (b->buf) {
797 size = handler->list(b->inode, b->buf + b->pos, 819 size = handler->list(b->dentry, b->buf + b->pos,
798 b->size, name, namelen); 820 b->size, name, namelen,
821 handler->flags);
799 if (size > b->size) 822 if (size > b->size)
800 return -ERANGE; 823 return -ERANGE;
801 } else { 824 } else {
802 size = handler->list(b->inode, NULL, 0, name, namelen); 825 size = handler->list(b->dentry, NULL, 0, name,
826 namelen, handler->flags);
803 } 827 }
804 828
805 b->pos += size; 829 b->pos += size;
@@ -820,7 +844,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
820 int err = 0; 844 int err = 0;
821 loff_t pos = 0; 845 loff_t pos = 0;
822 struct listxattr_buf buf = { 846 struct listxattr_buf buf = {
823 .inode = dentry->d_inode, 847 .dentry = dentry,
824 .buf = buffer, 848 .buf = buffer,
825 .size = buffer ? size : 0, 849 .size = buffer ? size : 0,
826 }; 850 };
@@ -975,7 +999,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
975 int err = 0; 999 int err = 0;
976 1000
977 /* If we don't have the privroot located yet - go find it */ 1001 /* If we don't have the privroot located yet - go find it */
978 mutex_lock(&s->s_root->d_inode->i_mutex); 1002 reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
979 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, 1003 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
980 strlen(PRIVROOT_NAME)); 1004 strlen(PRIVROOT_NAME));
981 if (!IS_ERR(dentry)) { 1005 if (!IS_ERR(dentry)) {
@@ -1004,14 +1028,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1004 goto error; 1028 goto error;
1005 1029
1006 if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { 1030 if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
1007 mutex_lock(&s->s_root->d_inode->i_mutex); 1031 reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
1008 err = create_privroot(REISERFS_SB(s)->priv_root); 1032 err = create_privroot(REISERFS_SB(s)->priv_root);
1009 mutex_unlock(&s->s_root->d_inode->i_mutex); 1033 mutex_unlock(&s->s_root->d_inode->i_mutex);
1010 } 1034 }
1011 1035
1012 if (privroot->d_inode) { 1036 if (privroot->d_inode) {
1013 s->s_xattr = reiserfs_xattr_handlers; 1037 s->s_xattr = reiserfs_xattr_handlers;
1014 mutex_lock(&privroot->d_inode->i_mutex); 1038 reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
1015 if (!REISERFS_SB(s)->xattr_root) { 1039 if (!REISERFS_SB(s)->xattr_root) {
1016 struct dentry *dentry; 1040 struct dentry *dentry;
1017 dentry = lookup_one_len(XAROOT_NAME, privroot, 1041 dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a27..dd20a7883f0 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct posix_acl *acl); 15 struct posix_acl *acl);
16 16
17static int 17static int
18xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 18posix_acl_set(struct dentry *dentry, const char *name, const void *value,
19 size_t size, int flags, int type)
19{ 20{
21 struct inode *inode = dentry->d_inode;
20 struct posix_acl *acl; 22 struct posix_acl *acl;
21 int error, error2; 23 int error, error2;
22 struct reiserfs_transaction_handle th; 24 struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
60} 62}
61 63
62static int 64static int
63xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 65posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
66 size_t size, int type)
64{ 67{
65 struct posix_acl *acl; 68 struct posix_acl *acl;
66 int error; 69 int error;
67 70
68 if (!reiserfs_posixacl(inode->i_sb)) 71 if (!reiserfs_posixacl(dentry->d_sb))
69 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
70 73
71 acl = reiserfs_get_acl(inode, type); 74 acl = reiserfs_get_acl(dentry->d_inode, type);
72 if (IS_ERR(acl)) 75 if (IS_ERR(acl))
73 return PTR_ERR(acl); 76 return PTR_ERR(acl);
74 if (acl == NULL) 77 if (acl == NULL)
@@ -452,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
452 return 0; 455 return 0;
453 } 456 }
454 457
458 reiserfs_write_unlock(inode->i_sb);
455 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 459 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
460 reiserfs_write_lock(inode->i_sb);
456 if (!acl) 461 if (!acl)
457 return 0; 462 return 0;
458 if (IS_ERR(acl)) 463 if (IS_ERR(acl))
@@ -482,30 +487,12 @@ int reiserfs_acl_chmod(struct inode *inode)
482 return error; 487 return error;
483} 488}
484 489
485static int 490static size_t posix_acl_access_list(struct dentry *dentry, char *list,
486posix_acl_access_get(struct inode *inode, const char *name,
487 void *buffer, size_t size)
488{
489 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
490 return -EINVAL;
491 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
492}
493
494static int
495posix_acl_access_set(struct inode *inode, const char *name,
496 const void *value, size_t size, int flags)
497{
498 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
499 return -EINVAL;
500 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
501}
502
503static size_t posix_acl_access_list(struct inode *inode, char *list,
504 size_t list_size, const char *name, 491 size_t list_size, const char *name,
505 size_t name_len) 492 size_t name_len, int type)
506{ 493{
507 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 494 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
508 if (!reiserfs_posixacl(inode->i_sb)) 495 if (!reiserfs_posixacl(dentry->d_sb))
509 return 0; 496 return 0;
510 if (list && size <= list_size) 497 if (list && size <= list_size)
511 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 498 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +501,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
514 501
515struct xattr_handler reiserfs_posix_acl_access_handler = { 502struct xattr_handler reiserfs_posix_acl_access_handler = {
516 .prefix = POSIX_ACL_XATTR_ACCESS, 503 .prefix = POSIX_ACL_XATTR_ACCESS,
517 .get = posix_acl_access_get, 504 .flags = ACL_TYPE_ACCESS,
518 .set = posix_acl_access_set, 505 .get = posix_acl_get,
506 .set = posix_acl_set,
519 .list = posix_acl_access_list, 507 .list = posix_acl_access_list,
520}; 508};
521 509
522static int 510static size_t posix_acl_default_list(struct dentry *dentry, char *list,
523posix_acl_default_get(struct inode *inode, const char *name,
524 void *buffer, size_t size)
525{
526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
527 return -EINVAL;
528 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
529}
530
531static int
532posix_acl_default_set(struct inode *inode, const char *name,
533 const void *value, size_t size, int flags)
534{
535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
536 return -EINVAL;
537 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
538}
539
540static size_t posix_acl_default_list(struct inode *inode, char *list,
541 size_t list_size, const char *name, 511 size_t list_size, const char *name,
542 size_t name_len) 512 size_t name_len, int type)
543{ 513{
544 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 514 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
545 if (!reiserfs_posixacl(inode->i_sb)) 515 if (!reiserfs_posixacl(dentry->d_sb))
546 return 0; 516 return 0;
547 if (list && size <= list_size) 517 if (list && size <= list_size)
548 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 518 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +521,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
551 521
552struct xattr_handler reiserfs_posix_acl_default_handler = { 522struct xattr_handler reiserfs_posix_acl_default_handler = {
553 .prefix = POSIX_ACL_XATTR_DEFAULT, 523 .prefix = POSIX_ACL_XATTR_DEFAULT,
554 .get = posix_acl_default_get, 524 .flags = ACL_TYPE_DEFAULT,
555 .set = posix_acl_default_set, 525 .get = posix_acl_get,
526 .set = posix_acl_set,
556 .list = posix_acl_default_list, 527 .list = posix_acl_default_list,
557}; 528};
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f..d8b5bfcbdd3 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11security_get(struct inode *inode, const char *name, void *buffer, size_t size) 11security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (IS_PRIVATE(inode)) 17 if (IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23security_set(struct inode *inode, const char *name, const void *buffer, 24security_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (IS_PRIVATE(inode)) 30 if (IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t security_list(struct inode *inode, char *list, size_t list_len, 36static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
36 const char *name, size_t namelen) 37 const char *name, size_t namelen, int handler_flags)
37{ 38{
38 const size_t len = namelen + 1; 39 const size_t len = namelen + 1;
39 40
40 if (IS_PRIVATE(inode)) 41 if (IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_len) { 44 if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e..5b08aaca3da 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 17 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23trusted_set(struct inode *inode, const char *name, const void *buffer, 24trusted_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 30 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t trusted_list(struct inode *inode, char *list, size_t list_size, 36static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
36 const char *name, size_t name_len) 37 const char *name, size_t name_len, int handler_flags)
37{ 38{
38 const size_t len = name_len + 1; 39 const size_t len = name_len + 1;
39 40
40 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 41 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_size) { 44 if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3d..75d59c49b91 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
10user_get(struct inode *inode, const char *name, void *buffer, size_t size) 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
11 int handler_flags)
11{ 12{
12 13
13 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 if (!reiserfs_xattrs_user(inode->i_sb)) 16 if (!reiserfs_xattrs_user(dentry->d_sb))
16 return -EOPNOTSUPP; 17 return -EOPNOTSUPP;
17 return reiserfs_xattr_get(inode, name, buffer, size); 18 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
18} 19}
19 20
20static int 21static int
21user_set(struct inode *inode, const char *name, const void *buffer, 22user_set(struct dentry *dentry, const char *name, const void *buffer,
22 size_t size, int flags) 23 size_t size, int flags, int handler_flags)
23{ 24{
24 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 25 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
25 return -EINVAL; 26 return -EINVAL;
26 27
27 if (!reiserfs_xattrs_user(inode->i_sb)) 28 if (!reiserfs_xattrs_user(dentry->d_sb))
28 return -EOPNOTSUPP; 29 return -EOPNOTSUPP;
29 return reiserfs_xattr_set(inode, name, buffer, size, flags); 30 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
30} 31}
31 32
32static size_t user_list(struct inode *inode, char *list, size_t list_size, 33static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
33 const char *name, size_t name_len) 34 const char *name, size_t name_len, int handler_flags)
34{ 35{
35 const size_t len = name_len + 1; 36 const size_t len = name_len + 1;
36 37
37 if (!reiserfs_xattrs_user(inode->i_sb)) 38 if (!reiserfs_xattrs_user(dentry->d_sb))
38 return 0; 39 return 0;
39 if (list && len <= list_size) { 40 if (list && len <= list_size) {
40 memcpy(list, name, name_len); 41 memcpy(list, name, name_len);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b07565c9438..1dabe4ee02f 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -236,7 +236,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
236 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
237 */ 237 */
238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK)); 239 O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
240 if (ufd < 0) 240 if (ufd < 0)
241 kfree(ctx); 241 kfree(ctx);
242 } else { 242 } else {
diff --git a/fs/splice.c b/fs/splice.c
index 7394e9e1753..39208663aaf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -648,9 +648,11 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
648 ret = buf->ops->confirm(pipe, buf); 648 ret = buf->ops->confirm(pipe, buf);
649 if (!ret) { 649 if (!ret) {
650 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 650 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
651 651 if (file->f_op && file->f_op->sendpage)
652 ret = file->f_op->sendpage(file, buf->page, buf->offset, 652 ret = file->f_op->sendpage(file, buf->page, buf->offset,
653 sd->len, &pos, more); 653 sd->len, &pos, more);
654 else
655 ret = -EINVAL;
654 } 656 }
655 657
656 return ret; 658 return ret;
@@ -1068,8 +1070,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1068 if (unlikely(ret < 0)) 1070 if (unlikely(ret < 0))
1069 return ret; 1071 return ret;
1070 1072
1071 splice_write = out->f_op->splice_write; 1073 if (out->f_op && out->f_op->splice_write)
1072 if (!splice_write) 1074 splice_write = out->f_op->splice_write;
1075 else
1073 splice_write = default_file_splice_write; 1076 splice_write = default_file_splice_write;
1074 1077
1075 return splice_write(pipe, out, ppos, len, flags); 1078 return splice_write(pipe, out, ppos, len, flags);
@@ -1093,8 +1096,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
1093 if (unlikely(ret < 0)) 1096 if (unlikely(ret < 0))
1094 return ret; 1097 return ret;
1095 1098
1096 splice_read = in->f_op->splice_read; 1099 if (in->f_op && in->f_op->splice_read)
1097 if (!splice_read) 1100 splice_read = in->f_op->splice_read;
1101 else
1098 splice_read = default_file_splice_read; 1102 splice_read = default_file_splice_read;
1099 1103
1100 return splice_read(in, ppos, pipe, len, flags); 1104 return splice_read(in, ppos, pipe, len, flags);
@@ -1316,7 +1320,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1316 if (off_in) 1320 if (off_in)
1317 return -ESPIPE; 1321 return -ESPIPE;
1318 if (off_out) { 1322 if (off_out) {
1319 if (out->f_op->llseek == no_llseek) 1323 if (!out->f_op || !out->f_op->llseek ||
1324 out->f_op->llseek == no_llseek)
1320 return -EINVAL; 1325 return -EINVAL;
1321 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1326 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1322 return -EFAULT; 1327 return -EFAULT;
@@ -1336,7 +1341,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1336 if (off_out) 1341 if (off_out)
1337 return -ESPIPE; 1342 return -ESPIPE;
1338 if (off_in) { 1343 if (off_in) {
1339 if (in->f_op->llseek == no_llseek) 1344 if (!in->f_op || !in->f_op->llseek ||
1345 in->f_op->llseek == no_llseek)
1340 return -EINVAL; 1346 return -EINVAL;
1341 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1347 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1342 return -EFAULT; 1348 return -EFAULT;
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4..4a6f7f44065 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
7 * This function cannot be inlined since i_size_{read,write} is rather 7 * This function cannot be inlined since i_size_{read,write} is rather
8 * heavy-weight on 32-bit systems 8 * heavy-weight on 32-bit systems
9 */ 9 */
10void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) 10void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
11{ 11{
12 i_size_write(dst, i_size_read((struct inode *)src)); 12 loff_t i_size;
13 dst->i_blocks = src->i_blocks; 13 blkcnt_t i_blocks;
14
15 /*
16 * i_size_read() includes its own seqlocking and protection from
17 * preemption (see include/linux/fs.h): we need nothing extra for
18 * that here, and prefer to avoid nesting locks than attempt to keep
19 * i_size and i_blocks in sync together.
20 */
21 i_size = i_size_read(src);
22
23 /*
24 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
25 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
26 * though stat's generic_fillattr() doesn't bother, and we won't be
27 * applying quotas (where i_blocks does become important) at the
28 * upper level.
29 *
30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */
37 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock);
39 i_blocks = src->i_blocks;
40 if (sizeof(i_blocks) > sizeof(long))
41 spin_unlock(&src->i_lock);
42
43 /*
44 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
45 * fsstack_copy_inode_size() to hold some lock around
46 * i_size_write(), otherwise i_size_read() may spin forever (see
47 * include/linux/fs.h). We don't necessarily hold i_mutex when this
48 * is called, so take i_lock for that case.
49 *
50 * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
51 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
52 * for that case too, and do both at once by combining the tests.
53 *
54 * There is none of this locking overhead in the 64-bit case.
55 */
56 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
57 spin_lock(&dst->i_lock);
58 i_size_write(dst, i_size);
59 dst->i_blocks = i_blocks;
60 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
61 spin_unlock(&dst->i_lock);
14} 62}
15EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); 63EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
16 64
17/* copy all attributes; get_nlinks is optional way to override the i_nlink 65/* copy all attributes */
18 * copying 66void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
19 */
20void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
21 int (*get_nlinks)(struct inode *))
22{ 67{
23 dest->i_mode = src->i_mode; 68 dest->i_mode = src->i_mode;
24 dest->i_uid = src->i_uid; 69 dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
29 dest->i_ctime = src->i_ctime; 74 dest->i_ctime = src->i_ctime;
30 dest->i_blkbits = src->i_blkbits; 75 dest->i_blkbits = src->i_blkbits;
31 dest->i_flags = src->i_flags; 76 dest->i_flags = src->i_flags;
32 77 dest->i_nlink = src->i_nlink;
33 /*
34 * Update the nlinks AFTER updating the above fields, because the
35 * get_links callback may depend on them.
36 */
37 if (!get_nlinks)
38 dest->i_nlink = src->i_nlink;
39 else
40 dest->i_nlink = (*get_nlinks)(dest);
41} 78}
42EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 79EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8..c4ecd52c573 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
401} 401}
402#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
403 403
404void inode_add_bytes(struct inode *inode, loff_t bytes) 404/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
405void __inode_add_bytes(struct inode *inode, loff_t bytes)
405{ 406{
406 spin_lock(&inode->i_lock);
407 inode->i_blocks += bytes >> 9; 407 inode->i_blocks += bytes >> 9;
408 bytes &= 511; 408 bytes &= 511;
409 inode->i_bytes += bytes; 409 inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
411 inode->i_blocks++; 411 inode->i_blocks++;
412 inode->i_bytes -= 512; 412 inode->i_bytes -= 512;
413 } 413 }
414}
415
416void inode_add_bytes(struct inode *inode, loff_t bytes)
417{
418 spin_lock(&inode->i_lock);
419 __inode_add_bytes(inode, bytes);
414 spin_unlock(&inode->i_lock); 420 spin_unlock(&inode->i_lock);
415} 421}
416 422
diff --git a/fs/super.c b/fs/super.c
index 19eb70b374b..aff046b0fe7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -901,8 +901,9 @@ int get_sb_single(struct file_system_type *fs_type,
901 return error; 901 return error;
902 } 902 }
903 s->s_flags |= MS_ACTIVE; 903 s->s_flags |= MS_ACTIVE;
904 } else {
905 do_remount_sb(s, flags, data, 0);
904 } 906 }
905 do_remount_sb(s, flags, data, 0);
906 simple_set_mnt(mnt, s); 907 simple_set_mnt(mnt, s);
907 return 0; 908 return 0;
908} 909}
diff --git a/fs/sync.c b/fs/sync.c
index d104591b066..418727a2a23 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
295 */ 295 */
296int generic_write_sync(struct file *file, loff_t pos, loff_t count) 296int generic_write_sync(struct file *file, loff_t pos, loff_t count)
297{ 297{
298 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) 298 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
299 return 0; 299 return 0;
300 return vfs_fsync_range(file, file->f_path.dentry, pos, 300 return vfs_fsync_range(file, file->f_path.dentry, pos,
301 pos + count - 1, 1); 301 pos + count - 1,
302 (file->f_flags & __O_SYNC) ? 0 : 1);
302} 303}
303EXPORT_SYMBOL(generic_write_sync); 304EXPORT_SYMBOL(generic_write_sync);
304 305
@@ -354,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
354{ 355{
355 int ret; 356 int ret;
356 struct file *file; 357 struct file *file;
358 struct address_space *mapping;
357 loff_t endbyte; /* inclusive */ 359 loff_t endbyte; /* inclusive */
358 int fput_needed; 360 int fput_needed;
359 umode_t i_mode; 361 umode_t i_mode;
@@ -404,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
404 !S_ISLNK(i_mode)) 406 !S_ISLNK(i_mode))
405 goto out_put; 407 goto out_put;
406 408
407 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 409 mapping = file->f_mapping;
410 if (!mapping) {
411 ret = -EINVAL;
412 goto out_put;
413 }
414
415 ret = 0;
416 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
417 ret = filemap_fdatawait_range(mapping, offset, endbyte);
418 if (ret < 0)
419 goto out_put;
420 }
421
422 if (flags & SYNC_FILE_RANGE_WRITE) {
423 ret = filemap_fdatawrite_range(mapping, offset, endbyte);
424 if (ret < 0)
425 goto out_put;
426 }
427
428 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
429 ret = filemap_fdatawait_range(mapping, offset, endbyte);
430
408out_put: 431out_put:
409 fput_light(file, fput_needed); 432 fput_light(file, fput_needed);
410out: 433out:
@@ -436,42 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
436} 459}
437SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 460SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
438#endif 461#endif
439
440/*
441 * `endbyte' is inclusive
442 */
443int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
444 loff_t endbyte, unsigned int flags)
445{
446 int ret;
447
448 if (!mapping) {
449 ret = -EINVAL;
450 goto out;
451 }
452
453 ret = 0;
454 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
455 ret = wait_on_page_writeback_range(mapping,
456 offset >> PAGE_CACHE_SHIFT,
457 endbyte >> PAGE_CACHE_SHIFT);
458 if (ret < 0)
459 goto out;
460 }
461
462 if (flags & SYNC_FILE_RANGE_WRITE) {
463 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
464 WB_SYNC_ALL);
465 if (ret < 0)
466 goto out;
467 }
468
469 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
470 ret = wait_on_page_writeback_range(mapping,
471 offset >> PAGE_CACHE_SHIFT,
472 endbyte >> PAGE_CACHE_SHIFT);
473 }
474out:
475 return ret;
476}
477EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10a..a0a500af24a 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
483 * @attr: attribute descriptor. 483 * @attr: attribute descriptor.
484 */ 484 */
485 485
486int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 486int sysfs_create_bin_file(struct kobject *kobj,
487 const struct bin_attribute *attr)
487{ 488{
488 BUG_ON(!kobj || !kobj->sd || !attr); 489 BUG_ON(!kobj || !kobj->sd || !attr);
489 490
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
497 * @attr: attribute descriptor. 498 * @attr: attribute descriptor.
498 */ 499 */
499 500
500void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 501void sysfs_remove_bin_file(struct kobject *kobj,
502 const struct bin_attribute *attr)
501{ 503{
502 sysfs_hash_and_remove(kobj->sd, attr->attr.name); 504 sysfs_hash_and_remove(kobj->sd, attr->attr.name);
503} 505}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e0201837d24..699f371b9f1 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -25,7 +25,6 @@
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27DEFINE_MUTEX(sysfs_mutex); 27DEFINE_MUTEX(sysfs_mutex);
28DEFINE_MUTEX(sysfs_rename_mutex);
29DEFINE_SPINLOCK(sysfs_assoc_lock); 28DEFINE_SPINLOCK(sysfs_assoc_lock);
30 29
31static DEFINE_SPINLOCK(sysfs_ino_lock); 30static DEFINE_SPINLOCK(sysfs_ino_lock);
@@ -85,46 +84,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
85} 84}
86 85
87/** 86/**
88 * sysfs_get_dentry - get dentry for the given sysfs_dirent
89 * @sd: sysfs_dirent of interest
90 *
91 * Get dentry for @sd. Dentry is looked up if currently not
92 * present. This function descends from the root looking up
93 * dentry for each step.
94 *
95 * LOCKING:
96 * mutex_lock(sysfs_rename_mutex)
97 *
98 * RETURNS:
99 * Pointer to found dentry on success, ERR_PTR() value on error.
100 */
101struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
102{
103 struct dentry *dentry = dget(sysfs_sb->s_root);
104
105 while (dentry->d_fsdata != sd) {
106 struct sysfs_dirent *cur;
107 struct dentry *parent;
108
109 /* find the first ancestor which hasn't been looked up */
110 cur = sd;
111 while (cur->s_parent != dentry->d_fsdata)
112 cur = cur->s_parent;
113
114 /* look it up */
115 parent = dentry;
116 mutex_lock(&parent->d_inode->i_mutex);
117 dentry = lookup_one_noperm(cur->s_name, parent);
118 mutex_unlock(&parent->d_inode->i_mutex);
119 dput(parent);
120
121 if (IS_ERR(dentry))
122 break;
123 }
124 return dentry;
125}
126
127/**
128 * sysfs_get_active - get an active reference to sysfs_dirent 87 * sysfs_get_active - get an active reference to sysfs_dirent
129 * @sd: sysfs_dirent to get an active reference to 88 * @sd: sysfs_dirent to get an active reference to
130 * 89 *
@@ -147,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
147 return NULL; 106 return NULL;
148 107
149 t = atomic_cmpxchg(&sd->s_active, v, v + 1); 108 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
150 if (likely(t == v)) 109 if (likely(t == v)) {
110 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
151 return sd; 111 return sd;
112 }
152 if (t < 0) 113 if (t < 0)
153 return NULL; 114 return NULL;
154 115
@@ -171,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
171 if (unlikely(!sd)) 132 if (unlikely(!sd))
172 return; 133 return;
173 134
135 rwsem_release(&sd->dep_map, 1, _RET_IP_);
174 v = atomic_dec_return(&sd->s_active); 136 v = atomic_dec_return(&sd->s_active);
175 if (likely(v != SD_DEACTIVATED_BIAS)) 137 if (likely(v != SD_DEACTIVATED_BIAS))
176 return; 138 return;
@@ -235,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
235 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 197 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
236 sd->s_sibling = (void *)&wait; 198 sd->s_sibling = (void *)&wait;
237 199
200 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
238 /* atomic_add_return() is a mb(), put_active() will always see 201 /* atomic_add_return() is a mb(), put_active() will always see
239 * the updated sd->s_sibling. 202 * the updated sd->s_sibling.
240 */ 203 */
241 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 204 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
242 205
243 if (v != SD_DEACTIVATED_BIAS) 206 if (v != SD_DEACTIVATED_BIAS) {
207 lock_contended(&sd->dep_map, _RET_IP_);
244 wait_for_completion(&wait); 208 wait_for_completion(&wait);
209 }
245 210
246 sd->s_sibling = NULL; 211 sd->s_sibling = NULL;
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
247} 215}
248 216
249static int sysfs_alloc_ino(ino_t *pino) 217static int sysfs_alloc_ino(ino_t *pino)
@@ -298,7 +266,61 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
298 goto repeat; 266 goto repeat;
299} 267}
300 268
301static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) 269static int sysfs_dentry_delete(struct dentry *dentry)
270{
271 struct sysfs_dirent *sd = dentry->d_fsdata;
272 return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
273}
274
275static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
276{
277 struct sysfs_dirent *sd = dentry->d_fsdata;
278 int is_dir;
279
280 mutex_lock(&sysfs_mutex);
281
282 /* The sysfs dirent has been deleted */
283 if (sd->s_flags & SYSFS_FLAG_REMOVED)
284 goto out_bad;
285
286 /* The sysfs dirent has been moved? */
287 if (dentry->d_parent->d_fsdata != sd->s_parent)
288 goto out_bad;
289
290 /* The sysfs dirent has been renamed */
291 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
292 goto out_bad;
293
294 mutex_unlock(&sysfs_mutex);
295out_valid:
296 return 1;
297out_bad:
298 /* Remove the dentry from the dcache hashes.
299 * If this is a deleted dentry we use d_drop instead of d_delete
300 * so sysfs doesn't need to cope with negative dentries.
301 *
302 * If this is a dentry that has simply been renamed we
303 * use d_drop to remove it from the dcache lookup on its
304 * old parent. If this dentry persists later when a lookup
305 * is performed at its new name the dentry will be readded
306 * to the dcache hashes.
307 */
308 is_dir = (sysfs_type(sd) == SYSFS_DIR);
309 mutex_unlock(&sysfs_mutex);
310 if (is_dir) {
311 /* If we have submounts we must allow the vfs caches
312 * to lie about the state of the filesystem to prevent
313 * leaks and other nasty things.
314 */
315 if (have_submounts(dentry))
316 goto out_valid;
317 shrink_dcache_parent(dentry);
318 }
319 d_drop(dentry);
320 return 0;
321}
322
323static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
302{ 324{
303 struct sysfs_dirent * sd = dentry->d_fsdata; 325 struct sysfs_dirent * sd = dentry->d_fsdata;
304 326
@@ -307,7 +329,9 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
307} 329}
308 330
309static const struct dentry_operations sysfs_dentry_ops = { 331static const struct dentry_operations sysfs_dentry_ops = {
310 .d_iput = sysfs_d_iput, 332 .d_revalidate = sysfs_dentry_revalidate,
333 .d_delete = sysfs_dentry_delete,
334 .d_iput = sysfs_dentry_iput,
311}; 335};
312 336
313struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 337struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -330,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
330 354
331 atomic_set(&sd->s_count, 1); 355 atomic_set(&sd->s_count, 1);
332 atomic_set(&sd->s_active, 0); 356 atomic_set(&sd->s_active, 0);
357 sysfs_dirent_init_lockdep(sd);
333 358
334 sd->s_name = name; 359 sd->s_name = name;
335 sd->s_mode = mode; 360 sd->s_mode = mode;
@@ -344,12 +369,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
344 return NULL; 369 return NULL;
345} 370}
346 371
347static int sysfs_ilookup_test(struct inode *inode, void *arg)
348{
349 struct sysfs_dirent *sd = arg;
350 return inode->i_ino == sd->s_ino;
351}
352
353/** 372/**
354 * sysfs_addrm_start - prepare for sysfs_dirent add/remove 373 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
355 * @acxt: pointer to sysfs_addrm_cxt to be used 374 * @acxt: pointer to sysfs_addrm_cxt to be used
@@ -357,47 +376,20 @@ static int sysfs_ilookup_test(struct inode *inode, void *arg)
357 * 376 *
358 * This function is called when the caller is about to add or 377 * This function is called when the caller is about to add or
359 * remove sysfs_dirent under @parent_sd. This function acquires 378 * remove sysfs_dirent under @parent_sd. This function acquires
360 * sysfs_mutex, grabs inode for @parent_sd if available and lock 379 * sysfs_mutex. @acxt is used to keep and pass context to
361 * i_mutex of it. @acxt is used to keep and pass context to
362 * other addrm functions. 380 * other addrm functions.
363 * 381 *
364 * LOCKING: 382 * LOCKING:
365 * Kernel thread context (may sleep). sysfs_mutex is locked on 383 * Kernel thread context (may sleep). sysfs_mutex is locked on
366 * return. i_mutex of parent inode is locked on return if 384 * return.
367 * available.
368 */ 385 */
369void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, 386void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
370 struct sysfs_dirent *parent_sd) 387 struct sysfs_dirent *parent_sd)
371{ 388{
372 struct inode *inode;
373
374 memset(acxt, 0, sizeof(*acxt)); 389 memset(acxt, 0, sizeof(*acxt));
375 acxt->parent_sd = parent_sd; 390 acxt->parent_sd = parent_sd;
376 391
377 /* Lookup parent inode. inode initialization is protected by
378 * sysfs_mutex, so inode existence can be determined by
379 * looking up inode while holding sysfs_mutex.
380 */
381 mutex_lock(&sysfs_mutex); 392 mutex_lock(&sysfs_mutex);
382
383 inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
384 parent_sd);
385 if (inode) {
386 WARN_ON(inode->i_state & I_NEW);
387
388 /* parent inode available */
389 acxt->parent_inode = inode;
390
391 /* sysfs_mutex is below i_mutex in lock hierarchy.
392 * First, trylock i_mutex. If fails, unlock
393 * sysfs_mutex and lock them in order.
394 */
395 if (!mutex_trylock(&inode->i_mutex)) {
396 mutex_unlock(&sysfs_mutex);
397 mutex_lock(&inode->i_mutex);
398 mutex_lock(&sysfs_mutex);
399 }
400 }
401} 393}
402 394
403/** 395/**
@@ -422,18 +414,22 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
422 */ 414 */
423int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 415int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
424{ 416{
417 struct sysfs_inode_attrs *ps_iattr;
418
425 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 419 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
426 return -EEXIST; 420 return -EEXIST;
427 421
428 sd->s_parent = sysfs_get(acxt->parent_sd); 422 sd->s_parent = sysfs_get(acxt->parent_sd);
429 423
430 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
431 inc_nlink(acxt->parent_inode);
432
433 acxt->cnt++;
434
435 sysfs_link_sibling(sd); 424 sysfs_link_sibling(sd);
436 425
426 /* Update timestamps on the parent */
427 ps_iattr = acxt->parent_sd->s_iattr;
428 if (ps_iattr) {
429 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
430 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
431 }
432
437 return 0; 433 return 0;
438} 434}
439 435
@@ -512,70 +508,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
512 */ 508 */
513void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 509void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
514{ 510{
511 struct sysfs_inode_attrs *ps_iattr;
512
515 BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED); 513 BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
516 514
517 sysfs_unlink_sibling(sd); 515 sysfs_unlink_sibling(sd);
518 516
517 /* Update timestamps on the parent */
518 ps_iattr = acxt->parent_sd->s_iattr;
519 if (ps_iattr) {
520 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
521 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
522 }
523
519 sd->s_flags |= SYSFS_FLAG_REMOVED; 524 sd->s_flags |= SYSFS_FLAG_REMOVED;
520 sd->s_sibling = acxt->removed; 525 sd->s_sibling = acxt->removed;
521 acxt->removed = sd; 526 acxt->removed = sd;
522
523 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
524 drop_nlink(acxt->parent_inode);
525
526 acxt->cnt++;
527}
528
529/**
530 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
531 * @sd: target sysfs_dirent
532 *
533 * Drop dentry for @sd. @sd must have been unlinked from its
534 * parent on entry to this function such that it can't be looked
535 * up anymore.
536 */
537static void sysfs_drop_dentry(struct sysfs_dirent *sd)
538{
539 struct inode *inode;
540 struct dentry *dentry;
541
542 inode = ilookup(sysfs_sb, sd->s_ino);
543 if (!inode)
544 return;
545
546 /* Drop any existing dentries associated with sd.
547 *
548 * For the dentry to be properly freed we need to grab a
549 * reference to the dentry under the dcache lock, unhash it,
550 * and then put it. The playing with the dentry count allows
551 * dput to immediately free the dentry if it is not in use.
552 */
553repeat:
554 spin_lock(&dcache_lock);
555 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
556 if (d_unhashed(dentry))
557 continue;
558 dget_locked(dentry);
559 spin_lock(&dentry->d_lock);
560 __d_drop(dentry);
561 spin_unlock(&dentry->d_lock);
562 spin_unlock(&dcache_lock);
563 dput(dentry);
564 goto repeat;
565 }
566 spin_unlock(&dcache_lock);
567
568 /* adjust nlink and update timestamp */
569 mutex_lock(&inode->i_mutex);
570
571 inode->i_ctime = CURRENT_TIME;
572 drop_nlink(inode);
573 if (sysfs_type(sd) == SYSFS_DIR)
574 drop_nlink(inode);
575
576 mutex_unlock(&inode->i_mutex);
577
578 iput(inode);
579} 527}
580 528
581/** 529/**
@@ -584,25 +532,15 @@ repeat:
584 * 532 *
585 * Finish up sysfs_dirent add/remove. Resources acquired by 533 * Finish up sysfs_dirent add/remove. Resources acquired by
586 * sysfs_addrm_start() are released and removed sysfs_dirents are 534 * sysfs_addrm_start() are released and removed sysfs_dirents are
587 * cleaned up. Timestamps on the parent inode are updated. 535 * cleaned up.
588 * 536 *
589 * LOCKING: 537 * LOCKING:
590 * All mutexes acquired by sysfs_addrm_start() are released. 538 * sysfs_mutex is released.
591 */ 539 */
592void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) 540void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
593{ 541{
594 /* release resources acquired by sysfs_addrm_start() */ 542 /* release resources acquired by sysfs_addrm_start() */
595 mutex_unlock(&sysfs_mutex); 543 mutex_unlock(&sysfs_mutex);
596 if (acxt->parent_inode) {
597 struct inode *inode = acxt->parent_inode;
598
599 /* if added/removed, update timestamps on the parent */
600 if (acxt->cnt)
601 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
602
603 mutex_unlock(&inode->i_mutex);
604 iput(inode);
605 }
606 544
607 /* kill removed sysfs_dirents */ 545 /* kill removed sysfs_dirents */
608 while (acxt->removed) { 546 while (acxt->removed) {
@@ -611,7 +549,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
611 acxt->removed = sd->s_sibling; 549 acxt->removed = sd->s_sibling;
612 sd->s_sibling = NULL; 550 sd->s_sibling = NULL;
613 551
614 sysfs_drop_dentry(sd);
615 sysfs_deactivate(sd); 552 sysfs_deactivate(sd);
616 unmap_bin_file(sd); 553 unmap_bin_file(sd);
617 sysfs_put(sd); 554 sysfs_put(sd);
@@ -751,10 +688,15 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
751 } 688 }
752 689
753 /* instantiate and hash dentry */ 690 /* instantiate and hash dentry */
754 dentry->d_op = &sysfs_dentry_ops; 691 ret = d_find_alias(inode);
755 dentry->d_fsdata = sysfs_get(sd); 692 if (!ret) {
756 d_instantiate(dentry, inode); 693 dentry->d_op = &sysfs_dentry_ops;
757 d_rehash(dentry); 694 dentry->d_fsdata = sysfs_get(sd);
695 d_add(dentry, inode);
696 } else {
697 d_move(ret, dentry);
698 iput(inode);
699 }
758 700
759 out_unlock: 701 out_unlock:
760 mutex_unlock(&sysfs_mutex); 702 mutex_unlock(&sysfs_mutex);
@@ -763,7 +705,9 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 705
764const struct inode_operations sysfs_dir_inode_operations = { 706const struct inode_operations sysfs_dir_inode_operations = {
765 .lookup = sysfs_lookup, 707 .lookup = sysfs_lookup,
708 .permission = sysfs_permission,
766 .setattr = sysfs_setattr, 709 .setattr = sysfs_setattr,
710 .getattr = sysfs_getattr,
767 .setxattr = sysfs_setxattr, 711 .setxattr = sysfs_setxattr,
768}; 712};
769 713
@@ -826,141 +770,65 @@ void sysfs_remove_dir(struct kobject * kobj)
826 __sysfs_remove_dir(sd); 770 __sysfs_remove_dir(sd);
827} 771}
828 772
829int sysfs_rename_dir(struct kobject * kobj, const char *new_name) 773int sysfs_rename(struct sysfs_dirent *sd,
774 struct sysfs_dirent *new_parent_sd, const char *new_name)
830{ 775{
831 struct sysfs_dirent *sd = kobj->sd;
832 struct dentry *parent = NULL;
833 struct dentry *old_dentry = NULL, *new_dentry = NULL;
834 const char *dup_name = NULL; 776 const char *dup_name = NULL;
835 int error; 777 int error;
836 778
837 mutex_lock(&sysfs_rename_mutex); 779 mutex_lock(&sysfs_mutex);
838 780
839 error = 0; 781 error = 0;
840 if (strcmp(sd->s_name, new_name) == 0) 782 if ((sd->s_parent == new_parent_sd) &&
783 (strcmp(sd->s_name, new_name) == 0))
841 goto out; /* nothing to rename */ 784 goto out; /* nothing to rename */
842 785
843 /* get the original dentry */
844 old_dentry = sysfs_get_dentry(sd);
845 if (IS_ERR(old_dentry)) {
846 error = PTR_ERR(old_dentry);
847 old_dentry = NULL;
848 goto out;
849 }
850
851 parent = old_dentry->d_parent;
852
853 /* lock parent and get dentry for new name */
854 mutex_lock(&parent->d_inode->i_mutex);
855 mutex_lock(&sysfs_mutex);
856
857 error = -EEXIST; 786 error = -EEXIST;
858 if (sysfs_find_dirent(sd->s_parent, new_name)) 787 if (sysfs_find_dirent(new_parent_sd, new_name))
859 goto out_unlock; 788 goto out;
860
861 error = -ENOMEM;
862 new_dentry = d_alloc_name(parent, new_name);
863 if (!new_dentry)
864 goto out_unlock;
865 789
866 /* rename sysfs_dirent */ 790 /* rename sysfs_dirent */
867 error = -ENOMEM; 791 if (strcmp(sd->s_name, new_name) != 0) {
868 new_name = dup_name = kstrdup(new_name, GFP_KERNEL); 792 error = -ENOMEM;
869 if (!new_name) 793 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
870 goto out_unlock; 794 if (!new_name)
871 795 goto out;
872 dup_name = sd->s_name; 796
873 sd->s_name = new_name; 797 dup_name = sd->s_name;
798 sd->s_name = new_name;
799 }
874 800
875 /* rename */ 801 /* Remove from old parent's list and insert into new parent's list. */
876 d_add(new_dentry, NULL); 802 if (sd->s_parent != new_parent_sd) {
877 d_move(old_dentry, new_dentry); 803 sysfs_unlink_sibling(sd);
804 sysfs_get(new_parent_sd);
805 sysfs_put(sd->s_parent);
806 sd->s_parent = new_parent_sd;
807 sysfs_link_sibling(sd);
808 }
878 809
879 error = 0; 810 error = 0;
880 out_unlock: 811 out:
881 mutex_unlock(&sysfs_mutex); 812 mutex_unlock(&sysfs_mutex);
882 mutex_unlock(&parent->d_inode->i_mutex);
883 kfree(dup_name); 813 kfree(dup_name);
884 dput(old_dentry);
885 dput(new_dentry);
886 out:
887 mutex_unlock(&sysfs_rename_mutex);
888 return error; 814 return error;
889} 815}
890 816
817int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
818{
819 return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name);
820}
821
891int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) 822int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
892{ 823{
893 struct sysfs_dirent *sd = kobj->sd; 824 struct sysfs_dirent *sd = kobj->sd;
894 struct sysfs_dirent *new_parent_sd; 825 struct sysfs_dirent *new_parent_sd;
895 struct dentry *old_parent, *new_parent = NULL;
896 struct dentry *old_dentry = NULL, *new_dentry = NULL;
897 int error;
898 826
899 mutex_lock(&sysfs_rename_mutex);
900 BUG_ON(!sd->s_parent); 827 BUG_ON(!sd->s_parent);
901 new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ? 828 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
902 new_parent_kobj->sd : &sysfs_root; 829 new_parent_kobj->sd : &sysfs_root;
903 830
904 error = 0; 831 return sysfs_rename(sd, new_parent_sd, sd->s_name);
905 if (sd->s_parent == new_parent_sd)
906 goto out; /* nothing to move */
907
908 /* get dentries */
909 old_dentry = sysfs_get_dentry(sd);
910 if (IS_ERR(old_dentry)) {
911 error = PTR_ERR(old_dentry);
912 old_dentry = NULL;
913 goto out;
914 }
915 old_parent = old_dentry->d_parent;
916
917 new_parent = sysfs_get_dentry(new_parent_sd);
918 if (IS_ERR(new_parent)) {
919 error = PTR_ERR(new_parent);
920 new_parent = NULL;
921 goto out;
922 }
923
924again:
925 mutex_lock(&old_parent->d_inode->i_mutex);
926 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
927 mutex_unlock(&old_parent->d_inode->i_mutex);
928 goto again;
929 }
930 mutex_lock(&sysfs_mutex);
931
932 error = -EEXIST;
933 if (sysfs_find_dirent(new_parent_sd, sd->s_name))
934 goto out_unlock;
935
936 error = -ENOMEM;
937 new_dentry = d_alloc_name(new_parent, sd->s_name);
938 if (!new_dentry)
939 goto out_unlock;
940
941 error = 0;
942 d_add(new_dentry, NULL);
943 d_move(old_dentry, new_dentry);
944
945 /* Remove from old parent's list and insert into new parent's list. */
946 sysfs_unlink_sibling(sd);
947 sysfs_get(new_parent_sd);
948 drop_nlink(old_parent->d_inode);
949 sysfs_put(sd->s_parent);
950 sd->s_parent = new_parent_sd;
951 inc_nlink(new_parent->d_inode);
952 sysfs_link_sibling(sd);
953
954 out_unlock:
955 mutex_unlock(&sysfs_mutex);
956 mutex_unlock(&new_parent->d_inode->i_mutex);
957 mutex_unlock(&old_parent->d_inode->i_mutex);
958 out:
959 dput(new_parent);
960 dput(old_dentry);
961 dput(new_dentry);
962 mutex_unlock(&sysfs_rename_mutex);
963 return error;
964} 832}
965 833
966/* Relationship between s_mode and the DT_xxx types */ 834/* Relationship between s_mode and the DT_xxx types */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f5ea4680f15..dc30d9e3168 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -579,46 +579,23 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
579 */ 579 */
580int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 580int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
581{ 581{
582 struct sysfs_dirent *victim_sd = NULL; 582 struct sysfs_dirent *sd;
583 struct dentry *victim = NULL;
584 struct inode * inode;
585 struct iattr newattrs; 583 struct iattr newattrs;
586 int rc; 584 int rc;
587 585
588 rc = -ENOENT; 586 mutex_lock(&sysfs_mutex);
589 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
590 if (!victim_sd)
591 goto out;
592 587
593 mutex_lock(&sysfs_rename_mutex); 588 rc = -ENOENT;
594 victim = sysfs_get_dentry(victim_sd); 589 sd = sysfs_find_dirent(kobj->sd, attr->name);
595 mutex_unlock(&sysfs_rename_mutex); 590 if (!sd)
596 if (IS_ERR(victim)) {
597 rc = PTR_ERR(victim);
598 victim = NULL;
599 goto out; 591 goto out;
600 }
601
602 inode = victim->d_inode;
603
604 mutex_lock(&inode->i_mutex);
605 592
606 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 593 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
607 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 594 newattrs.ia_valid = ATTR_MODE;
608 newattrs.ia_ctime = current_fs_time(inode->i_sb); 595 rc = sysfs_sd_setattr(sd, &newattrs);
609 rc = sysfs_setattr(victim, &newattrs);
610 596
611 if (rc == 0) {
612 fsnotify_change(victim, newattrs.ia_valid);
613 mutex_lock(&sysfs_mutex);
614 victim_sd->s_mode = newattrs.ia_mode;
615 mutex_unlock(&sysfs_mutex);
616 }
617
618 mutex_unlock(&inode->i_mutex);
619 out: 597 out:
620 dput(victim); 598 mutex_unlock(&sysfs_mutex);
621 sysfs_put(victim_sd);
622 return rc; 599 return rc;
623} 600}
624EXPORT_SYMBOL_GPL(sysfs_chmod_file); 601EXPORT_SYMBOL_GPL(sysfs_chmod_file);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e28cecf179f..220b758523a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -37,7 +37,9 @@ static struct backing_dev_info sysfs_backing_dev_info = {
37}; 37};
38 38
39static const struct inode_operations sysfs_inode_operations ={ 39static const struct inode_operations sysfs_inode_operations ={
40 .permission = sysfs_permission,
40 .setattr = sysfs_setattr, 41 .setattr = sysfs_setattr,
42 .getattr = sysfs_getattr,
41 .setxattr = sysfs_setxattr, 43 .setxattr = sysfs_setxattr,
42}; 44};
43 45
@@ -46,7 +48,7 @@ int __init sysfs_inode_init(void)
46 return bdi_init(&sysfs_backing_dev_info); 48 return bdi_init(&sysfs_backing_dev_info);
47} 49}
48 50
49struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) 51static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
50{ 52{
51 struct sysfs_inode_attrs *attrs; 53 struct sysfs_inode_attrs *attrs;
52 struct iattr *iattrs; 54 struct iattr *iattrs;
@@ -64,30 +66,15 @@ struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
64 66
65 return attrs; 67 return attrs;
66} 68}
67int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 69
70int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
68{ 71{
69 struct inode * inode = dentry->d_inode;
70 struct sysfs_dirent * sd = dentry->d_fsdata;
71 struct sysfs_inode_attrs *sd_attrs; 72 struct sysfs_inode_attrs *sd_attrs;
72 struct iattr *iattrs; 73 struct iattr *iattrs;
73 unsigned int ia_valid = iattr->ia_valid; 74 unsigned int ia_valid = iattr->ia_valid;
74 int error;
75
76 if (!sd)
77 return -EINVAL;
78 75
79 sd_attrs = sd->s_iattr; 76 sd_attrs = sd->s_iattr;
80 77
81 error = inode_change_ok(inode, iattr);
82 if (error)
83 return error;
84
85 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
86
87 error = inode_setattr(inode, iattr);
88 if (error)
89 return error;
90
91 if (!sd_attrs) { 78 if (!sd_attrs) {
92 /* setting attributes for the first time, allocate now */ 79 /* setting attributes for the first time, allocate now */
93 sd_attrs = sysfs_init_inode_attrs(sd); 80 sd_attrs = sysfs_init_inode_attrs(sd);
@@ -103,42 +90,78 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
103 if (ia_valid & ATTR_GID) 90 if (ia_valid & ATTR_GID)
104 iattrs->ia_gid = iattr->ia_gid; 91 iattrs->ia_gid = iattr->ia_gid;
105 if (ia_valid & ATTR_ATIME) 92 if (ia_valid & ATTR_ATIME)
106 iattrs->ia_atime = timespec_trunc(iattr->ia_atime, 93 iattrs->ia_atime = iattr->ia_atime;
107 inode->i_sb->s_time_gran);
108 if (ia_valid & ATTR_MTIME) 94 if (ia_valid & ATTR_MTIME)
109 iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, 95 iattrs->ia_mtime = iattr->ia_mtime;
110 inode->i_sb->s_time_gran);
111 if (ia_valid & ATTR_CTIME) 96 if (ia_valid & ATTR_CTIME)
112 iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, 97 iattrs->ia_ctime = iattr->ia_ctime;
113 inode->i_sb->s_time_gran);
114 if (ia_valid & ATTR_MODE) { 98 if (ia_valid & ATTR_MODE) {
115 umode_t mode = iattr->ia_mode; 99 umode_t mode = iattr->ia_mode;
116
117 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
118 mode &= ~S_ISGID;
119 iattrs->ia_mode = sd->s_mode = mode; 100 iattrs->ia_mode = sd->s_mode = mode;
120 } 101 }
121 } 102 }
103 return 0;
104}
105
106int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
107{
108 struct inode *inode = dentry->d_inode;
109 struct sysfs_dirent *sd = dentry->d_fsdata;
110 int error;
111
112 if (!sd)
113 return -EINVAL;
114
115 error = inode_change_ok(inode, iattr);
116 if (error)
117 return error;
118
119 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
120
121 error = inode_setattr(inode, iattr);
122 if (error)
123 return error;
124
125 mutex_lock(&sysfs_mutex);
126 error = sysfs_sd_setattr(sd, iattr);
127 mutex_unlock(&sysfs_mutex);
128
122 return error; 129 return error;
123} 130}
124 131
132static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *secdata_len)
133{
134 struct sysfs_inode_attrs *iattrs;
135 void *old_secdata;
136 size_t old_secdata_len;
137
138 iattrs = sd->s_iattr;
139 if (!iattrs)
140 iattrs = sysfs_init_inode_attrs(sd);
141 if (!iattrs)
142 return -ENOMEM;
143
144 old_secdata = iattrs->ia_secdata;
145 old_secdata_len = iattrs->ia_secdata_len;
146
147 iattrs->ia_secdata = *secdata;
148 iattrs->ia_secdata_len = *secdata_len;
149
150 *secdata = old_secdata;
151 *secdata_len = old_secdata_len;
152 return 0;
153}
154
125int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 155int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
126 size_t size, int flags) 156 size_t size, int flags)
127{ 157{
128 struct sysfs_dirent *sd = dentry->d_fsdata; 158 struct sysfs_dirent *sd = dentry->d_fsdata;
129 struct sysfs_inode_attrs *iattrs;
130 void *secdata; 159 void *secdata;
131 int error; 160 int error;
132 u32 secdata_len = 0; 161 u32 secdata_len = 0;
133 162
134 if (!sd) 163 if (!sd)
135 return -EINVAL; 164 return -EINVAL;
136 if (!sd->s_iattr)
137 sd->s_iattr = sysfs_init_inode_attrs(sd);
138 if (!sd->s_iattr)
139 return -ENOMEM;
140
141 iattrs = sd->s_iattr;
142 165
143 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { 166 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
144 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; 167 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
@@ -150,12 +173,13 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
150 &secdata, &secdata_len); 173 &secdata, &secdata_len);
151 if (error) 174 if (error)
152 goto out; 175 goto out;
153 if (iattrs->ia_secdata)
154 security_release_secctx(iattrs->ia_secdata,
155 iattrs->ia_secdata_len);
156 iattrs->ia_secdata = secdata;
157 iattrs->ia_secdata_len = secdata_len;
158 176
177 mutex_lock(&sysfs_mutex);
178 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
179 mutex_unlock(&sysfs_mutex);
180
181 if (secdata)
182 security_release_secctx(secdata, secdata_len);
159 } else 183 } else
160 return -EINVAL; 184 return -EINVAL;
161out: 185out:
@@ -170,7 +194,6 @@ static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
170 194
171static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) 195static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
172{ 196{
173 inode->i_mode = iattr->ia_mode;
174 inode->i_uid = iattr->ia_uid; 197 inode->i_uid = iattr->ia_uid;
175 inode->i_gid = iattr->ia_gid; 198 inode->i_gid = iattr->ia_gid;
176 inode->i_atime = iattr->ia_atime; 199 inode->i_atime = iattr->ia_atime;
@@ -178,17 +201,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
178 inode->i_ctime = iattr->ia_ctime; 201 inode->i_ctime = iattr->ia_ctime;
179} 202}
180 203
181
182/*
183 * sysfs has a different i_mutex lock order behavior for i_mutex than other
184 * filesystems; sysfs i_mutex is called in many places with subsystem locks
185 * held. At the same time, many of the VFS locking rules do not apply to
186 * sysfs at all (cross directory rename for example). To untangle this mess
187 * (which gives false positives in lockdep), we're giving sysfs inodes their
188 * own class for i_mutex.
189 */
190static struct lock_class_key sysfs_inode_imutex_key;
191
192static int sysfs_count_nlink(struct sysfs_dirent *sd) 204static int sysfs_count_nlink(struct sysfs_dirent *sd)
193{ 205{
194 struct sysfs_dirent *child; 206 struct sysfs_dirent *child;
@@ -201,38 +213,55 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
201 return nr + 2; 213 return nr + 2;
202} 214}
203 215
216static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
217{
218 struct sysfs_inode_attrs *iattrs = sd->s_iattr;
219
220 inode->i_mode = sd->s_mode;
221 if (iattrs) {
222 /* sysfs_dirent has non-default attributes
223 * get them from persistent copy in sysfs_dirent
224 */
225 set_inode_attr(inode, &iattrs->ia_iattr);
226 security_inode_notifysecctx(inode,
227 iattrs->ia_secdata,
228 iattrs->ia_secdata_len);
229 }
230
231 if (sysfs_type(sd) == SYSFS_DIR)
232 inode->i_nlink = sysfs_count_nlink(sd);
233}
234
235int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
236{
237 struct sysfs_dirent *sd = dentry->d_fsdata;
238 struct inode *inode = dentry->d_inode;
239
240 mutex_lock(&sysfs_mutex);
241 sysfs_refresh_inode(sd, inode);
242 mutex_unlock(&sysfs_mutex);
243
244 generic_fillattr(inode, stat);
245 return 0;
246}
247
204static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 248static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
205{ 249{
206 struct bin_attribute *bin_attr; 250 struct bin_attribute *bin_attr;
207 struct sysfs_inode_attrs *iattrs;
208 251
209 inode->i_private = sysfs_get(sd); 252 inode->i_private = sysfs_get(sd);
210 inode->i_mapping->a_ops = &sysfs_aops; 253 inode->i_mapping->a_ops = &sysfs_aops;
211 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 254 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
212 inode->i_op = &sysfs_inode_operations; 255 inode->i_op = &sysfs_inode_operations;
213 inode->i_ino = sd->s_ino;
214 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
215 256
216 iattrs = sd->s_iattr; 257 set_default_inode_attr(inode, sd->s_mode);
217 if (iattrs) { 258 sysfs_refresh_inode(sd, inode);
218 /* sysfs_dirent has non-default attributes
219 * get them for the new inode from persistent copy
220 * in sysfs_dirent
221 */
222 set_inode_attr(inode, &iattrs->ia_iattr);
223 if (iattrs->ia_secdata)
224 security_inode_notifysecctx(inode,
225 iattrs->ia_secdata,
226 iattrs->ia_secdata_len);
227 } else
228 set_default_inode_attr(inode, sd->s_mode);
229 259
230 /* initialize inode according to type */ 260 /* initialize inode according to type */
231 switch (sysfs_type(sd)) { 261 switch (sysfs_type(sd)) {
232 case SYSFS_DIR: 262 case SYSFS_DIR:
233 inode->i_op = &sysfs_dir_inode_operations; 263 inode->i_op = &sysfs_dir_inode_operations;
234 inode->i_fop = &sysfs_dir_operations; 264 inode->i_fop = &sysfs_dir_operations;
235 inode->i_nlink = sysfs_count_nlink(sd);
236 break; 265 break;
237 case SYSFS_KOBJ_ATTR: 266 case SYSFS_KOBJ_ATTR:
238 inode->i_size = PAGE_SIZE; 267 inode->i_size = PAGE_SIZE;
@@ -315,3 +344,14 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
315 else 344 else
316 return -ENOENT; 345 return -ENOENT;
317} 346}
347
348int sysfs_permission(struct inode *inode, int mask)
349{
350 struct sysfs_dirent *sd = inode->i_private;
351
352 mutex_lock(&sysfs_mutex);
353 sysfs_refresh_inode(sd, inode);
354 mutex_unlock(&sysfs_mutex);
355
356 return generic_permission(inode, mask, NULL);
357}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index c5081ad7702..c5eff49fa41 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -210,10 +210,13 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
210} 210}
211 211
212const struct inode_operations sysfs_symlink_inode_operations = { 212const struct inode_operations sysfs_symlink_inode_operations = {
213 .setxattr = sysfs_setxattr, 213 .setxattr = sysfs_setxattr,
214 .readlink = generic_readlink, 214 .readlink = generic_readlink,
215 .follow_link = sysfs_follow_link, 215 .follow_link = sysfs_follow_link,
216 .put_link = sysfs_put_link, 216 .put_link = sysfs_put_link,
217 .setattr = sysfs_setattr,
218 .getattr = sysfs_getattr,
219 .permission = sysfs_permission,
217}; 220};
218 221
219 222
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index af4c4e7482a..cdd9377a6e0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12 13
13struct sysfs_open_dirent; 14struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
50struct sysfs_dirent { 51struct sysfs_dirent {
51 atomic_t s_count; 52 atomic_t s_count;
52 atomic_t s_active; 53 atomic_t s_active;
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55 struct lockdep_map dep_map;
56#endif
53 struct sysfs_dirent *s_parent; 57 struct sysfs_dirent *s_parent;
54 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
55 const char *s_name; 59 const char *s_name;
@@ -84,14 +88,23 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
84 return sd->s_flags & SYSFS_TYPE_MASK; 88 return sd->s_flags & SYSFS_TYPE_MASK;
85} 89}
86 90
91#ifdef CONFIG_DEBUG_LOCK_ALLOC
92#define sysfs_dirent_init_lockdep(sd) \
93do { \
94 static struct lock_class_key __key; \
95 \
96 lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
97} while(0)
98#else
99#define sysfs_dirent_init_lockdep(sd) do {} while(0)
100#endif
101
87/* 102/*
88 * Context structure to be used while adding/removing nodes. 103 * Context structure to be used while adding/removing nodes.
89 */ 104 */
90struct sysfs_addrm_cxt { 105struct sysfs_addrm_cxt {
91 struct sysfs_dirent *parent_sd; 106 struct sysfs_dirent *parent_sd;
92 struct inode *parent_inode;
93 struct sysfs_dirent *removed; 107 struct sysfs_dirent *removed;
94 int cnt;
95}; 108};
96 109
97/* 110/*
@@ -105,7 +118,6 @@ extern struct kmem_cache *sysfs_dir_cachep;
105 * dir.c 118 * dir.c
106 */ 119 */
107extern struct mutex sysfs_mutex; 120extern struct mutex sysfs_mutex;
108extern struct mutex sysfs_rename_mutex;
109extern spinlock_t sysfs_assoc_lock; 121extern spinlock_t sysfs_assoc_lock;
110 122
111extern const struct file_operations sysfs_dir_operations; 123extern const struct file_operations sysfs_dir_operations;
@@ -133,6 +145,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
133 struct sysfs_dirent **p_sd); 145 struct sysfs_dirent **p_sd);
134void sysfs_remove_subdir(struct sysfs_dirent *sd); 146void sysfs_remove_subdir(struct sysfs_dirent *sd);
135 147
148int sysfs_rename(struct sysfs_dirent *sd,
149 struct sysfs_dirent *new_parent_sd, const char *new_name);
150
136static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) 151static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
137{ 152{
138 if (sd) { 153 if (sd) {
@@ -155,7 +170,10 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
155 */ 170 */
156struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 171struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
157void sysfs_delete_inode(struct inode *inode); 172void sysfs_delete_inode(struct inode *inode);
173int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
174int sysfs_permission(struct inode *inode, int mask);
158int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 175int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
176int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
159int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 177int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
160 size_t size, int flags); 178 size_t size, int flags);
161int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 179int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b042bd7034b..1bfc95ad5f7 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -200,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 201
202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
203 flags & TFD_SHARED_FCNTL_FLAGS); 203 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
204 if (ufd < 0) 204 if (ufd < 0)
205 kfree(ctx); 205 kfree(ctx);
206 206
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index dbc093afd94..90492327b38 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -350,13 +350,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
350 le32_to_cpu(sup->fmt_version)); 350 le32_to_cpu(sup->fmt_version));
351 printk(KERN_DEBUG "\ttime_gran %u\n", 351 printk(KERN_DEBUG "\ttime_gran %u\n",
352 le32_to_cpu(sup->time_gran)); 352 le32_to_cpu(sup->time_gran));
353 printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" 353 printk(KERN_DEBUG "\tUUID %pUB\n",
354 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", 354 sup->uuid);
355 sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
356 sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
357 sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
358 sup->uuid[12], sup->uuid[13], sup->uuid[14],
359 sup->uuid[15]);
360 break; 355 break;
361 } 356 }
362 case UBIFS_MST_NODE: 357 case UBIFS_MST_NODE:
@@ -2014,7 +2009,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2014 inum = key_inum_flash(c, &dent->key); 2009 inum = key_inum_flash(c, &dent->key);
2015 fscki1 = read_add_inode(c, priv, inum); 2010 fscki1 = read_add_inode(c, priv, inum);
2016 if (IS_ERR(fscki1)) { 2011 if (IS_ERR(fscki1)) {
2017 err = PTR_ERR(fscki); 2012 err = PTR_ERR(fscki1);
2018 ubifs_err("error %d while processing entry node and " 2013 ubifs_err("error %d while processing entry node and "
2019 "trying to find parent inode node %lu", 2014 "trying to find parent inode node %lu",
2020 err, (unsigned long)inum); 2015 err, (unsigned long)inum);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 1009adc8d60..16a6444330e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not 48 * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
49 * set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
50 */ 50 */
51 51
@@ -1389,7 +1389,6 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1389 unsigned long nr_segs, loff_t pos) 1389 unsigned long nr_segs, loff_t pos)
1390{ 1390{
1391 int err; 1391 int err;
1392 ssize_t ret;
1393 struct inode *inode = iocb->ki_filp->f_mapping->host; 1392 struct inode *inode = iocb->ki_filp->f_mapping->host;
1394 struct ubifs_info *c = inode->i_sb->s_fs_info; 1393 struct ubifs_info *c = inode->i_sb->s_fs_info;
1395 1394
@@ -1397,17 +1396,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1397 if (err) 1396 if (err)
1398 return err; 1397 return err;
1399 1398
1400 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 1399 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1401 if (ret < 0)
1402 return ret;
1403
1404 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
1405 err = ubifs_sync_wbufs_by_inode(c, inode);
1406 if (err)
1407 return err;
1408 }
1409
1410 return ret;
1411} 1400}
1412 1401
1413static int ubifs_set_page_dirty(struct page *page) 1402static int ubifs_set_page_dirty(struct page *page)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a..e5a3d8e96bb 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index f94ddf7efba..868a55ee080 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -23,7 +23,7 @@
23/* 23/*
24 * This file implements functions needed to recover from unclean un-mounts. 24 * This file implements functions needed to recover from unclean un-mounts.
25 * When UBIFS is mounted, it checks a flag on the master node to determine if 25 * When UBIFS is mounted, it checks a flag on the master node to determine if
26 * an un-mount was completed sucessfully. If not, the process of mounting 26 * an un-mount was completed successfully. If not, the process of mounting
27 * incorparates additional checking and fixing of on-flash data structures. 27 * incorparates additional checking and fixing of on-flash data structures.
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 333e181ee98..43f9d19a6f3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1393,12 +1393,7 @@ static int mount_ubifs(struct ubifs_info *c)
1393 c->leb_size, c->leb_size >> 10); 1393 c->leb_size, c->leb_size >> 10);
1394 dbg_msg("data journal heads: %d", 1394 dbg_msg("data journal heads: %d",
1395 c->jhead_cnt - NONDATA_JHEADS_CNT); 1395 c->jhead_cnt - NONDATA_JHEADS_CNT);
1396 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" 1396 dbg_msg("UUID: %pUB", c->uuid);
1397 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
1398 c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
1399 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
1400 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
1401 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
1402 dbg_msg("big_lpt %d", c->big_lpt); 1397 dbg_msg("big_lpt %d", c->big_lpt);
1403 dbg_msg("log LEBs: %d (%d - %d)", 1398 dbg_msg("log LEBs: %d (%d - %d)",
1404 c->log_lebs, UBIFS_LOG_LNUM, c->log_last); 1399 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
@@ -1842,22 +1837,32 @@ const struct super_operations ubifs_super_operations = {
1842 * @name: UBI volume name 1837 * @name: UBI volume name
1843 * @mode: UBI volume open mode 1838 * @mode: UBI volume open mode
1844 * 1839 *
1845 * There are several ways to specify UBI volumes when mounting UBIFS: 1840 * The primary method of mounting UBIFS is by specifying the UBI volume
1846 * o ubiX_Y - UBI device number X, volume Y; 1841 * character device node path. However, UBIFS may also be mounted withoug any
1847 * o ubiY - UBI device number 0, volume Y; 1842 * character device node using one of the following methods:
1843 *
1844 * o ubiX_Y - mount UBI device number X, volume Y;
1845 * o ubiY - mount UBI device number 0, volume Y;
1848 * o ubiX:NAME - mount UBI device X, volume with name NAME; 1846 * o ubiX:NAME - mount UBI device X, volume with name NAME;
1849 * o ubi:NAME - mount UBI device 0, volume with name NAME. 1847 * o ubi:NAME - mount UBI device 0, volume with name NAME.
1850 * 1848 *
1851 * Alternative '!' separator may be used instead of ':' (because some shells 1849 * Alternative '!' separator may be used instead of ':' (because some shells
1852 * like busybox may interpret ':' as an NFS host name separator). This function 1850 * like busybox may interpret ':' as an NFS host name separator). This function
1853 * returns ubi volume object in case of success and a negative error code in 1851 * returns UBI volume description object in case of success and a negative
1854 * case of failure. 1852 * error code in case of failure.
1855 */ 1853 */
1856static struct ubi_volume_desc *open_ubi(const char *name, int mode) 1854static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1857{ 1855{
1856 struct ubi_volume_desc *ubi;
1858 int dev, vol; 1857 int dev, vol;
1859 char *endptr; 1858 char *endptr;
1860 1859
1860 /* First, try to open using the device node path method */
1861 ubi = ubi_open_volume_path(name, mode);
1862 if (!IS_ERR(ubi))
1863 return ubi;
1864
1865 /* Try the "nodev" method */
1861 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') 1866 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
1862 return ERR_PTR(-EINVAL); 1867 return ERR_PTR(-EINVAL);
1863 1868
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1e068535b58..82372e332f0 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -440,7 +440,7 @@ static void udf_table_free_blocks(struct super_block *sb,
440 (bloc->logicalBlockNum + count) > 440 (bloc->logicalBlockNum + count) >
441 partmap->s_partition_len) { 441 partmap->s_partition_len) {
442 udf_debug("%d < %d || %d + %d > %d\n", 442 udf_debug("%d < %d || %d + %d > %d\n",
443 bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, 443 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count,
444 partmap->s_partition_len); 444 partmap->s_partition_len);
445 goto error_return; 445 goto error_return;
446 } 446 }
diff --git a/fs/udf/file.c b/fs/udf/file.c
index b80cbd78833..f311d509b6a 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -196,6 +196,7 @@ static int udf_release_file(struct inode *inode, struct file *filp)
196 mutex_lock(&inode->i_mutex); 196 mutex_lock(&inode->i_mutex);
197 lock_kernel(); 197 lock_kernel();
198 udf_discard_prealloc(inode); 198 udf_discard_prealloc(inode);
199 udf_truncate_tail_extent(inode);
199 unlock_kernel(); 200 unlock_kernel();
200 mutex_unlock(&inode->i_mutex); 201 mutex_unlock(&inode->i_mutex);
201 } 202 }
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6d24c2c63f9..f90231eb291 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -97,15 +97,17 @@ no_delete:
97 */ 97 */
98void udf_clear_inode(struct inode *inode) 98void udf_clear_inode(struct inode *inode)
99{ 99{
100 struct udf_inode_info *iinfo; 100 struct udf_inode_info *iinfo = UDF_I(inode);
101 if (!(inode->i_sb->s_flags & MS_RDONLY)) { 101
102 lock_kernel(); 102 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
103 udf_truncate_tail_extent(inode); 103 inode->i_size != iinfo->i_lenExtents) {
104 unlock_kernel(); 104 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
105 write_inode_now(inode, 0); 105 "inode size %llu different from extent lenght %llu. "
106 invalidate_inode_buffers(inode); 106 "Filesystem need not be standards compliant.\n",
107 inode->i_sb->s_id, inode->i_ino, inode->i_mode,
108 (unsigned long long)inode->i_size,
109 (unsigned long long)iinfo->i_lenExtents);
107 } 110 }
108 iinfo = UDF_I(inode);
109 kfree(iinfo->i_ext.i_data); 111 kfree(iinfo->i_ext.i_data);
110 iinfo->i_ext.i_data = NULL; 112 iinfo->i_ext.i_data = NULL;
111} 113}
@@ -198,7 +200,6 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
198 int newblock; 200 int newblock;
199 struct buffer_head *dbh = NULL; 201 struct buffer_head *dbh = NULL;
200 struct kernel_lb_addr eloc; 202 struct kernel_lb_addr eloc;
201 uint32_t elen;
202 uint8_t alloctype; 203 uint8_t alloctype;
203 struct extent_position epos; 204 struct extent_position epos;
204 205
@@ -273,12 +274,11 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
273 eloc.logicalBlockNum = *block; 274 eloc.logicalBlockNum = *block;
274 eloc.partitionReferenceNum = 275 eloc.partitionReferenceNum =
275 iinfo->i_location.partitionReferenceNum; 276 iinfo->i_location.partitionReferenceNum;
276 elen = inode->i_sb->s_blocksize; 277 iinfo->i_lenExtents = inode->i_size;
277 iinfo->i_lenExtents = elen;
278 epos.bh = NULL; 278 epos.bh = NULL;
279 epos.block = iinfo->i_location; 279 epos.block = iinfo->i_location;
280 epos.offset = udf_file_entry_alloc_offset(inode); 280 epos.offset = udf_file_entry_alloc_offset(inode);
281 udf_add_aext(inode, &epos, &eloc, elen, 0); 281 udf_add_aext(inode, &epos, &eloc, inode->i_size, 0);
282 /* UniqueID stuff */ 282 /* UniqueID stuff */
283 283
284 brelse(epos.bh); 284 brelse(epos.bh);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 21dad8c608f..cd2115060fd 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -408,15 +408,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
408 } 408 }
409 409
410add: 410add:
411 /* Is there any extent whose size we need to round up? */
412 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
413 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
414 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
415 epos.offset -= sizeof(struct short_ad);
416 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
417 epos.offset -= sizeof(struct long_ad);
418 udf_write_aext(dir, &epos, &eloc, elen, 1);
419 }
420 f_pos += nfidlen; 411 f_pos += nfidlen;
421 412
422 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && 413 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB &&
@@ -439,6 +430,7 @@ add:
439 udf_current_aext(dir, &epos, &eloc, &elen, 1); 430 udf_current_aext(dir, &epos, &eloc, &elen, 1);
440 } 431 }
441 432
433 /* Entry fits into current block? */
442 if (sb->s_blocksize - fibh->eoffset >= nfidlen) { 434 if (sb->s_blocksize - fibh->eoffset >= nfidlen) {
443 fibh->soffset = fibh->eoffset; 435 fibh->soffset = fibh->eoffset;
444 fibh->eoffset += nfidlen; 436 fibh->eoffset += nfidlen;
@@ -462,6 +454,16 @@ add:
462 (fibh->sbh->b_data + fibh->soffset); 454 (fibh->sbh->b_data + fibh->soffset);
463 } 455 }
464 } else { 456 } else {
457 /* Round up last extent in the file */
458 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
459 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
460 epos.offset -= sizeof(struct short_ad);
461 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
462 epos.offset -= sizeof(struct long_ad);
463 udf_write_aext(dir, &epos, &eloc, elen, 1);
464 dinfo->i_lenExtents = (dinfo->i_lenExtents + sb->s_blocksize
465 - 1) & ~(sb->s_blocksize - 1);
466
465 fibh->soffset = fibh->eoffset - sb->s_blocksize; 467 fibh->soffset = fibh->eoffset - sb->s_blocksize;
466 fibh->eoffset += nfidlen - sb->s_blocksize; 468 fibh->eoffset += nfidlen - sb->s_blocksize;
467 if (fibh->sbh != fibh->ebh) { 469 if (fibh->sbh != fibh->ebh) {
@@ -508,6 +510,20 @@ add:
508 dir->i_size += nfidlen; 510 dir->i_size += nfidlen;
509 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 511 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
510 dinfo->i_lenAlloc += nfidlen; 512 dinfo->i_lenAlloc += nfidlen;
513 else {
514 /* Find the last extent and truncate it to proper size */
515 while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
516 (EXT_RECORDED_ALLOCATED >> 30))
517 ;
518 elen -= dinfo->i_lenExtents - dir->i_size;
519 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
520 epos.offset -= sizeof(struct short_ad);
521 else if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
522 epos.offset -= sizeof(struct long_ad);
523 udf_write_aext(dir, &epos, &eloc, elen, 1);
524 dinfo->i_lenExtents = dir->i_size;
525 }
526
511 mark_inode_dirty(dir); 527 mark_inode_dirty(dir);
512 goto out_ok; 528 goto out_ok;
513 } else { 529 } else {
@@ -922,7 +938,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
922 block = udf_get_pblock(inode->i_sb, block, 938 block = udf_get_pblock(inode->i_sb, block,
923 iinfo->i_location.partitionReferenceNum, 939 iinfo->i_location.partitionReferenceNum,
924 0); 940 0);
925 epos.bh = udf_tread(inode->i_sb, block); 941 epos.bh = udf_tgetblk(inode->i_sb, block);
926 lock_buffer(epos.bh); 942 lock_buffer(epos.bh);
927 memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); 943 memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize);
928 set_buffer_uptodate(epos.bh); 944 set_buffer_uptodate(epos.bh);
@@ -999,6 +1015,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
999 inode->i_size = elen; 1015 inode->i_size = elen;
1000 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 1016 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
1001 iinfo->i_lenAlloc = inode->i_size; 1017 iinfo->i_lenAlloc = inode->i_size;
1018 else
1019 udf_truncate_tail_extent(inode);
1002 mark_inode_dirty(inode); 1020 mark_inode_dirty(inode);
1003 1021
1004 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1022 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9d1b8c2e6c4..1e4543cbcd2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1078,21 +1078,39 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1078 return 0; 1078 return 0;
1079} 1079}
1080 1080
1081static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) 1081static void udf_find_vat_block(struct super_block *sb, int p_index,
1082 int type1_index, sector_t start_block)
1082{ 1083{
1083 struct udf_sb_info *sbi = UDF_SB(sb); 1084 struct udf_sb_info *sbi = UDF_SB(sb);
1084 struct udf_part_map *map = &sbi->s_partmaps[p_index]; 1085 struct udf_part_map *map = &sbi->s_partmaps[p_index];
1086 sector_t vat_block;
1085 struct kernel_lb_addr ino; 1087 struct kernel_lb_addr ino;
1088
1089 /*
1090 * VAT file entry is in the last recorded block. Some broken disks have
1091 * it a few blocks before so try a bit harder...
1092 */
1093 ino.partitionReferenceNum = type1_index;
1094 for (vat_block = start_block;
1095 vat_block >= map->s_partition_root &&
1096 vat_block >= start_block - 3 &&
1097 !sbi->s_vat_inode; vat_block--) {
1098 ino.logicalBlockNum = vat_block - map->s_partition_root;
1099 sbi->s_vat_inode = udf_iget(sb, &ino);
1100 }
1101}
1102
1103static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1104{
1105 struct udf_sb_info *sbi = UDF_SB(sb);
1106 struct udf_part_map *map = &sbi->s_partmaps[p_index];
1086 struct buffer_head *bh = NULL; 1107 struct buffer_head *bh = NULL;
1087 struct udf_inode_info *vati; 1108 struct udf_inode_info *vati;
1088 uint32_t pos; 1109 uint32_t pos;
1089 struct virtualAllocationTable20 *vat20; 1110 struct virtualAllocationTable20 *vat20;
1090 sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 1111 sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
1091 1112
1092 /* VAT file entry is in the last recorded block */ 1113 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
1093 ino.partitionReferenceNum = type1_index;
1094 ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root;
1095 sbi->s_vat_inode = udf_iget(sb, &ino);
1096 if (!sbi->s_vat_inode && 1114 if (!sbi->s_vat_inode &&
1097 sbi->s_last_block != blocks - 1) { 1115 sbi->s_last_block != blocks - 1) {
1098 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" 1116 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the"
@@ -1100,9 +1118,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1100 "block of the device (%lu).\n", 1118 "block of the device (%lu).\n",
1101 (unsigned long)sbi->s_last_block, 1119 (unsigned long)sbi->s_last_block,
1102 (unsigned long)blocks - 1); 1120 (unsigned long)blocks - 1);
1103 ino.partitionReferenceNum = type1_index; 1121 udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
1104 ino.logicalBlockNum = blocks - 1 - map->s_partition_root;
1105 sbi->s_vat_inode = udf_iget(sb, &ino);
1106 } 1122 }
1107 if (!sbi->s_vat_inode) 1123 if (!sbi->s_vat_inode)
1108 return 1; 1124 return 1;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6f671f1ac27..22af68f8b68 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -70,13 +70,13 @@ static inline unsigned long ufs_dir_pages(struct inode *inode)
70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
71} 71}
72 72
73ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry) 73ino_t ufs_inode_by_name(struct inode *dir, struct qstr *qstr)
74{ 74{
75 ino_t res = 0; 75 ino_t res = 0;
76 struct ufs_dir_entry *de; 76 struct ufs_dir_entry *de;
77 struct page *page; 77 struct page *page;
78 78
79 de = ufs_find_entry(dir, dentry, &page); 79 de = ufs_find_entry(dir, qstr, &page);
80 if (de) { 80 if (de) {
81 res = fs32_to_cpu(dir->i_sb, de->d_ino); 81 res = fs32_to_cpu(dir->i_sb, de->d_ino);
82 ufs_put_page(page); 82 ufs_put_page(page);
@@ -249,12 +249,12 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
249 * (as a parameter - res_dir). Page is returned mapped and unlocked. 249 * (as a parameter - res_dir). Page is returned mapped and unlocked.
250 * Entry is guaranteed to be valid. 250 * Entry is guaranteed to be valid.
251 */ 251 */
252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry, 252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct qstr *qstr,
253 struct page **res_page) 253 struct page **res_page)
254{ 254{
255 struct super_block *sb = dir->i_sb; 255 struct super_block *sb = dir->i_sb;
256 const char *name = dentry->d_name.name; 256 const char *name = qstr->name;
257 int namelen = dentry->d_name.len; 257 int namelen = qstr->len;
258 unsigned reclen = UFS_DIR_REC_LEN(namelen); 258 unsigned reclen = UFS_DIR_REC_LEN(namelen);
259 unsigned long start, n; 259 unsigned long start, n;
260 unsigned long npages = ufs_dir_pages(dir); 260 unsigned long npages = ufs_dir_pages(dir);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 23119fe7ad6..4c26d9e8bc9 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -56,7 +56,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
56 return ERR_PTR(-ENAMETOOLONG); 56 return ERR_PTR(-ENAMETOOLONG);
57 57
58 lock_kernel(); 58 lock_kernel();
59 ino = ufs_inode_by_name(dir, dentry); 59 ino = ufs_inode_by_name(dir, &dentry->d_name);
60 if (ino) { 60 if (ino) {
61 inode = ufs_iget(dir->i_sb, ino); 61 inode = ufs_iget(dir->i_sb, ino);
62 if (IS_ERR(inode)) { 62 if (IS_ERR(inode)) {
@@ -237,7 +237,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
237 struct page *page; 237 struct page *page;
238 int err = -ENOENT; 238 int err = -ENOENT;
239 239
240 de = ufs_find_entry(dir, dentry, &page); 240 de = ufs_find_entry(dir, &dentry->d_name, &page);
241 if (!de) 241 if (!de)
242 goto out; 242 goto out;
243 243
@@ -281,7 +281,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
281 struct ufs_dir_entry *old_de; 281 struct ufs_dir_entry *old_de;
282 int err = -ENOENT; 282 int err = -ENOENT;
283 283
284 old_de = ufs_find_entry(old_dir, old_dentry, &old_page); 284 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
285 if (!old_de) 285 if (!old_de)
286 goto out; 286 goto out;
287 287
@@ -301,7 +301,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
301 goto out_dir; 301 goto out_dir;
302 302
303 err = -ENOENT; 303 err = -ENOENT;
304 new_de = ufs_find_entry(new_dir, new_dentry, &new_page); 304 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
305 if (!new_de) 305 if (!new_de)
306 goto out_dir; 306 goto out_dir;
307 inode_inc_link_count(old_inode); 307 inode_inc_link_count(old_inode);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5faed7954d0..143c20bfb04 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -66,6 +66,7 @@
66 */ 66 */
67 67
68 68
69#include <linux/exportfs.h>
69#include <linux/module.h> 70#include <linux/module.h>
70#include <linux/bitops.h> 71#include <linux/bitops.h>
71 72
@@ -96,6 +97,56 @@
96#include "swab.h" 97#include "swab.h"
97#include "util.h" 98#include "util.h"
98 99
100static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
101{
102 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
103 struct inode *inode;
104
105 if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
106 return ERR_PTR(-ESTALE);
107
108 inode = ufs_iget(sb, ino);
109 if (IS_ERR(inode))
110 return ERR_CAST(inode);
111 if (generation && inode->i_generation != generation) {
112 iput(inode);
113 return ERR_PTR(-ESTALE);
114 }
115 return inode;
116}
117
118static struct dentry *ufs_fh_to_dentry(struct super_block *sb, struct fid *fid,
119 int fh_len, int fh_type)
120{
121 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
122}
123
124static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid,
125 int fh_len, int fh_type)
126{
127 return generic_fh_to_parent(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
128}
129
130static struct dentry *ufs_get_parent(struct dentry *child)
131{
132 struct qstr dot_dot = {
133 .name = "..",
134 .len = 2,
135 };
136 ino_t ino;
137
138 ino = ufs_inode_by_name(child->d_inode, &dot_dot);
139 if (!ino)
140 return ERR_PTR(-ENOENT);
141 return d_obtain_alias(ufs_iget(child->d_inode->i_sb, ino));
142}
143
144static const struct export_operations ufs_export_ops = {
145 .fh_to_dentry = ufs_fh_to_dentry,
146 .fh_to_parent = ufs_fh_to_parent,
147 .get_parent = ufs_get_parent,
148};
149
99#ifdef CONFIG_UFS_DEBUG 150#ifdef CONFIG_UFS_DEBUG
100/* 151/*
101 * Print contents of ufs_super_block, useful for debugging 152 * Print contents of ufs_super_block, useful for debugging
@@ -990,6 +1041,7 @@ magic_found:
990 * Read ufs_super_block into internal data structures 1041 * Read ufs_super_block into internal data structures
991 */ 1042 */
992 sb->s_op = &ufs_super_ops; 1043 sb->s_op = &ufs_super_ops;
1044 sb->s_export_op = &ufs_export_ops;
993 sb->dq_op = NULL; /***/ 1045 sb->dq_op = NULL; /***/
994 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); 1046 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
995 1047
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 644e77e1359..0b4c39bc0d9 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -86,9 +86,9 @@ extern void ufs_put_cylinder (struct super_block *, unsigned);
86/* dir.c */ 86/* dir.c */
87extern const struct inode_operations ufs_dir_inode_operations; 87extern const struct inode_operations ufs_dir_inode_operations;
88extern int ufs_add_link (struct dentry *, struct inode *); 88extern int ufs_add_link (struct dentry *, struct inode *);
89extern ino_t ufs_inode_by_name(struct inode *, struct dentry *); 89extern ino_t ufs_inode_by_name(struct inode *, struct qstr *);
90extern int ufs_make_empty(struct inode *, struct inode *); 90extern int ufs_make_empty(struct inode *, struct inode *);
91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct dentry *, struct page **); 91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct qstr *, struct page **);
92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *); 92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
93extern int ufs_empty_dir (struct inode *); 93extern int ufs_empty_dir (struct inode *);
94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); 94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449f..46f87e828b4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) 615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
616{ 616{
617 struct xattr_handler *handler; 617 struct xattr_handler *handler;
618 struct inode *inode = dentry->d_inode;
619 618
620 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
621 if (!handler) 620 if (!handler)
622 return -EOPNOTSUPP; 621 return -EOPNOTSUPP;
623 return handler->get(inode, name, buffer, size); 622 return handler->get(dentry, name, buffer, size, handler->flags);
624} 623}
625 624
626/* 625/*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
630ssize_t 629ssize_t
631generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
632{ 631{
633 struct inode *inode = dentry->d_inode; 632 struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
634 struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
635 unsigned int size = 0; 633 unsigned int size = 0;
636 634
637 if (!buffer) { 635 if (!buffer) {
638 for_each_xattr_handler(handlers, handler) 636 for_each_xattr_handler(handlers, handler) {
639 size += handler->list(inode, NULL, 0, NULL, 0); 637 size += handler->list(dentry, NULL, 0, NULL, 0,
638 handler->flags);
639 }
640 } else { 640 } else {
641 char *buf = buffer; 641 char *buf = buffer;
642 642
643 for_each_xattr_handler(handlers, handler) { 643 for_each_xattr_handler(handlers, handler) {
644 size = handler->list(inode, buf, buffer_size, NULL, 0); 644 size = handler->list(dentry, buf, buffer_size,
645 NULL, 0, handler->flags);
645 if (size > buffer_size) 646 if (size > buffer_size)
646 return -ERANGE; 647 return -ERANGE;
647 buf += size; 648 buf += size;
@@ -659,14 +660,13 @@ int
659generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) 660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
660{ 661{
661 struct xattr_handler *handler; 662 struct xattr_handler *handler;
662 struct inode *inode = dentry->d_inode;
663 663
664 if (size == 0) 664 if (size == 0)
665 value = ""; /* empty EA, do not remove */ 665 value = ""; /* empty EA, do not remove */
666 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(inode, name, value, size, flags); 669 return handler->set(dentry, name, value, size, 0, handler->flags);
670} 670}
671 671
672/* 672/*
@@ -677,12 +677,12 @@ int
677generic_removexattr(struct dentry *dentry, const char *name) 677generic_removexattr(struct dentry *dentry, const char *name)
678{ 678{
679 struct xattr_handler *handler; 679 struct xattr_handler *handler;
680 struct inode *inode = dentry->d_inode;
681 680
682 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
683 if (!handler) 682 if (!handler)
684 return -EOPNOTSUPP; 683 return -EOPNOTSUPP;
685 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 684 return handler->set(dentry, name, NULL, 0,
685 XATTR_REPLACE, handler->flags);
686} 686}
687 687
688EXPORT_SYMBOL(generic_getxattr); 688EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index c6ad7c7e3ee..05ac0fe9c4d 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -36,7 +36,7 @@ posix_acl_from_xattr(const void *value, size_t size)
36 if (count == 0) 36 if (count == 0)
37 return NULL; 37 return NULL;
38 38
39 acl = posix_acl_alloc(count, GFP_KERNEL); 39 acl = posix_acl_alloc(count, GFP_NOFS);
40 if (!acl) 40 if (!acl)
41 return ERR_PTR(-ENOMEM); 41 return ERR_PTR(-ENOMEM);
42 acl_e = acl->a_entries; 42 acl_e = acl->a_entries;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7a59daed178..56641fe52a2 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -26,6 +26,8 @@ endif
26 26
27obj-$(CONFIG_XFS_FS) += xfs.o 27obj-$(CONFIG_XFS_FS) += xfs.o
28 28
29xfs-y += linux-2.6/xfs_trace.o
30
29xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ 31xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
30 xfs_dquot.o \ 32 xfs_dquot.o \
31 xfs_dquot_item.o \ 33 xfs_dquot_item.o \
@@ -90,8 +92,7 @@ xfs-y += xfs_alloc.o \
90 xfs_rw.o \ 92 xfs_rw.o \
91 xfs_dmops.o 93 xfs_dmops.o
92 94
93xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ 95xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
94 xfs_dir2_trace.o
95 96
96# Objects in linux/ 97# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \ 98xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -113,6 +114,3 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
113xfs-y += $(addprefix support/, \ 114xfs-y += $(addprefix support/, \
114 debug.o \ 115 debug.o \
115 uuid.o) 116 uuid.o)
116
117xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
118
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b23a5450644..883ca5ab8af 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -21,6 +21,7 @@
21#include "xfs_bmap_btree.h" 21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h" 22#include "xfs_inode.h"
23#include "xfs_vnodeops.h" 23#include "xfs_vnodeops.h"
24#include "xfs_trace.h"
24#include <linux/xattr.h> 25#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h> 26#include <linux/posix_acl_xattr.h>
26 27
@@ -250,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
250 if (mode != inode->i_mode) { 251 if (mode != inode->i_mode) {
251 struct iattr iattr; 252 struct iattr iattr;
252 253
253 iattr.ia_valid = ATTR_MODE; 254 iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
254 iattr.ia_mode = mode; 255 iattr.ia_mode = mode;
256 iattr.ia_ctime = current_fs_time(inode->i_sb);
255 257
256 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 258 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
257 } 259 }
@@ -353,37 +355,14 @@ xfs_acl_chmod(struct inode *inode)
353 return error; 355 return error;
354} 356}
355 357
356/*
357 * System xattr handlers.
358 *
359 * Currently Posix ACLs are the only system namespace extended attribute
360 * handlers supported by XFS, so we just implement the handlers here.
361 * If we ever support other system extended attributes this will need
362 * some refactoring.
363 */
364
365static int 358static int
366xfs_decode_acl(const char *name) 359xfs_xattr_acl_get(struct dentry *dentry, const char *name,
367{ 360 void *value, size_t size, int type)
368 if (strcmp(name, "posix_acl_access") == 0)
369 return ACL_TYPE_ACCESS;
370 else if (strcmp(name, "posix_acl_default") == 0)
371 return ACL_TYPE_DEFAULT;
372 return -EINVAL;
373}
374
375static int
376xfs_xattr_system_get(struct inode *inode, const char *name,
377 void *value, size_t size)
378{ 361{
379 struct posix_acl *acl; 362 struct posix_acl *acl;
380 int type, error; 363 int error;
381
382 type = xfs_decode_acl(name);
383 if (type < 0)
384 return type;
385 364
386 acl = xfs_get_acl(inode, type); 365 acl = xfs_get_acl(dentry->d_inode, type);
387 if (IS_ERR(acl)) 366 if (IS_ERR(acl))
388 return PTR_ERR(acl); 367 return PTR_ERR(acl);
389 if (acl == NULL) 368 if (acl == NULL)
@@ -396,15 +375,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
396} 375}
397 376
398static int 377static int
399xfs_xattr_system_set(struct inode *inode, const char *name, 378xfs_xattr_acl_set(struct dentry *dentry, const char *name,
400 const void *value, size_t size, int flags) 379 const void *value, size_t size, int flags, int type)
401{ 380{
381 struct inode *inode = dentry->d_inode;
402 struct posix_acl *acl = NULL; 382 struct posix_acl *acl = NULL;
403 int error = 0, type; 383 int error = 0;
404 384
405 type = xfs_decode_acl(name);
406 if (type < 0)
407 return type;
408 if (flags & XATTR_CREATE) 385 if (flags & XATTR_CREATE)
409 return -EINVAL; 386 return -EINVAL;
410 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 387 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -461,8 +438,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
461 return error; 438 return error;
462} 439}
463 440
464struct xattr_handler xfs_xattr_system_handler = { 441struct xattr_handler xfs_xattr_acl_access_handler = {
465 .prefix = XATTR_SYSTEM_PREFIX, 442 .prefix = POSIX_ACL_XATTR_ACCESS,
466 .get = xfs_xattr_system_get, 443 .flags = ACL_TYPE_ACCESS,
467 .set = xfs_xattr_system_set, 444 .get = xfs_xattr_acl_get,
445 .set = xfs_xattr_acl_set,
446};
447
448struct xattr_handler xfs_xattr_acl_default_handler = {
449 .prefix = POSIX_ACL_XATTR_DEFAULT,
450 .flags = ACL_TYPE_DEFAULT,
451 .get = xfs_xattr_acl_get,
452 .set = xfs_xattr_acl_set,
468}; 453};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c2e30eea74d..66abe36c121 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,6 +38,7 @@
38#include "xfs_rw.h" 38#include "xfs_rw.h"
39#include "xfs_iomap.h" 39#include "xfs_iomap.h"
40#include "xfs_vnodeops.h" 40#include "xfs_vnodeops.h"
41#include "xfs_trace.h"
41#include <linux/mpage.h> 42#include <linux/mpage.h>
42#include <linux/pagevec.h> 43#include <linux/pagevec.h>
43#include <linux/writeback.h> 44#include <linux/writeback.h>
@@ -76,7 +77,7 @@ xfs_ioend_wake(
76 wake_up(to_ioend_wq(ip)); 77 wake_up(to_ioend_wq(ip));
77} 78}
78 79
79STATIC void 80void
80xfs_count_page_state( 81xfs_count_page_state(
81 struct page *page, 82 struct page *page,
82 int *delalloc, 83 int *delalloc,
@@ -98,48 +99,6 @@ xfs_count_page_state(
98 } while ((bh = bh->b_this_page) != head); 99 } while ((bh = bh->b_this_page) != head);
99} 100}
100 101
101#if defined(XFS_RW_TRACE)
102void
103xfs_page_trace(
104 int tag,
105 struct inode *inode,
106 struct page *page,
107 unsigned long pgoff)
108{
109 xfs_inode_t *ip;
110 loff_t isize = i_size_read(inode);
111 loff_t offset = page_offset(page);
112 int delalloc = -1, unmapped = -1, unwritten = -1;
113
114 if (page_has_buffers(page))
115 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
116
117 ip = XFS_I(inode);
118 if (!ip->i_rwtrace)
119 return;
120
121 ktrace_enter(ip->i_rwtrace,
122 (void *)((unsigned long)tag),
123 (void *)ip,
124 (void *)inode,
125 (void *)page,
126 (void *)pgoff,
127 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
128 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
129 (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
130 (void *)((unsigned long)(isize & 0xffffffff)),
131 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
132 (void *)((unsigned long)(offset & 0xffffffff)),
133 (void *)((unsigned long)delalloc),
134 (void *)((unsigned long)unmapped),
135 (void *)((unsigned long)unwritten),
136 (void *)((unsigned long)current_pid()),
137 (void *)NULL);
138}
139#else
140#define xfs_page_trace(tag, inode, page, pgoff)
141#endif
142
143STATIC struct block_device * 102STATIC struct block_device *
144xfs_find_bdev_for_inode( 103xfs_find_bdev_for_inode(
145 struct xfs_inode *ip) 104 struct xfs_inode *ip)
@@ -235,71 +194,36 @@ xfs_setfilesize(
235} 194}
236 195
237/* 196/*
238 * Buffered IO write completion for delayed allocate extents. 197 * IO write completion.
239 */
240STATIC void
241xfs_end_bio_delalloc(
242 struct work_struct *work)
243{
244 xfs_ioend_t *ioend =
245 container_of(work, xfs_ioend_t, io_work);
246
247 xfs_setfilesize(ioend);
248 xfs_destroy_ioend(ioend);
249}
250
251/*
252 * Buffered IO write completion for regular, written extents.
253 */
254STATIC void
255xfs_end_bio_written(
256 struct work_struct *work)
257{
258 xfs_ioend_t *ioend =
259 container_of(work, xfs_ioend_t, io_work);
260
261 xfs_setfilesize(ioend);
262 xfs_destroy_ioend(ioend);
263}
264
265/*
266 * IO write completion for unwritten extents.
267 *
268 * Issue transactions to convert a buffer range from unwritten
269 * to written extents.
270 */ 198 */
271STATIC void 199STATIC void
272xfs_end_bio_unwritten( 200xfs_end_io(
273 struct work_struct *work) 201 struct work_struct *work)
274{ 202{
275 xfs_ioend_t *ioend = 203 xfs_ioend_t *ioend =
276 container_of(work, xfs_ioend_t, io_work); 204 container_of(work, xfs_ioend_t, io_work);
277 struct xfs_inode *ip = XFS_I(ioend->io_inode); 205 struct xfs_inode *ip = XFS_I(ioend->io_inode);
278 xfs_off_t offset = ioend->io_offset;
279 size_t size = ioend->io_size;
280
281 if (likely(!ioend->io_error)) {
282 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
283 int error;
284 error = xfs_iomap_write_unwritten(ip, offset, size);
285 if (error)
286 ioend->io_error = error;
287 }
288 xfs_setfilesize(ioend);
289 }
290 xfs_destroy_ioend(ioend);
291}
292 206
293/* 207 /*
294 * IO read completion for regular, written extents. 208 * For unwritten extents we need to issue transactions to convert a
295 */ 209 * range to normal written extens after the data I/O has finished.
296STATIC void 210 */
297xfs_end_bio_read( 211 if (ioend->io_type == IOMAP_UNWRITTEN &&
298 struct work_struct *work) 212 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
299{ 213 int error;
300 xfs_ioend_t *ioend = 214
301 container_of(work, xfs_ioend_t, io_work); 215 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
216 ioend->io_size);
217 if (error)
218 ioend->io_error = error;
219 }
302 220
221 /*
222 * We might have to update the on-disk file size after extending
223 * writes.
224 */
225 if (ioend->io_type != IOMAP_READ)
226 xfs_setfilesize(ioend);
303 xfs_destroy_ioend(ioend); 227 xfs_destroy_ioend(ioend);
304} 228}
305 229
@@ -314,10 +238,10 @@ xfs_finish_ioend(
314 int wait) 238 int wait)
315{ 239{
316 if (atomic_dec_and_test(&ioend->io_remaining)) { 240 if (atomic_dec_and_test(&ioend->io_remaining)) {
317 struct workqueue_struct *wq = xfsdatad_workqueue; 241 struct workqueue_struct *wq;
318 if (ioend->io_work.func == xfs_end_bio_unwritten)
319 wq = xfsconvertd_workqueue;
320 242
243 wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
244 xfsconvertd_workqueue : xfsdatad_workqueue;
321 queue_work(wq, &ioend->io_work); 245 queue_work(wq, &ioend->io_work);
322 if (wait) 246 if (wait)
323 flush_workqueue(wq); 247 flush_workqueue(wq);
@@ -355,15 +279,7 @@ xfs_alloc_ioend(
355 ioend->io_offset = 0; 279 ioend->io_offset = 0;
356 ioend->io_size = 0; 280 ioend->io_size = 0;
357 281
358 if (type == IOMAP_UNWRITTEN) 282 INIT_WORK(&ioend->io_work, xfs_end_io);
359 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
360 else if (type == IOMAP_DELAY)
361 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
362 else if (type == IOMAP_READ)
363 INIT_WORK(&ioend->io_work, xfs_end_bio_read);
364 else
365 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
366
367 return ioend; 283 return ioend;
368} 284}
369 285
@@ -380,7 +296,7 @@ xfs_map_blocks(
380 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); 296 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
381} 297}
382 298
383STATIC_INLINE int 299STATIC int
384xfs_iomap_valid( 300xfs_iomap_valid(
385 xfs_iomap_t *iomapp, 301 xfs_iomap_t *iomapp,
386 loff_t offset) 302 loff_t offset)
@@ -412,8 +328,9 @@ xfs_end_bio(
412 328
413STATIC void 329STATIC void
414xfs_submit_ioend_bio( 330xfs_submit_ioend_bio(
415 xfs_ioend_t *ioend, 331 struct writeback_control *wbc,
416 struct bio *bio) 332 xfs_ioend_t *ioend,
333 struct bio *bio)
417{ 334{
418 atomic_inc(&ioend->io_remaining); 335 atomic_inc(&ioend->io_remaining);
419 bio->bi_private = ioend; 336 bio->bi_private = ioend;
@@ -426,7 +343,8 @@ xfs_submit_ioend_bio(
426 if (xfs_ioend_new_eof(ioend)) 343 if (xfs_ioend_new_eof(ioend))
427 xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); 344 xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
428 345
429 submit_bio(WRITE, bio); 346 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
347 WRITE_SYNC_PLUG : WRITE, bio);
430 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); 348 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
431 bio_put(bio); 349 bio_put(bio);
432} 350}
@@ -505,6 +423,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
505 */ 423 */
506STATIC void 424STATIC void
507xfs_submit_ioend( 425xfs_submit_ioend(
426 struct writeback_control *wbc,
508 xfs_ioend_t *ioend) 427 xfs_ioend_t *ioend)
509{ 428{
510 xfs_ioend_t *head = ioend; 429 xfs_ioend_t *head = ioend;
@@ -533,19 +452,19 @@ xfs_submit_ioend(
533 retry: 452 retry:
534 bio = xfs_alloc_ioend_bio(bh); 453 bio = xfs_alloc_ioend_bio(bh);
535 } else if (bh->b_blocknr != lastblock + 1) { 454 } else if (bh->b_blocknr != lastblock + 1) {
536 xfs_submit_ioend_bio(ioend, bio); 455 xfs_submit_ioend_bio(wbc, ioend, bio);
537 goto retry; 456 goto retry;
538 } 457 }
539 458
540 if (bio_add_buffer(bio, bh) != bh->b_size) { 459 if (bio_add_buffer(bio, bh) != bh->b_size) {
541 xfs_submit_ioend_bio(ioend, bio); 460 xfs_submit_ioend_bio(wbc, ioend, bio);
542 goto retry; 461 goto retry;
543 } 462 }
544 463
545 lastblock = bh->b_blocknr; 464 lastblock = bh->b_blocknr;
546 } 465 }
547 if (bio) 466 if (bio)
548 xfs_submit_ioend_bio(ioend, bio); 467 xfs_submit_ioend_bio(wbc, ioend, bio);
549 xfs_finish_ioend(ioend, 0); 468 xfs_finish_ioend(ioend, 0);
550 } while ((ioend = next) != NULL); 469 } while ((ioend = next) != NULL);
551} 470}
@@ -904,16 +823,9 @@ xfs_convert_page(
904 823
905 if (startio) { 824 if (startio) {
906 if (count) { 825 if (count) {
907 struct backing_dev_info *bdi;
908
909 bdi = inode->i_mapping->backing_dev_info;
910 wbc->nr_to_write--; 826 wbc->nr_to_write--;
911 if (bdi_write_congested(bdi)) { 827 if (wbc->nr_to_write <= 0)
912 wbc->encountered_congestion = 1;
913 done = 1;
914 } else if (wbc->nr_to_write <= 0) {
915 done = 1; 828 done = 1;
916 }
917 } 829 }
918 xfs_start_page_writeback(page, !page_dirty, count); 830 xfs_start_page_writeback(page, !page_dirty, count);
919 } 831 }
@@ -1198,7 +1110,7 @@ xfs_page_state_convert(
1198 } 1110 }
1199 1111
1200 if (iohead) 1112 if (iohead)
1201 xfs_submit_ioend(iohead); 1113 xfs_submit_ioend(wbc, iohead);
1202 1114
1203 return page_dirty; 1115 return page_dirty;
1204 1116
@@ -1249,7 +1161,7 @@ xfs_vm_writepage(
1249 int delalloc, unmapped, unwritten; 1161 int delalloc, unmapped, unwritten;
1250 struct inode *inode = page->mapping->host; 1162 struct inode *inode = page->mapping->host;
1251 1163
1252 xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); 1164 trace_xfs_writepage(inode, page, 0);
1253 1165
1254 /* 1166 /*
1255 * We need a transaction if: 1167 * We need a transaction if:
@@ -1354,7 +1266,7 @@ xfs_vm_releasepage(
1354 .nr_to_write = 1, 1266 .nr_to_write = 1,
1355 }; 1267 };
1356 1268
1357 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0); 1269 trace_xfs_releasepage(inode, page, 0);
1358 1270
1359 if (!page_has_buffers(page)) 1271 if (!page_has_buffers(page))
1360 return 0; 1272 return 0;
@@ -1535,7 +1447,7 @@ xfs_end_io_direct(
1535 * didn't map an unwritten extent so switch it's completion 1447 * didn't map an unwritten extent so switch it's completion
1536 * handler. 1448 * handler.
1537 */ 1449 */
1538 INIT_WORK(&ioend->io_work, xfs_end_bio_written); 1450 ioend->io_type = IOMAP_NEW;
1539 xfs_finish_ioend(ioend, 0); 1451 xfs_finish_ioend(ioend, 0);
1540 } 1452 }
1541 1453
@@ -1562,19 +1474,13 @@ xfs_vm_direct_IO(
1562 1474
1563 bdev = xfs_find_bdev_for_inode(XFS_I(inode)); 1475 bdev = xfs_find_bdev_for_inode(XFS_I(inode));
1564 1476
1565 if (rw == WRITE) { 1477 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
1566 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); 1478 IOMAP_UNWRITTEN : IOMAP_READ);
1567 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1479
1568 bdev, iov, offset, nr_segs, 1480 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1569 xfs_get_blocks_direct, 1481 offset, nr_segs,
1570 xfs_end_io_direct); 1482 xfs_get_blocks_direct,
1571 } else { 1483 xfs_end_io_direct);
1572 iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
1573 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
1574 bdev, iov, offset, nr_segs,
1575 xfs_get_blocks_direct,
1576 xfs_end_io_direct);
1577 }
1578 1484
1579 if (unlikely(ret != -EIOCBQUEUED && iocb->private)) 1485 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1580 xfs_destroy_ioend(iocb->private); 1486 xfs_destroy_ioend(iocb->private);
@@ -1634,8 +1540,7 @@ xfs_vm_invalidatepage(
1634 struct page *page, 1540 struct page *page,
1635 unsigned long offset) 1541 unsigned long offset)
1636{ 1542{
1637 xfs_page_trace(XFS_INVALIDPAGE_ENTER, 1543 trace_xfs_invalidatepage(page->mapping->host, page, offset);
1638 page->mapping->host, page, offset);
1639 block_invalidatepage(page, offset); 1544 block_invalidatepage(page, offset);
1640} 1545}
1641 1546
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 221b3e66cee..4cfc6ea87df 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -45,4 +45,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45extern void xfs_ioend_init(void); 45extern void xfs_ioend_init(void);
46extern void xfs_ioend_wait(struct xfs_inode *); 46extern void xfs_ioend_wait(struct xfs_inode *);
47 47
48extern void xfs_count_page_state(struct page *, int *, int *, int *);
49
48#endif /* __XFS_AOPS_H__ */ 50#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d6..77b8be81c76 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,6 +39,7 @@
39#include "xfs_ag.h" 39#include "xfs_ag.h"
40#include "xfs_dmapi.h" 40#include "xfs_dmapi.h"
41#include "xfs_mount.h" 41#include "xfs_mount.h"
42#include "xfs_trace.h"
42 43
43static kmem_zone_t *xfs_buf_zone; 44static kmem_zone_t *xfs_buf_zone;
44STATIC int xfsbufd(void *); 45STATIC int xfsbufd(void *);
@@ -53,34 +54,6 @@ static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 54struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue; 55struct workqueue_struct *xfsconvertd_workqueue;
55 56
56#ifdef XFS_BUF_TRACE
57void
58xfs_buf_trace(
59 xfs_buf_t *bp,
60 char *id,
61 void *data,
62 void *ra)
63{
64 ktrace_enter(xfs_buf_trace_buf,
65 bp, id,
66 (void *)(unsigned long)bp->b_flags,
67 (void *)(unsigned long)bp->b_hold.counter,
68 (void *)(unsigned long)bp->b_sema.count,
69 (void *)current,
70 data, ra,
71 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
72 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
73 (void *)(unsigned long)bp->b_buffer_length,
74 NULL, NULL, NULL, NULL, NULL);
75}
76ktrace_t *xfs_buf_trace_buf;
77#define XFS_BUF_TRACE_SIZE 4096
78#define XB_TRACE(bp, id, data) \
79 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
80#else
81#define XB_TRACE(bp, id, data) do { } while (0)
82#endif
83
84#ifdef XFS_BUF_LOCK_TRACKING 57#ifdef XFS_BUF_LOCK_TRACKING
85# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 58# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
86# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) 59# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
@@ -149,7 +122,7 @@ page_region_mask(
149 return mask; 122 return mask;
150} 123}
151 124
152STATIC_INLINE void 125STATIC void
153set_page_region( 126set_page_region(
154 struct page *page, 127 struct page *page,
155 size_t offset, 128 size_t offset,
@@ -161,7 +134,7 @@ set_page_region(
161 SetPageUptodate(page); 134 SetPageUptodate(page);
162} 135}
163 136
164STATIC_INLINE int 137STATIC int
165test_page_region( 138test_page_region(
166 struct page *page, 139 struct page *page,
167 size_t offset, 140 size_t offset,
@@ -279,7 +252,8 @@ _xfs_buf_initialize(
279 init_waitqueue_head(&bp->b_waiters); 252 init_waitqueue_head(&bp->b_waiters);
280 253
281 XFS_STATS_INC(xb_create); 254 XFS_STATS_INC(xb_create);
282 XB_TRACE(bp, "initialize", target); 255
256 trace_xfs_buf_init(bp, _RET_IP_);
283} 257}
284 258
285/* 259/*
@@ -318,6 +292,7 @@ _xfs_buf_free_pages(
318{ 292{
319 if (bp->b_pages != bp->b_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
320 kmem_free(bp->b_pages); 294 kmem_free(bp->b_pages);
295 bp->b_pages = NULL;
321 } 296 }
322} 297}
323 298
@@ -332,7 +307,7 @@ void
332xfs_buf_free( 307xfs_buf_free(
333 xfs_buf_t *bp) 308 xfs_buf_t *bp)
334{ 309{
335 XB_TRACE(bp, "free", 0); 310 trace_xfs_buf_free(bp, _RET_IP_);
336 311
337 ASSERT(list_empty(&bp->b_hash_list)); 312 ASSERT(list_empty(&bp->b_hash_list));
338 313
@@ -349,9 +324,8 @@ xfs_buf_free(
349 ASSERT(!PagePrivate(page)); 324 ASSERT(!PagePrivate(page));
350 page_cache_release(page); 325 page_cache_release(page);
351 } 326 }
352 _xfs_buf_free_pages(bp);
353 } 327 }
354 328 _xfs_buf_free_pages(bp);
355 xfs_buf_deallocate(bp); 329 xfs_buf_deallocate(bp);
356} 330}
357 331
@@ -445,7 +419,6 @@ _xfs_buf_lookup_pages(
445 if (page_count == bp->b_page_count) 419 if (page_count == bp->b_page_count)
446 bp->b_flags |= XBF_DONE; 420 bp->b_flags |= XBF_DONE;
447 421
448 XB_TRACE(bp, "lookup_pages", (long)page_count);
449 return error; 422 return error;
450} 423}
451 424
@@ -548,7 +521,6 @@ found:
548 if (down_trylock(&bp->b_sema)) { 521 if (down_trylock(&bp->b_sema)) {
549 if (!(flags & XBF_TRYLOCK)) { 522 if (!(flags & XBF_TRYLOCK)) {
550 /* wait for buffer ownership */ 523 /* wait for buffer ownership */
551 XB_TRACE(bp, "get_lock", 0);
552 xfs_buf_lock(bp); 524 xfs_buf_lock(bp);
553 XFS_STATS_INC(xb_get_locked_waited); 525 XFS_STATS_INC(xb_get_locked_waited);
554 } else { 526 } else {
@@ -571,7 +543,8 @@ found:
571 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 543 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
572 bp->b_flags &= XBF_MAPPED; 544 bp->b_flags &= XBF_MAPPED;
573 } 545 }
574 XB_TRACE(bp, "got_lock", 0); 546
547 trace_xfs_buf_find(bp, flags, _RET_IP_);
575 XFS_STATS_INC(xb_get_locked); 548 XFS_STATS_INC(xb_get_locked);
576 return bp; 549 return bp;
577} 550}
@@ -582,7 +555,7 @@ found:
582 * although backing storage may not be. 555 * although backing storage may not be.
583 */ 556 */
584xfs_buf_t * 557xfs_buf_t *
585xfs_buf_get_flags( 558xfs_buf_get(
586 xfs_buftarg_t *target,/* target for buffer */ 559 xfs_buftarg_t *target,/* target for buffer */
587 xfs_off_t ioff, /* starting offset of range */ 560 xfs_off_t ioff, /* starting offset of range */
588 size_t isize, /* length of range */ 561 size_t isize, /* length of range */
@@ -627,7 +600,7 @@ xfs_buf_get_flags(
627 bp->b_bn = ioff; 600 bp->b_bn = ioff;
628 bp->b_count_desired = bp->b_buffer_length; 601 bp->b_count_desired = bp->b_buffer_length;
629 602
630 XB_TRACE(bp, "get", (unsigned long)flags); 603 trace_xfs_buf_get(bp, flags, _RET_IP_);
631 return bp; 604 return bp;
632 605
633 no_buffer: 606 no_buffer:
@@ -644,8 +617,6 @@ _xfs_buf_read(
644{ 617{
645 int status; 618 int status;
646 619
647 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
648
649 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 620 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
650 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 621 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
651 622
@@ -661,7 +632,7 @@ _xfs_buf_read(
661} 632}
662 633
663xfs_buf_t * 634xfs_buf_t *
664xfs_buf_read_flags( 635xfs_buf_read(
665 xfs_buftarg_t *target, 636 xfs_buftarg_t *target,
666 xfs_off_t ioff, 637 xfs_off_t ioff,
667 size_t isize, 638 size_t isize,
@@ -671,21 +642,20 @@ xfs_buf_read_flags(
671 642
672 flags |= XBF_READ; 643 flags |= XBF_READ;
673 644
674 bp = xfs_buf_get_flags(target, ioff, isize, flags); 645 bp = xfs_buf_get(target, ioff, isize, flags);
675 if (bp) { 646 if (bp) {
647 trace_xfs_buf_read(bp, flags, _RET_IP_);
648
676 if (!XFS_BUF_ISDONE(bp)) { 649 if (!XFS_BUF_ISDONE(bp)) {
677 XB_TRACE(bp, "read", (unsigned long)flags);
678 XFS_STATS_INC(xb_get_read); 650 XFS_STATS_INC(xb_get_read);
679 _xfs_buf_read(bp, flags); 651 _xfs_buf_read(bp, flags);
680 } else if (flags & XBF_ASYNC) { 652 } else if (flags & XBF_ASYNC) {
681 XB_TRACE(bp, "read_async", (unsigned long)flags);
682 /* 653 /*
683 * Read ahead call which is already satisfied, 654 * Read ahead call which is already satisfied,
684 * drop the buffer 655 * drop the buffer
685 */ 656 */
686 goto no_buffer; 657 goto no_buffer;
687 } else { 658 } else {
688 XB_TRACE(bp, "read_done", (unsigned long)flags);
689 /* We do not want read in the flags */ 659 /* We do not want read in the flags */
690 bp->b_flags &= ~XBF_READ; 660 bp->b_flags &= ~XBF_READ;
691 } 661 }
@@ -718,7 +688,7 @@ xfs_buf_readahead(
718 return; 688 return;
719 689
720 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 690 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
721 xfs_buf_read_flags(target, ioff, isize, flags); 691 xfs_buf_read(target, ioff, isize, flags);
722} 692}
723 693
724xfs_buf_t * 694xfs_buf_t *
@@ -823,7 +793,7 @@ xfs_buf_get_noaddr(
823 793
824 xfs_buf_unlock(bp); 794 xfs_buf_unlock(bp);
825 795
826 XB_TRACE(bp, "no_daddr", len); 796 trace_xfs_buf_get_noaddr(bp, _RET_IP_);
827 return bp; 797 return bp;
828 798
829 fail_free_mem: 799 fail_free_mem:
@@ -845,8 +815,8 @@ void
845xfs_buf_hold( 815xfs_buf_hold(
846 xfs_buf_t *bp) 816 xfs_buf_t *bp)
847{ 817{
818 trace_xfs_buf_hold(bp, _RET_IP_);
848 atomic_inc(&bp->b_hold); 819 atomic_inc(&bp->b_hold);
849 XB_TRACE(bp, "hold", 0);
850} 820}
851 821
852/* 822/*
@@ -859,7 +829,7 @@ xfs_buf_rele(
859{ 829{
860 xfs_bufhash_t *hash = bp->b_hash; 830 xfs_bufhash_t *hash = bp->b_hash;
861 831
862 XB_TRACE(bp, "rele", bp->b_relse); 832 trace_xfs_buf_rele(bp, _RET_IP_);
863 833
864 if (unlikely(!hash)) { 834 if (unlikely(!hash)) {
865 ASSERT(!bp->b_relse); 835 ASSERT(!bp->b_relse);
@@ -909,21 +879,19 @@ xfs_buf_cond_lock(
909 int locked; 879 int locked;
910 880
911 locked = down_trylock(&bp->b_sema) == 0; 881 locked = down_trylock(&bp->b_sema) == 0;
912 if (locked) { 882 if (locked)
913 XB_SET_OWNER(bp); 883 XB_SET_OWNER(bp);
914 } 884
915 XB_TRACE(bp, "cond_lock", (long)locked); 885 trace_xfs_buf_cond_lock(bp, _RET_IP_);
916 return locked ? 0 : -EBUSY; 886 return locked ? 0 : -EBUSY;
917} 887}
918 888
919#if defined(DEBUG) || defined(XFS_BLI_TRACE)
920int 889int
921xfs_buf_lock_value( 890xfs_buf_lock_value(
922 xfs_buf_t *bp) 891 xfs_buf_t *bp)
923{ 892{
924 return bp->b_sema.count; 893 return bp->b_sema.count;
925} 894}
926#endif
927 895
928/* 896/*
929 * Locks a buffer object. 897 * Locks a buffer object.
@@ -935,12 +903,14 @@ void
935xfs_buf_lock( 903xfs_buf_lock(
936 xfs_buf_t *bp) 904 xfs_buf_t *bp)
937{ 905{
938 XB_TRACE(bp, "lock", 0); 906 trace_xfs_buf_lock(bp, _RET_IP_);
907
939 if (atomic_read(&bp->b_io_remaining)) 908 if (atomic_read(&bp->b_io_remaining))
940 blk_run_address_space(bp->b_target->bt_mapping); 909 blk_run_address_space(bp->b_target->bt_mapping);
941 down(&bp->b_sema); 910 down(&bp->b_sema);
942 XB_SET_OWNER(bp); 911 XB_SET_OWNER(bp);
943 XB_TRACE(bp, "locked", 0); 912
913 trace_xfs_buf_lock_done(bp, _RET_IP_);
944} 914}
945 915
946/* 916/*
@@ -962,7 +932,8 @@ xfs_buf_unlock(
962 932
963 XB_CLEAR_OWNER(bp); 933 XB_CLEAR_OWNER(bp);
964 up(&bp->b_sema); 934 up(&bp->b_sema);
965 XB_TRACE(bp, "unlock", 0); 935
936 trace_xfs_buf_unlock(bp, _RET_IP_);
966} 937}
967 938
968 939
@@ -974,17 +945,18 @@ void
974xfs_buf_pin( 945xfs_buf_pin(
975 xfs_buf_t *bp) 946 xfs_buf_t *bp)
976{ 947{
948 trace_xfs_buf_pin(bp, _RET_IP_);
977 atomic_inc(&bp->b_pin_count); 949 atomic_inc(&bp->b_pin_count);
978 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
979} 950}
980 951
981void 952void
982xfs_buf_unpin( 953xfs_buf_unpin(
983 xfs_buf_t *bp) 954 xfs_buf_t *bp)
984{ 955{
956 trace_xfs_buf_unpin(bp, _RET_IP_);
957
985 if (atomic_dec_and_test(&bp->b_pin_count)) 958 if (atomic_dec_and_test(&bp->b_pin_count))
986 wake_up_all(&bp->b_waiters); 959 wake_up_all(&bp->b_waiters);
987 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
988} 960}
989 961
990int 962int
@@ -1035,7 +1007,7 @@ xfs_buf_iodone_work(
1035 */ 1007 */
1036 if ((bp->b_error == EOPNOTSUPP) && 1008 if ((bp->b_error == EOPNOTSUPP) &&
1037 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { 1009 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
1038 XB_TRACE(bp, "ordered_retry", bp->b_iodone); 1010 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
1039 bp->b_flags &= ~XBF_ORDERED; 1011 bp->b_flags &= ~XBF_ORDERED;
1040 bp->b_flags |= _XFS_BARRIER_FAILED; 1012 bp->b_flags |= _XFS_BARRIER_FAILED;
1041 xfs_buf_iorequest(bp); 1013 xfs_buf_iorequest(bp);
@@ -1050,12 +1022,12 @@ xfs_buf_ioend(
1050 xfs_buf_t *bp, 1022 xfs_buf_t *bp,
1051 int schedule) 1023 int schedule)
1052{ 1024{
1025 trace_xfs_buf_iodone(bp, _RET_IP_);
1026
1053 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 1027 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1054 if (bp->b_error == 0) 1028 if (bp->b_error == 0)
1055 bp->b_flags |= XBF_DONE; 1029 bp->b_flags |= XBF_DONE;
1056 1030
1057 XB_TRACE(bp, "iodone", bp->b_iodone);
1058
1059 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { 1031 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1060 if (schedule) { 1032 if (schedule) {
1061 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); 1033 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
@@ -1075,7 +1047,7 @@ xfs_buf_ioerror(
1075{ 1047{
1076 ASSERT(error >= 0 && error <= 0xffff); 1048 ASSERT(error >= 0 && error <= 0xffff);
1077 bp->b_error = (unsigned short)error; 1049 bp->b_error = (unsigned short)error;
1078 XB_TRACE(bp, "ioerror", (unsigned long)error); 1050 trace_xfs_buf_ioerror(bp, error, _RET_IP_);
1079} 1051}
1080 1052
1081int 1053int
@@ -1083,7 +1055,7 @@ xfs_bawrite(
1083 void *mp, 1055 void *mp,
1084 struct xfs_buf *bp) 1056 struct xfs_buf *bp)
1085{ 1057{
1086 XB_TRACE(bp, "bawrite", 0); 1058 trace_xfs_buf_bawrite(bp, _RET_IP_);
1087 1059
1088 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 1060 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
1089 1061
@@ -1102,7 +1074,7 @@ xfs_bdwrite(
1102 void *mp, 1074 void *mp,
1103 struct xfs_buf *bp) 1075 struct xfs_buf *bp)
1104{ 1076{
1105 XB_TRACE(bp, "bdwrite", 0); 1077 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1106 1078
1107 bp->b_strat = xfs_bdstrat_cb; 1079 bp->b_strat = xfs_bdstrat_cb;
1108 bp->b_mount = mp; 1080 bp->b_mount = mp;
@@ -1113,7 +1085,7 @@ xfs_bdwrite(
1113 xfs_buf_delwri_queue(bp, 1); 1085 xfs_buf_delwri_queue(bp, 1);
1114} 1086}
1115 1087
1116STATIC_INLINE void 1088STATIC void
1117_xfs_buf_ioend( 1089_xfs_buf_ioend(
1118 xfs_buf_t *bp, 1090 xfs_buf_t *bp,
1119 int schedule) 1091 int schedule)
@@ -1177,10 +1149,14 @@ _xfs_buf_ioapply(
1177 if (bp->b_flags & XBF_ORDERED) { 1149 if (bp->b_flags & XBF_ORDERED) {
1178 ASSERT(!(bp->b_flags & XBF_READ)); 1150 ASSERT(!(bp->b_flags & XBF_READ));
1179 rw = WRITE_BARRIER; 1151 rw = WRITE_BARRIER;
1180 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1152 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1181 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1182 bp->b_flags &= ~_XBF_RUN_QUEUES; 1154 bp->b_flags &= ~_XBF_RUN_QUEUES;
1183 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1156 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1157 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1158 bp->b_flags &= ~_XBF_RUN_QUEUES;
1159 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1184 } else { 1160 } else {
1185 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1161 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1186 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1162 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
@@ -1253,7 +1229,7 @@ int
1253xfs_buf_iorequest( 1229xfs_buf_iorequest(
1254 xfs_buf_t *bp) 1230 xfs_buf_t *bp)
1255{ 1231{
1256 XB_TRACE(bp, "iorequest", 0); 1232 trace_xfs_buf_iorequest(bp, _RET_IP_);
1257 1233
1258 if (bp->b_flags & XBF_DELWRI) { 1234 if (bp->b_flags & XBF_DELWRI) {
1259 xfs_buf_delwri_queue(bp, 1); 1235 xfs_buf_delwri_queue(bp, 1);
@@ -1287,11 +1263,13 @@ int
1287xfs_buf_iowait( 1263xfs_buf_iowait(
1288 xfs_buf_t *bp) 1264 xfs_buf_t *bp)
1289{ 1265{
1290 XB_TRACE(bp, "iowait", 0); 1266 trace_xfs_buf_iowait(bp, _RET_IP_);
1267
1291 if (atomic_read(&bp->b_io_remaining)) 1268 if (atomic_read(&bp->b_io_remaining))
1292 blk_run_address_space(bp->b_target->bt_mapping); 1269 blk_run_address_space(bp->b_target->bt_mapping);
1293 wait_for_completion(&bp->b_iowait); 1270 wait_for_completion(&bp->b_iowait);
1294 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1271
1272 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1295 return bp->b_error; 1273 return bp->b_error;
1296} 1274}
1297 1275
@@ -1604,7 +1582,8 @@ xfs_buf_delwri_queue(
1604 struct list_head *dwq = &bp->b_target->bt_delwrite_queue; 1582 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1605 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; 1583 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1606 1584
1607 XB_TRACE(bp, "delwri_q", (long)unlock); 1585 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1586
1608 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); 1587 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1609 1588
1610 spin_lock(dwlk); 1589 spin_lock(dwlk);
@@ -1644,7 +1623,7 @@ xfs_buf_delwri_dequeue(
1644 if (dequeued) 1623 if (dequeued)
1645 xfs_buf_rele(bp); 1624 xfs_buf_rele(bp);
1646 1625
1647 XB_TRACE(bp, "delwri_dq", (long)dequeued); 1626 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1648} 1627}
1649 1628
1650STATIC void 1629STATIC void
@@ -1692,7 +1671,7 @@ xfs_buf_delwri_split(
1692 INIT_LIST_HEAD(list); 1671 INIT_LIST_HEAD(list);
1693 spin_lock(dwlk); 1672 spin_lock(dwlk);
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1673 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); 1674 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1696 ASSERT(bp->b_flags & XBF_DELWRI); 1675 ASSERT(bp->b_flags & XBF_DELWRI);
1697 1676
1698 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1677 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1816,14 +1795,10 @@ xfs_flush_buftarg(
1816int __init 1795int __init
1817xfs_buf_init(void) 1796xfs_buf_init(void)
1818{ 1797{
1819#ifdef XFS_BUF_TRACE
1820 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1821#endif
1822
1823 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1798 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
1824 KM_ZONE_HWALIGN, NULL); 1799 KM_ZONE_HWALIGN, NULL);
1825 if (!xfs_buf_zone) 1800 if (!xfs_buf_zone)
1826 goto out_free_trace_buf; 1801 goto out;
1827 1802
1828 xfslogd_workqueue = create_workqueue("xfslogd"); 1803 xfslogd_workqueue = create_workqueue("xfslogd");
1829 if (!xfslogd_workqueue) 1804 if (!xfslogd_workqueue)
@@ -1846,10 +1821,7 @@ xfs_buf_init(void)
1846 destroy_workqueue(xfslogd_workqueue); 1821 destroy_workqueue(xfslogd_workqueue);
1847 out_free_buf_zone: 1822 out_free_buf_zone:
1848 kmem_zone_destroy(xfs_buf_zone); 1823 kmem_zone_destroy(xfs_buf_zone);
1849 out_free_trace_buf: 1824 out:
1850#ifdef XFS_BUF_TRACE
1851 ktrace_free(xfs_buf_trace_buf);
1852#endif
1853 return -ENOMEM; 1825 return -ENOMEM;
1854} 1826}
1855 1827
@@ -1861,9 +1833,6 @@ xfs_buf_terminate(void)
1861 destroy_workqueue(xfsdatad_workqueue); 1833 destroy_workqueue(xfsdatad_workqueue);
1862 destroy_workqueue(xfslogd_workqueue); 1834 destroy_workqueue(xfslogd_workqueue);
1863 kmem_zone_destroy(xfs_buf_zone); 1835 kmem_zone_destroy(xfs_buf_zone);
1864#ifdef XFS_BUF_TRACE
1865 ktrace_free(xfs_buf_trace_buf);
1866#endif
1867} 1836}
1868 1837
1869#ifdef CONFIG_KDB_MODULES 1838#ifdef CONFIG_KDB_MODULES
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9b4d666ad31..a34c7b54822 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
@@ -95,6 +96,28 @@ typedef enum {
95 _XFS_BARRIER_FAILED = (1 << 23), 96 _XFS_BARRIER_FAILED = (1 << 23),
96} xfs_buf_flags_t; 97} xfs_buf_flags_t;
97 98
99#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \
101 { XBF_WRITE, "WRITE" }, \
102 { XBF_MAPPED, "MAPPED" }, \
103 { XBF_ASYNC, "ASYNC" }, \
104 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\
111 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
112 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
113 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
114 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119
120
98typedef enum { 121typedef enum {
99 XBT_FORCE_SLEEP = 0, 122 XBT_FORCE_SLEEP = 0,
100 XBT_FORCE_FLUSH = 1, 123 XBT_FORCE_FLUSH = 1,
@@ -186,15 +209,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
186#define xfs_incore(buftarg,blkno,len,lockit) \ 209#define xfs_incore(buftarg,blkno,len,lockit) \
187 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) 210 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
188 211
189extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, 212extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
190 xfs_buf_flags_t); 213 xfs_buf_flags_t);
191#define xfs_buf_get(target, blkno, len, flags) \ 214extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
192 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
193
194extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
195 xfs_buf_flags_t); 215 xfs_buf_flags_t);
196#define xfs_buf_read(target, blkno, len, flags) \
197 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
198 216
199extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 217extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
200extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 218extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
@@ -248,13 +266,6 @@ extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
248extern int xfs_buf_init(void); 266extern int xfs_buf_init(void);
249extern void xfs_buf_terminate(void); 267extern void xfs_buf_terminate(void);
250 268
251#ifdef XFS_BUF_TRACE
252extern ktrace_t *xfs_buf_trace_buf;
253extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
254#else
255#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
256#endif
257
258#define xfs_buf_target_name(target) \ 269#define xfs_buf_target_name(target) \
259 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) 270 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
260 271
@@ -370,10 +381,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 381
371#define xfs_bpin(bp) xfs_buf_pin(bp) 382#define xfs_bpin(bp) xfs_buf_pin(bp)
372#define xfs_bunpin(bp) xfs_buf_unpin(bp) 383#define xfs_bunpin(bp) xfs_buf_unpin(bp)
373
374#define xfs_buftrace(id, bp) \
375 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
376
377#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) 384#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
378 385
379#define xfs_biomove(bp, off, len, data, rw) \ 386#define xfs_biomove(bp, off, len, data, rw) \
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index eff61e2732a..e4caeb28ce2 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -52,7 +52,7 @@ xfs_file_aio_read(
52 loff_t pos) 52 loff_t pos)
53{ 53{
54 struct file *file = iocb->ki_filp; 54 struct file *file = iocb->ki_filp;
55 int ioflags = IO_ISAIO; 55 int ioflags = 0;
56 56
57 BUG_ON(iocb->ki_pos != pos); 57 BUG_ON(iocb->ki_pos != pos);
58 if (unlikely(file->f_flags & O_DIRECT)) 58 if (unlikely(file->f_flags & O_DIRECT))
@@ -71,7 +71,7 @@ xfs_file_aio_write(
71 loff_t pos) 71 loff_t pos)
72{ 72{
73 struct file *file = iocb->ki_filp; 73 struct file *file = iocb->ki_filp;
74 int ioflags = IO_ISAIO; 74 int ioflags = 0;
75 75
76 BUG_ON(iocb->ki_pos != pos); 76 BUG_ON(iocb->ki_pos != pos);
77 if (unlikely(file->f_flags & O_DIRECT)) 77 if (unlikely(file->f_flags & O_DIRECT))
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 08be36d7326..7501b85fd86 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -19,6 +19,7 @@
19#include "xfs_vnodeops.h" 19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h"
22 23
23int fs_noerr(void) { return 0; } 24int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 25int fs_nosys(void) { return ENOSYS; }
@@ -51,6 +52,8 @@ xfs_flushinval_pages(
51 struct address_space *mapping = VFS_I(ip)->i_mapping; 52 struct address_space *mapping = VFS_I(ip)->i_mapping;
52 int ret = 0; 53 int ret = 0;
53 54
55 trace_xfs_pagecache_inval(ip, first, last);
56
54 if (mapping->nrpages) { 57 if (mapping->nrpages) {
55 xfs_iflags_clear(ip, XFS_ITRUNCATED); 58 xfs_iflags_clear(ip, XFS_ITRUNCATED);
56 ret = filemap_write_and_wait(mapping); 59 ret = filemap_write_and_wait(mapping);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 5bb523d7f37..a034cf62443 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -51,6 +51,7 @@
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_inode_item.h" 52#include "xfs_inode_item.h"
53#include "xfs_export.h" 53#include "xfs_export.h"
54#include "xfs_trace.h"
54 55
55#include <linux/capability.h> 56#include <linux/capability.h>
56#include <linux/dcache.h> 57#include <linux/dcache.h>
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index eafcc7c1870..be1527b1670 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -46,6 +46,7 @@
46#include "xfs_attr.h" 46#include "xfs_attr.h"
47#include "xfs_ioctl.h" 47#include "xfs_ioctl.h"
48#include "xfs_ioctl32.h" 48#include "xfs_ioctl32.h"
49#include "xfs_trace.h"
49 50
50#define _NATIVE_IOC(cmd, type) \ 51#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 52 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cd42ef78f6b..225946012d0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -47,6 +47,7 @@
47#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51#include <linux/capability.h> 52#include <linux/capability.h>
52#include <linux/xattr.h> 53#include <linux/xattr.h>
@@ -573,8 +574,8 @@ xfs_vn_fallocate(
573 bf.l_len = len; 574 bf.l_len = len;
574 575
575 xfs_ilock(ip, XFS_IOLOCK_EXCL); 576 xfs_ilock(ip, XFS_IOLOCK_EXCL);
576 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 577 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
577 0, XFS_ATTR_NOLOCK); 578 0, XFS_ATTR_NOLOCK);
578 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 579 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
579 offset + len > i_size_read(inode)) 580 offset + len > i_size_read(inode))
580 new_size = offset + len; 581 new_size = offset + len;
@@ -585,7 +586,7 @@ xfs_vn_fallocate(
585 586
586 iattr.ia_valid = ATTR_SIZE; 587 iattr.ia_valid = ATTR_SIZE;
587 iattr.ia_size = new_size; 588 iattr.ia_size = new_size;
588 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 589 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
589 } 590 }
590 591
591 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 592 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -793,7 +794,7 @@ xfs_setup_inode(
793 struct inode *inode = &ip->i_vnode; 794 struct inode *inode = &ip->i_vnode;
794 795
795 inode->i_ino = ip->i_ino; 796 inode->i_ino = ip->i_ino;
796 inode->i_state = I_NEW|I_LOCK; 797 inode->i_state = I_NEW;
797 inode_add_to_lists(ip->i_mount->m_super, inode); 798 inode_add_to_lists(ip->i_mount->m_super, inode);
798 799
799 inode->i_mode = ip->i_d.di_mode; 800 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 6127e24062d..5af0c81ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -40,7 +40,6 @@
40#include <sv.h> 40#include <sv.h>
41#include <time.h> 41#include <time.h>
42 42
43#include <support/ktrace.h>
44#include <support/debug.h> 43#include <support/debug.h>
45#include <support/uuid.h> 44#include <support/uuid.h>
46 45
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 072050f8d34..0d32457abef 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -48,73 +48,12 @@
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_iomap.h" 49#include "xfs_iomap.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_trace.h"
51 52
52#include <linux/capability.h> 53#include <linux/capability.h>
53#include <linux/writeback.h> 54#include <linux/writeback.h>
54 55
55 56
56#if defined(XFS_RW_TRACE)
57void
58xfs_rw_enter_trace(
59 int tag,
60 xfs_inode_t *ip,
61 void *data,
62 size_t segs,
63 loff_t offset,
64 int ioflags)
65{
66 if (ip->i_rwtrace == NULL)
67 return;
68 ktrace_enter(ip->i_rwtrace,
69 (void *)(unsigned long)tag,
70 (void *)ip,
71 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
72 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
73 (void *)data,
74 (void *)((unsigned long)segs),
75 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
76 (void *)((unsigned long)(offset & 0xffffffff)),
77 (void *)((unsigned long)ioflags),
78 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
79 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
80 (void *)((unsigned long)current_pid()),
81 (void *)NULL,
82 (void *)NULL,
83 (void *)NULL,
84 (void *)NULL);
85}
86
87void
88xfs_inval_cached_trace(
89 xfs_inode_t *ip,
90 xfs_off_t offset,
91 xfs_off_t len,
92 xfs_off_t first,
93 xfs_off_t last)
94{
95
96 if (ip->i_rwtrace == NULL)
97 return;
98 ktrace_enter(ip->i_rwtrace,
99 (void *)(__psint_t)XFS_INVAL_CACHED,
100 (void *)ip,
101 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
102 (void *)((unsigned long)(offset & 0xffffffff)),
103 (void *)((unsigned long)((len >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(len & 0xffffffff)),
105 (void *)((unsigned long)((first >> 32) & 0xffffffff)),
106 (void *)((unsigned long)(first & 0xffffffff)),
107 (void *)((unsigned long)((last >> 32) & 0xffffffff)),
108 (void *)((unsigned long)(last & 0xffffffff)),
109 (void *)((unsigned long)current_pid()),
110 (void *)NULL,
111 (void *)NULL,
112 (void *)NULL,
113 (void *)NULL,
114 (void *)NULL);
115}
116#endif
117
118/* 57/*
119 * xfs_iozero 58 * xfs_iozero
120 * 59 *
@@ -250,13 +189,10 @@ xfs_read(
250 } 189 }
251 } 190 }
252 191
253 xfs_rw_enter_trace(XFS_READ_ENTER, ip, 192 trace_xfs_file_read(ip, size, *offset, ioflags);
254 (void *)iovp, segs, *offset, ioflags);
255 193
256 iocb->ki_pos = *offset; 194 iocb->ki_pos = *offset;
257 ret = generic_file_aio_read(iocb, iovp, segs, *offset); 195 ret = generic_file_aio_read(iocb, iovp, segs, *offset);
258 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
259 ret = wait_on_sync_kiocb(iocb);
260 if (ret > 0) 196 if (ret > 0)
261 XFS_STATS_ADD(xs_read_bytes, ret); 197 XFS_STATS_ADD(xs_read_bytes, ret);
262 198
@@ -294,8 +230,9 @@ xfs_splice_read(
294 return -error; 230 return -error;
295 } 231 }
296 } 232 }
297 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, 233
298 pipe, count, *ppos, ioflags); 234 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
235
299 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 236 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
300 if (ret > 0) 237 if (ret > 0)
301 XFS_STATS_ADD(xs_read_bytes, ret); 238 XFS_STATS_ADD(xs_read_bytes, ret);
@@ -344,8 +281,8 @@ xfs_splice_write(
344 ip->i_new_size = new_size; 281 ip->i_new_size = new_size;
345 xfs_iunlock(ip, XFS_ILOCK_EXCL); 282 xfs_iunlock(ip, XFS_ILOCK_EXCL);
346 283
347 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, 284 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
348 pipe, count, *ppos, ioflags); 285
349 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 286 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
350 if (ret > 0) 287 if (ret > 0)
351 XFS_STATS_ADD(xs_write_bytes, ret); 288 XFS_STATS_ADD(xs_write_bytes, ret);
@@ -712,8 +649,6 @@ start:
712 if ((ioflags & IO_ISDIRECT)) { 649 if ((ioflags & IO_ISDIRECT)) {
713 if (mapping->nrpages) { 650 if (mapping->nrpages) {
714 WARN_ON(need_i_mutex == 0); 651 WARN_ON(need_i_mutex == 0);
715 xfs_inval_cached_trace(xip, pos, -1,
716 (pos & PAGE_CACHE_MASK), -1);
717 error = xfs_flushinval_pages(xip, 652 error = xfs_flushinval_pages(xip,
718 (pos & PAGE_CACHE_MASK), 653 (pos & PAGE_CACHE_MASK),
719 -1, FI_REMAPF_LOCKED); 654 -1, FI_REMAPF_LOCKED);
@@ -730,8 +665,7 @@ start:
730 need_i_mutex = 0; 665 need_i_mutex = 0;
731 } 666 }
732 667
733 xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, 668 trace_xfs_file_direct_write(xip, count, *offset, ioflags);
734 *offset, ioflags);
735 ret = generic_file_direct_write(iocb, iovp, 669 ret = generic_file_direct_write(iocb, iovp,
736 &segs, pos, offset, count, ocount); 670 &segs, pos, offset, count, ocount);
737 671
@@ -754,8 +688,7 @@ start:
754 ssize_t ret2 = 0; 688 ssize_t ret2 = 0;
755 689
756write_retry: 690write_retry:
757 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, 691 trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
758 *offset, ioflags);
759 ret2 = generic_file_buffered_write(iocb, iovp, segs, 692 ret2 = generic_file_buffered_write(iocb, iovp, segs,
760 pos, offset, count, ret); 693 pos, offset, count, ret);
761 /* 694 /*
@@ -774,9 +707,6 @@ write_retry:
774 707
775 current->backing_dev_info = NULL; 708 current->backing_dev_info = NULL;
776 709
777 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
778 ret = wait_on_sync_kiocb(iocb);
779
780 isize = i_size_read(inode); 710 isize = i_size_read(inode);
781 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) 711 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
782 *offset = isize; 712 *offset = isize;
@@ -811,7 +741,7 @@ write_retry:
811 XFS_STATS_ADD(xs_write_bytes, ret); 741 XFS_STATS_ADD(xs_write_bytes, ret);
812 742
813 /* Handle various SYNC-type writes */ 743 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 744 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1; 745 loff_t end = pos + ret - 1;
816 int error2; 746 int error2;
817 747
@@ -863,7 +793,7 @@ int
863xfs_bdstrat_cb(struct xfs_buf *bp) 793xfs_bdstrat_cb(struct xfs_buf *bp)
864{ 794{
865 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 795 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
866 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 796 trace_xfs_bdstrat_shut(bp, _RET_IP_);
867 /* 797 /*
868 * Metadata write that didn't get logged but 798 * Metadata write that didn't get logged but
869 * written delayed anyway. These aren't associated 799 * written delayed anyway. These aren't associated
@@ -896,7 +826,7 @@ xfsbdstrat(
896 return; 826 return;
897 } 827 }
898 828
899 xfs_buftrace("XFSBDSTRAT IOERROR", bp); 829 trace_xfs_bdstrat_shut(bp, _RET_IP_);
900 xfs_bioerror_relse(bp); 830 xfs_bioerror_relse(bp);
901} 831}
902 832
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e6be37dbd0e..d1f7789c7ff 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -20,52 +20,7 @@
20 20
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_inode; 22struct xfs_inode;
23struct xfs_bmbt_irec;
24struct xfs_buf; 23struct xfs_buf;
25struct xfs_iomap;
26
27#if defined(XFS_RW_TRACE)
28/*
29 * Defines for the trace mechanisms in xfs_lrw.c.
30 */
31#define XFS_RW_KTRACE_SIZE 128
32
33#define XFS_READ_ENTER 1
34#define XFS_WRITE_ENTER 2
35#define XFS_IOMAP_READ_ENTER 3
36#define XFS_IOMAP_WRITE_ENTER 4
37#define XFS_IOMAP_READ_MAP 5
38#define XFS_IOMAP_WRITE_MAP 6
39#define XFS_IOMAP_WRITE_NOSPACE 7
40#define XFS_ITRUNC_START 8
41#define XFS_ITRUNC_FINISH1 9
42#define XFS_ITRUNC_FINISH2 10
43#define XFS_CTRUNC1 11
44#define XFS_CTRUNC2 12
45#define XFS_CTRUNC3 13
46#define XFS_CTRUNC4 14
47#define XFS_CTRUNC5 15
48#define XFS_CTRUNC6 16
49#define XFS_BUNMAP 17
50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20
53#define XFS_WRITEPAGE_ENTER 22
54#define XFS_RELEASEPAGE_ENTER 23
55#define XFS_INVALIDPAGE_ENTER 24
56#define XFS_IOMAP_ALLOC_ENTER 25
57#define XFS_IOMAP_ALLOC_MAP 26
58#define XFS_IOMAP_UNWRITTEN 27
59#define XFS_SPLICE_READ_ENTER 28
60#define XFS_SPLICE_WRITE_ENTER 29
61extern void xfs_rw_enter_trace(int, struct xfs_inode *,
62 void *, size_t, loff_t, int);
63extern void xfs_inval_cached_trace(struct xfs_inode *,
64 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
65#else
66#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags)
67#define xfs_inval_cached_trace(ip, offset, len, first, last)
68#endif
69 24
70/* errors from xfsbdstrat() must be extracted from the buffer */ 25/* errors from xfsbdstrat() must be extracted from the buffer */
71extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 26extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e11df..77414db10dc 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -15,6 +15,7 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
18#include "xfs.h" 19#include "xfs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
@@ -52,11 +53,11 @@
52#include "xfs_trans_priv.h" 53#include "xfs_trans_priv.h"
53#include "xfs_filestream.h" 54#include "xfs_filestream.h"
54#include "xfs_da_btree.h" 55#include "xfs_da_btree.h"
55#include "xfs_dir2_trace.h"
56#include "xfs_extfree_item.h" 56#include "xfs_extfree_item.h"
57#include "xfs_mru_cache.h" 57#include "xfs_mru_cache.h"
58#include "xfs_inode_item.h" 58#include "xfs_inode_item.h"
59#include "xfs_sync.h" 59#include "xfs_sync.h"
60#include "xfs_trace.h"
60 61
61#include <linux/namei.h> 62#include <linux/namei.h>
62#include <linux/init.h> 63#include <linux/init.h>
@@ -930,13 +931,37 @@ xfs_fs_alloc_inode(
930 */ 931 */
931STATIC void 932STATIC void
932xfs_fs_destroy_inode( 933xfs_fs_destroy_inode(
933 struct inode *inode) 934 struct inode *inode)
934{ 935{
935 xfs_inode_t *ip = XFS_I(inode); 936 struct xfs_inode *ip = XFS_I(inode);
937
938 xfs_itrace_entry(ip);
936 939
937 XFS_STATS_INC(vn_reclaim); 940 XFS_STATS_INC(vn_reclaim);
938 if (xfs_reclaim(ip)) 941
939 panic("%s: cannot reclaim 0x%p\n", __func__, inode); 942 /* bad inode, get out here ASAP */
943 if (is_bad_inode(inode))
944 goto out_reclaim;
945
946 xfs_ioend_wait(ip);
947
948 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
949
950 /*
951 * We should never get here with one of the reclaim flags already set.
952 */
953 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955
956 /*
957 * We always use background reclaim here because even if the
958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
962 */
963out_reclaim:
964 xfs_inode_set_reclaim_tag(ip);
940} 965}
941 966
942/* 967/*
@@ -973,7 +998,6 @@ xfs_fs_inode_init_once(
973 998
974 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 999 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
975 "xfsino", ip->i_ino); 1000 "xfsino", ip->i_ino);
976 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
977} 1001}
978 1002
979/* 1003/*
@@ -1075,6 +1099,20 @@ xfs_fs_clear_inode(
1075 XFS_STATS_INC(vn_remove); 1099 XFS_STATS_INC(vn_remove);
1076 XFS_STATS_DEC(vn_active); 1100 XFS_STATS_DEC(vn_active);
1077 1101
1102 /*
1103 * The iolock is used by the file system to coordinate reads,
1104 * writes, and block truncates. Up to this point the lock
1105 * protected concurrent accesses by users of the inode. But
1106 * from here forward we're doing some final processing of the
1107 * inode because we're done with it, and although we reuse the
1108 * iolock for protection it is really a distinct lock class
1109 * (in the lockdep sense) from before. To keep lockdep happy
1110 * (and basically indicate what we are doing), we explicitly
1111 * re-init the iolock here.
1112 */
1113 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1114 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1115
1078 xfs_inactive(ip); 1116 xfs_inactive(ip);
1079} 1117}
1080 1118
@@ -1092,8 +1130,6 @@ xfs_fs_put_super(
1092 struct super_block *sb) 1130 struct super_block *sb)
1093{ 1131{
1094 struct xfs_mount *mp = XFS_M(sb); 1132 struct xfs_mount *mp = XFS_M(sb);
1095 struct xfs_inode *rip = mp->m_rootip;
1096 int unmount_event_flags = 0;
1097 1133
1098 xfs_syncd_stop(mp); 1134 xfs_syncd_stop(mp);
1099 1135
@@ -1109,20 +1145,7 @@ xfs_fs_put_super(
1109 xfs_sync_attr(mp, 0); 1145 xfs_sync_attr(mp, 0);
1110 } 1146 }
1111 1147
1112#ifdef HAVE_DMAPI 1148 XFS_SEND_PREUNMOUNT(mp);
1113 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1114 unmount_event_flags =
1115 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1116 0 : DM_FLAGS_UNWANTED;
1117 /*
1118 * Ignore error from dmapi here, first unmount is not allowed
1119 * to fail anyway, and second we wouldn't want to fail a
1120 * unmount because of dmapi.
1121 */
1122 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1123 NULL, NULL, 0, 0, unmount_event_flags);
1124 }
1125#endif
1126 1149
1127 /* 1150 /*
1128 * Blow away any referenced inode in the filestreams cache. 1151 * Blow away any referenced inode in the filestreams cache.
@@ -1133,10 +1156,7 @@ xfs_fs_put_super(
1133 1156
1134 XFS_bflush(mp->m_ddev_targp); 1157 XFS_bflush(mp->m_ddev_targp);
1135 1158
1136 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1159 XFS_SEND_UNMOUNT(mp);
1137 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1138 unmount_event_flags);
1139 }
1140 1160
1141 xfs_unmountfs(mp); 1161 xfs_unmountfs(mp);
1142 xfs_freesb(mp); 1162 xfs_freesb(mp);
@@ -1504,8 +1524,6 @@ xfs_fs_fill_super(
1504 goto fail_vnrele; 1524 goto fail_vnrele;
1505 1525
1506 kfree(mtpt); 1526 kfree(mtpt);
1507
1508 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1509 return 0; 1527 return 0;
1510 1528
1511 out_filestream_unmount: 1529 out_filestream_unmount:
@@ -1581,94 +1599,6 @@ static struct file_system_type xfs_fs_type = {
1581}; 1599};
1582 1600
1583STATIC int __init 1601STATIC int __init
1584xfs_alloc_trace_bufs(void)
1585{
1586#ifdef XFS_ALLOC_TRACE
1587 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1588 if (!xfs_alloc_trace_buf)
1589 goto out;
1590#endif
1591#ifdef XFS_BMAP_TRACE
1592 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1593 if (!xfs_bmap_trace_buf)
1594 goto out_free_alloc_trace;
1595#endif
1596#ifdef XFS_BTREE_TRACE
1597 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1598 KM_MAYFAIL);
1599 if (!xfs_allocbt_trace_buf)
1600 goto out_free_bmap_trace;
1601
1602 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1603 if (!xfs_inobt_trace_buf)
1604 goto out_free_allocbt_trace;
1605
1606 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1607 if (!xfs_bmbt_trace_buf)
1608 goto out_free_inobt_trace;
1609#endif
1610#ifdef XFS_ATTR_TRACE
1611 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1612 if (!xfs_attr_trace_buf)
1613 goto out_free_bmbt_trace;
1614#endif
1615#ifdef XFS_DIR2_TRACE
1616 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1617 if (!xfs_dir2_trace_buf)
1618 goto out_free_attr_trace;
1619#endif
1620
1621 return 0;
1622
1623#ifdef XFS_DIR2_TRACE
1624 out_free_attr_trace:
1625#endif
1626#ifdef XFS_ATTR_TRACE
1627 ktrace_free(xfs_attr_trace_buf);
1628 out_free_bmbt_trace:
1629#endif
1630#ifdef XFS_BTREE_TRACE
1631 ktrace_free(xfs_bmbt_trace_buf);
1632 out_free_inobt_trace:
1633 ktrace_free(xfs_inobt_trace_buf);
1634 out_free_allocbt_trace:
1635 ktrace_free(xfs_allocbt_trace_buf);
1636 out_free_bmap_trace:
1637#endif
1638#ifdef XFS_BMAP_TRACE
1639 ktrace_free(xfs_bmap_trace_buf);
1640 out_free_alloc_trace:
1641#endif
1642#ifdef XFS_ALLOC_TRACE
1643 ktrace_free(xfs_alloc_trace_buf);
1644 out:
1645#endif
1646 return -ENOMEM;
1647}
1648
1649STATIC void
1650xfs_free_trace_bufs(void)
1651{
1652#ifdef XFS_DIR2_TRACE
1653 ktrace_free(xfs_dir2_trace_buf);
1654#endif
1655#ifdef XFS_ATTR_TRACE
1656 ktrace_free(xfs_attr_trace_buf);
1657#endif
1658#ifdef XFS_BTREE_TRACE
1659 ktrace_free(xfs_bmbt_trace_buf);
1660 ktrace_free(xfs_inobt_trace_buf);
1661 ktrace_free(xfs_allocbt_trace_buf);
1662#endif
1663#ifdef XFS_BMAP_TRACE
1664 ktrace_free(xfs_bmap_trace_buf);
1665#endif
1666#ifdef XFS_ALLOC_TRACE
1667 ktrace_free(xfs_alloc_trace_buf);
1668#endif
1669}
1670
1671STATIC int __init
1672xfs_init_zones(void) 1602xfs_init_zones(void)
1673{ 1603{
1674 1604
@@ -1809,7 +1739,6 @@ init_xfs_fs(void)
1809 printk(KERN_INFO XFS_VERSION_STRING " with " 1739 printk(KERN_INFO XFS_VERSION_STRING " with "
1810 XFS_BUILD_OPTIONS " enabled\n"); 1740 XFS_BUILD_OPTIONS " enabled\n");
1811 1741
1812 ktrace_init(64);
1813 xfs_ioend_init(); 1742 xfs_ioend_init();
1814 xfs_dir_startup(); 1743 xfs_dir_startup();
1815 1744
@@ -1817,13 +1746,9 @@ init_xfs_fs(void)
1817 if (error) 1746 if (error)
1818 goto out; 1747 goto out;
1819 1748
1820 error = xfs_alloc_trace_bufs();
1821 if (error)
1822 goto out_destroy_zones;
1823
1824 error = xfs_mru_cache_init(); 1749 error = xfs_mru_cache_init();
1825 if (error) 1750 if (error)
1826 goto out_free_trace_buffers; 1751 goto out_destroy_zones;
1827 1752
1828 error = xfs_filestream_init(); 1753 error = xfs_filestream_init();
1829 if (error) 1754 if (error)
@@ -1858,8 +1783,6 @@ init_xfs_fs(void)
1858 xfs_filestream_uninit(); 1783 xfs_filestream_uninit();
1859 out_mru_cache_uninit: 1784 out_mru_cache_uninit:
1860 xfs_mru_cache_uninit(); 1785 xfs_mru_cache_uninit();
1861 out_free_trace_buffers:
1862 xfs_free_trace_bufs();
1863 out_destroy_zones: 1786 out_destroy_zones:
1864 xfs_destroy_zones(); 1787 xfs_destroy_zones();
1865 out: 1788 out:
@@ -1876,9 +1799,7 @@ exit_xfs_fs(void)
1876 xfs_buf_terminate(); 1799 xfs_buf_terminate();
1877 xfs_filestream_uninit(); 1800 xfs_filestream_uninit();
1878 xfs_mru_cache_uninit(); 1801 xfs_mru_cache_uninit();
1879 xfs_free_trace_bufs();
1880 xfs_destroy_zones(); 1802 xfs_destroy_zones();
1881 ktrace_uninit();
1882} 1803}
1883 1804
1884module_init(init_xfs_fs); 1805module_init(init_xfs_fs);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 18175ebd58e..233d4b9881b 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_TRACE
60# define XFS_TRACE_STRING "tracing, "
61#else
62# define XFS_TRACE_STRING
63#endif
64
65#ifdef CONFIG_XFS_DMAPI 59#ifdef CONFIG_XFS_DMAPI
66# define XFS_DMAPI_STRING "dmapi support, " 60# define XFS_DMAPI_STRING "dmapi support, "
67#else 61#else
@@ -78,7 +72,6 @@ extern void xfs_qm_exit(void);
78 XFS_SECURITY_STRING \ 72 XFS_SECURITY_STRING \
79 XFS_REALTIME_STRING \ 73 XFS_REALTIME_STRING \
80 XFS_BIGFS_STRING \ 74 XFS_BIGFS_STRING \
81 XFS_TRACE_STRING \
82 XFS_DMAPI_STRING \ 75 XFS_DMAPI_STRING \
83 XFS_DBG_STRING /* DBG must be last */ 76 XFS_DBG_STRING /* DBG must be last */
84 77
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a22c7..1f5e4bb5e97 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -44,6 +44,7 @@
44#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_trace.h"
47 48
48#include <linux/kthread.h> 49#include <linux/kthread.h>
49#include <linux/freezer.h> 50#include <linux/freezer.h>
@@ -64,7 +65,6 @@ xfs_inode_ag_lookup(
64 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
65 * the number of objects requested. 66 * the number of objects requested.
66 */ 67 */
67 read_lock(&pag->pag_ici_lock);
68 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
70 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -73,7 +73,7 @@ xfs_inode_ag_lookup(
73 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
74 } 74 }
75 if (!nr_found) 75 if (!nr_found)
76 goto unlock; 76 return NULL;
77 77
78 /* 78 /*
79 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -83,13 +83,8 @@ xfs_inode_ag_lookup(
83 */ 83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 goto unlock; 86 return NULL;
87
88 return ip; 87 return ip;
89
90unlock:
91 read_unlock(&pag->pag_ici_lock);
92 return NULL;
93} 88}
94 89
95STATIC int 90STATIC int
@@ -99,7 +94,8 @@ xfs_inode_ag_walk(
99 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
100 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
101 int flags, 96 int flags,
102 int tag) 97 int tag,
98 int exclusive)
103{ 99{
104 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
105 uint32_t first_index; 101 uint32_t first_index;
@@ -113,10 +109,20 @@ restart:
113 int error = 0; 109 int error = 0;
114 xfs_inode_t *ip; 110 xfs_inode_t *ip;
115 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
117 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
118 break; 122 break;
123 }
119 124
125 /* execute releases pag->pag_ici_lock */
120 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
121 if (error == EAGAIN) { 127 if (error == EAGAIN) {
122 skipped++; 128 skipped++;
@@ -124,9 +130,8 @@ restart:
124 } 130 }
125 if (error) 131 if (error)
126 last_error = error; 132 last_error = error;
127 /* 133
128 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
129 */
130 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
131 break; 136 break;
132 137
@@ -147,7 +152,8 @@ xfs_inode_ag_iterator(
147 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
148 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
149 int flags, 154 int flags,
150 int tag) 155 int tag,
156 int exclusive)
151{ 157{
152 int error = 0; 158 int error = 0;
153 int last_error = 0; 159 int last_error = 0;
@@ -156,7 +162,8 @@ xfs_inode_ag_iterator(
156 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
157 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
158 continue; 164 continue;
159 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
160 if (error) { 167 if (error) {
161 last_error = error; 168 last_error = error;
162 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -173,30 +180,31 @@ xfs_sync_inode_valid(
173 struct xfs_perag *pag) 180 struct xfs_perag *pag)
174{ 181{
175 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
176 184
177 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
180 return EFSCORRUPTED;
181 }
182 188
183 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
184 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
185 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
186 * haven't been fully initialised. 192 goto out_unlock;
187 */
188 if (!igrab(inode)) {
189 read_unlock(&pag->pag_ici_lock);
190 return ENOENT;
191 }
192 read_unlock(&pag->pag_ici_lock);
193 193
194 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must on it's way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
195 IRELE(ip); 199 IRELE(ip);
196 return ENOENT; 200 goto out_unlock;
197 } 201 }
198 202
199 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
200} 208}
201 209
202STATIC int 210STATIC int
@@ -281,7 +289,7 @@ xfs_sync_data(
281 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
282 290
283 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
284 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
285 if (error) 293 if (error)
286 return XFS_ERROR(error); 294 return XFS_ERROR(error);
287 295
@@ -303,7 +311,7 @@ xfs_sync_attr(
303 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
304 312
305 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
306 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
307} 315}
308 316
309STATIC int 317STATIC int
@@ -663,67 +671,6 @@ xfs_syncd_stop(
663 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
664} 672}
665 673
666int
667xfs_reclaim_inode(
668 xfs_inode_t *ip,
669 int locked,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 if (locked) {
686 xfs_ifunlock(ip);
687 xfs_iunlock(ip, XFS_ILOCK_EXCL);
688 }
689 return -EAGAIN;
690 }
691 __xfs_iflags_set(ip, XFS_IRECLAIM);
692 spin_unlock(&ip->i_flags_lock);
693 write_unlock(&pag->pag_ici_lock);
694 xfs_put_perag(ip->i_mount, pag);
695
696 /*
697 * If the inode is still dirty, then flush it out. If the inode
698 * is not in the AIL, then it will be OK to flush it delwri as
699 * long as xfs_iflush() does not keep any references to the inode.
700 * We leave that decision up to xfs_iflush() since it has the
701 * knowledge of whether it's OK to simply do a delwri flush of
702 * the inode or whether we need to wait until the inode is
703 * pulled from the AIL.
704 * We get the flush lock regardless, though, just to make sure
705 * we don't free it while it is being flushed.
706 */
707 if (!locked) {
708 xfs_ilock(ip, XFS_ILOCK_EXCL);
709 xfs_iflock(ip);
710 }
711
712 /*
713 * In the case of a forced shutdown we rely on xfs_iflush() to
714 * wait for the inode to be unpinned before returning an error.
715 */
716 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
717 /* synchronize with xfs_iflush_done */
718 xfs_iflock(ip);
719 xfs_ifunlock(ip);
720 }
721
722 xfs_iunlock(ip, XFS_ILOCK_EXCL);
723 xfs_ireclaim(ip);
724 return 0;
725}
726
727void 674void
728__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
729 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -766,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
766} 713}
767 714
768STATIC int 715STATIC int
769xfs_reclaim_inode_now( 716xfs_reclaim_inode(
770 struct xfs_inode *ip, 717 struct xfs_inode *ip,
771 struct xfs_perag *pag, 718 struct xfs_perag *pag,
772 int flags) 719 int sync_mode)
773{ 720{
774 /* ignore if already under reclaim */ 721 /*
775 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
776 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
777 return 0; 732 return 0;
778 } 733 }
779 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
780 737
781 return xfs_reclaim_inode(ip, 0, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
782} 765}
783 766
784int 767int
@@ -786,6 +769,6 @@ xfs_reclaim_inodes(
786 xfs_mount_t *mp, 769 xfs_mount_t *mp,
787 int mode) 770 int mode)
788{ 771{
789 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
790 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
791} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb7a82..ea932b43335 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -44,7 +44,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
44 44
45void xfs_flush_inodes(struct xfs_inode *ip); 45void xfs_flush_inodes(struct xfs_inode *ip);
46 46
47int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 48
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 49void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
@@ -55,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
55int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
56int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
57 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
58 int flags, int tag); 57 int flags, int tag, int write_lock);
59 58
60#endif 59#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index c5bc67c4e3b..7bb5092d6ae 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -55,170 +55,140 @@ xfs_stats_clear_proc_handler(
55 55
56static ctl_table xfs_table[] = { 56static ctl_table xfs_table[] = {
57 { 57 {
58 .ctl_name = XFS_SGID_INHERIT,
59 .procname = "irix_sgid_inherit", 58 .procname = "irix_sgid_inherit",
60 .data = &xfs_params.sgid_inherit.val, 59 .data = &xfs_params.sgid_inherit.val,
61 .maxlen = sizeof(int), 60 .maxlen = sizeof(int),
62 .mode = 0644, 61 .mode = 0644,
63 .proc_handler = &proc_dointvec_minmax, 62 .proc_handler = proc_dointvec_minmax,
64 .strategy = &sysctl_intvec,
65 .extra1 = &xfs_params.sgid_inherit.min, 63 .extra1 = &xfs_params.sgid_inherit.min,
66 .extra2 = &xfs_params.sgid_inherit.max 64 .extra2 = &xfs_params.sgid_inherit.max
67 }, 65 },
68 { 66 {
69 .ctl_name = XFS_SYMLINK_MODE,
70 .procname = "irix_symlink_mode", 67 .procname = "irix_symlink_mode",
71 .data = &xfs_params.symlink_mode.val, 68 .data = &xfs_params.symlink_mode.val,
72 .maxlen = sizeof(int), 69 .maxlen = sizeof(int),
73 .mode = 0644, 70 .mode = 0644,
74 .proc_handler = &proc_dointvec_minmax, 71 .proc_handler = proc_dointvec_minmax,
75 .strategy = &sysctl_intvec,
76 .extra1 = &xfs_params.symlink_mode.min, 72 .extra1 = &xfs_params.symlink_mode.min,
77 .extra2 = &xfs_params.symlink_mode.max 73 .extra2 = &xfs_params.symlink_mode.max
78 }, 74 },
79 { 75 {
80 .ctl_name = XFS_PANIC_MASK,
81 .procname = "panic_mask", 76 .procname = "panic_mask",
82 .data = &xfs_params.panic_mask.val, 77 .data = &xfs_params.panic_mask.val,
83 .maxlen = sizeof(int), 78 .maxlen = sizeof(int),
84 .mode = 0644, 79 .mode = 0644,
85 .proc_handler = &proc_dointvec_minmax, 80 .proc_handler = proc_dointvec_minmax,
86 .strategy = &sysctl_intvec,
87 .extra1 = &xfs_params.panic_mask.min, 81 .extra1 = &xfs_params.panic_mask.min,
88 .extra2 = &xfs_params.panic_mask.max 82 .extra2 = &xfs_params.panic_mask.max
89 }, 83 },
90 84
91 { 85 {
92 .ctl_name = XFS_ERRLEVEL,
93 .procname = "error_level", 86 .procname = "error_level",
94 .data = &xfs_params.error_level.val, 87 .data = &xfs_params.error_level.val,
95 .maxlen = sizeof(int), 88 .maxlen = sizeof(int),
96 .mode = 0644, 89 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 90 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &xfs_params.error_level.min, 91 .extra1 = &xfs_params.error_level.min,
100 .extra2 = &xfs_params.error_level.max 92 .extra2 = &xfs_params.error_level.max
101 }, 93 },
102 { 94 {
103 .ctl_name = XFS_SYNCD_TIMER,
104 .procname = "xfssyncd_centisecs", 95 .procname = "xfssyncd_centisecs",
105 .data = &xfs_params.syncd_timer.val, 96 .data = &xfs_params.syncd_timer.val,
106 .maxlen = sizeof(int), 97 .maxlen = sizeof(int),
107 .mode = 0644, 98 .mode = 0644,
108 .proc_handler = &proc_dointvec_minmax, 99 .proc_handler = proc_dointvec_minmax,
109 .strategy = &sysctl_intvec,
110 .extra1 = &xfs_params.syncd_timer.min, 100 .extra1 = &xfs_params.syncd_timer.min,
111 .extra2 = &xfs_params.syncd_timer.max 101 .extra2 = &xfs_params.syncd_timer.max
112 }, 102 },
113 { 103 {
114 .ctl_name = XFS_INHERIT_SYNC,
115 .procname = "inherit_sync", 104 .procname = "inherit_sync",
116 .data = &xfs_params.inherit_sync.val, 105 .data = &xfs_params.inherit_sync.val,
117 .maxlen = sizeof(int), 106 .maxlen = sizeof(int),
118 .mode = 0644, 107 .mode = 0644,
119 .proc_handler = &proc_dointvec_minmax, 108 .proc_handler = proc_dointvec_minmax,
120 .strategy = &sysctl_intvec,
121 .extra1 = &xfs_params.inherit_sync.min, 109 .extra1 = &xfs_params.inherit_sync.min,
122 .extra2 = &xfs_params.inherit_sync.max 110 .extra2 = &xfs_params.inherit_sync.max
123 }, 111 },
124 { 112 {
125 .ctl_name = XFS_INHERIT_NODUMP,
126 .procname = "inherit_nodump", 113 .procname = "inherit_nodump",
127 .data = &xfs_params.inherit_nodump.val, 114 .data = &xfs_params.inherit_nodump.val,
128 .maxlen = sizeof(int), 115 .maxlen = sizeof(int),
129 .mode = 0644, 116 .mode = 0644,
130 .proc_handler = &proc_dointvec_minmax, 117 .proc_handler = proc_dointvec_minmax,
131 .strategy = &sysctl_intvec,
132 .extra1 = &xfs_params.inherit_nodump.min, 118 .extra1 = &xfs_params.inherit_nodump.min,
133 .extra2 = &xfs_params.inherit_nodump.max 119 .extra2 = &xfs_params.inherit_nodump.max
134 }, 120 },
135 { 121 {
136 .ctl_name = XFS_INHERIT_NOATIME,
137 .procname = "inherit_noatime", 122 .procname = "inherit_noatime",
138 .data = &xfs_params.inherit_noatim.val, 123 .data = &xfs_params.inherit_noatim.val,
139 .maxlen = sizeof(int), 124 .maxlen = sizeof(int),
140 .mode = 0644, 125 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax, 126 .proc_handler = proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &xfs_params.inherit_noatim.min, 127 .extra1 = &xfs_params.inherit_noatim.min,
144 .extra2 = &xfs_params.inherit_noatim.max 128 .extra2 = &xfs_params.inherit_noatim.max
145 }, 129 },
146 { 130 {
147 .ctl_name = XFS_BUF_TIMER,
148 .procname = "xfsbufd_centisecs", 131 .procname = "xfsbufd_centisecs",
149 .data = &xfs_params.xfs_buf_timer.val, 132 .data = &xfs_params.xfs_buf_timer.val,
150 .maxlen = sizeof(int), 133 .maxlen = sizeof(int),
151 .mode = 0644, 134 .mode = 0644,
152 .proc_handler = &proc_dointvec_minmax, 135 .proc_handler = proc_dointvec_minmax,
153 .strategy = &sysctl_intvec,
154 .extra1 = &xfs_params.xfs_buf_timer.min, 136 .extra1 = &xfs_params.xfs_buf_timer.min,
155 .extra2 = &xfs_params.xfs_buf_timer.max 137 .extra2 = &xfs_params.xfs_buf_timer.max
156 }, 138 },
157 { 139 {
158 .ctl_name = XFS_BUF_AGE,
159 .procname = "age_buffer_centisecs", 140 .procname = "age_buffer_centisecs",
160 .data = &xfs_params.xfs_buf_age.val, 141 .data = &xfs_params.xfs_buf_age.val,
161 .maxlen = sizeof(int), 142 .maxlen = sizeof(int),
162 .mode = 0644, 143 .mode = 0644,
163 .proc_handler = &proc_dointvec_minmax, 144 .proc_handler = proc_dointvec_minmax,
164 .strategy = &sysctl_intvec,
165 .extra1 = &xfs_params.xfs_buf_age.min, 145 .extra1 = &xfs_params.xfs_buf_age.min,
166 .extra2 = &xfs_params.xfs_buf_age.max 146 .extra2 = &xfs_params.xfs_buf_age.max
167 }, 147 },
168 { 148 {
169 .ctl_name = XFS_INHERIT_NOSYM,
170 .procname = "inherit_nosymlinks", 149 .procname = "inherit_nosymlinks",
171 .data = &xfs_params.inherit_nosym.val, 150 .data = &xfs_params.inherit_nosym.val,
172 .maxlen = sizeof(int), 151 .maxlen = sizeof(int),
173 .mode = 0644, 152 .mode = 0644,
174 .proc_handler = &proc_dointvec_minmax, 153 .proc_handler = proc_dointvec_minmax,
175 .strategy = &sysctl_intvec,
176 .extra1 = &xfs_params.inherit_nosym.min, 154 .extra1 = &xfs_params.inherit_nosym.min,
177 .extra2 = &xfs_params.inherit_nosym.max 155 .extra2 = &xfs_params.inherit_nosym.max
178 }, 156 },
179 { 157 {
180 .ctl_name = XFS_ROTORSTEP,
181 .procname = "rotorstep", 158 .procname = "rotorstep",
182 .data = &xfs_params.rotorstep.val, 159 .data = &xfs_params.rotorstep.val,
183 .maxlen = sizeof(int), 160 .maxlen = sizeof(int),
184 .mode = 0644, 161 .mode = 0644,
185 .proc_handler = &proc_dointvec_minmax, 162 .proc_handler = proc_dointvec_minmax,
186 .strategy = &sysctl_intvec,
187 .extra1 = &xfs_params.rotorstep.min, 163 .extra1 = &xfs_params.rotorstep.min,
188 .extra2 = &xfs_params.rotorstep.max 164 .extra2 = &xfs_params.rotorstep.max
189 }, 165 },
190 { 166 {
191 .ctl_name = XFS_INHERIT_NODFRG,
192 .procname = "inherit_nodefrag", 167 .procname = "inherit_nodefrag",
193 .data = &xfs_params.inherit_nodfrg.val, 168 .data = &xfs_params.inherit_nodfrg.val,
194 .maxlen = sizeof(int), 169 .maxlen = sizeof(int),
195 .mode = 0644, 170 .mode = 0644,
196 .proc_handler = &proc_dointvec_minmax, 171 .proc_handler = proc_dointvec_minmax,
197 .strategy = &sysctl_intvec,
198 .extra1 = &xfs_params.inherit_nodfrg.min, 172 .extra1 = &xfs_params.inherit_nodfrg.min,
199 .extra2 = &xfs_params.inherit_nodfrg.max 173 .extra2 = &xfs_params.inherit_nodfrg.max
200 }, 174 },
201 { 175 {
202 .ctl_name = XFS_FILESTREAM_TIMER,
203 .procname = "filestream_centisecs", 176 .procname = "filestream_centisecs",
204 .data = &xfs_params.fstrm_timer.val, 177 .data = &xfs_params.fstrm_timer.val,
205 .maxlen = sizeof(int), 178 .maxlen = sizeof(int),
206 .mode = 0644, 179 .mode = 0644,
207 .proc_handler = &proc_dointvec_minmax, 180 .proc_handler = proc_dointvec_minmax,
208 .strategy = &sysctl_intvec,
209 .extra1 = &xfs_params.fstrm_timer.min, 181 .extra1 = &xfs_params.fstrm_timer.min,
210 .extra2 = &xfs_params.fstrm_timer.max, 182 .extra2 = &xfs_params.fstrm_timer.max,
211 }, 183 },
212 /* please keep this the last entry */ 184 /* please keep this the last entry */
213#ifdef CONFIG_PROC_FS 185#ifdef CONFIG_PROC_FS
214 { 186 {
215 .ctl_name = XFS_STATS_CLEAR,
216 .procname = "stats_clear", 187 .procname = "stats_clear",
217 .data = &xfs_params.stats_clear.val, 188 .data = &xfs_params.stats_clear.val,
218 .maxlen = sizeof(int), 189 .maxlen = sizeof(int),
219 .mode = 0644, 190 .mode = 0644,
220 .proc_handler = &xfs_stats_clear_proc_handler, 191 .proc_handler = xfs_stats_clear_proc_handler,
221 .strategy = &sysctl_intvec,
222 .extra1 = &xfs_params.stats_clear.min, 192 .extra1 = &xfs_params.stats_clear.min,
223 .extra2 = &xfs_params.stats_clear.max 193 .extra2 = &xfs_params.stats_clear.max
224 }, 194 },
@@ -229,7 +199,6 @@ static ctl_table xfs_table[] = {
229 199
230static ctl_table xfs_dir_table[] = { 200static ctl_table xfs_dir_table[] = {
231 { 201 {
232 .ctl_name = FS_XFS,
233 .procname = "xfs", 202 .procname = "xfs",
234 .mode = 0555, 203 .mode = 0555,
235 .child = xfs_table 204 .child = xfs_table
@@ -239,7 +208,6 @@ static ctl_table xfs_dir_table[] = {
239 208
240static ctl_table xfs_root_table[] = { 209static ctl_table xfs_root_table[] = {
241 { 210 {
242 .ctl_name = CTL_FS,
243 .procname = "fs", 211 .procname = "fs",
244 .mode = 0555, 212 .mode = 0555,
245 .child = xfs_dir_table 213 .child = xfs_dir_table
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
new file mode 100644
index 00000000000..856eb3c8d60
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -0,0 +1,75 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h"
41#include "xfs_alloc.h"
42#include "xfs_bmap.h"
43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h"
47#include "xfs_buf_item.h"
48#include "xfs_quota.h"
49#include "xfs_iomap.h"
50#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h"
53
54/*
55 * Format fsblock number into a static buffer & return it.
56 */
57STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
58{
59 static char rval[50];
60
61 if (bno == NULLFSBLOCK)
62 sprintf(rval, "NULLFSBLOCK");
63 else if (isnullstartblock(bno))
64 sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
65 else
66 sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
67 return rval;
68}
69
70/*
71 * We include this last to have the helpers above available for the trace
72 * event implementations.
73 */
74#define CREATE_TRACE_POINTS
75#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
new file mode 100644
index 00000000000..c22a608321a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -0,0 +1,1422 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM xfs
20
21#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
22#define _TRACE_XFS_H
23
24#include <linux/tracepoint.h>
25
26struct xfs_agf;
27struct xfs_alloc_arg;
28struct xfs_attr_list_context;
29struct xfs_buf_log_item;
30struct xfs_da_args;
31struct xfs_da_node_entry;
32struct xfs_dquot;
33struct xlog_ticket;
34struct log;
35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
81#define DEFINE_ATTR_LIST_EVENT(name) \
82DEFINE_EVENT(xfs_attr_list_class, name, \
83 TP_PROTO(struct xfs_attr_list_context *ctx), \
84 TP_ARGS(ctx))
85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
88DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
89DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
90DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
91DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
92DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
93
94TRACE_EVENT(xfs_attr_list_node_descend,
95 TP_PROTO(struct xfs_attr_list_context *ctx,
96 struct xfs_da_node_entry *btree),
97 TP_ARGS(ctx, btree),
98 TP_STRUCT__entry(
99 __field(dev_t, dev)
100 __field(xfs_ino_t, ino)
101 __field(u32, hashval)
102 __field(u32, blkno)
103 __field(u32, offset)
104 __field(void *, alist)
105 __field(int, bufsize)
106 __field(int, count)
107 __field(int, firstu)
108 __field(int, dupcnt)
109 __field(int, flags)
110 __field(u32, bt_hashval)
111 __field(u32, bt_before)
112 ),
113 TP_fast_assign(
114 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
115 __entry->ino = ctx->dp->i_ino;
116 __entry->hashval = ctx->cursor->hashval;
117 __entry->blkno = ctx->cursor->blkno;
118 __entry->offset = ctx->cursor->offset;
119 __entry->alist = ctx->alist;
120 __entry->bufsize = ctx->bufsize;
121 __entry->count = ctx->count;
122 __entry->firstu = ctx->firstu;
123 __entry->flags = ctx->flags;
124 __entry->bt_hashval = be32_to_cpu(btree->hashval);
125 __entry->bt_before = be32_to_cpu(btree->before);
126 ),
127 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
128 "alist 0x%p size %u count %u firstu %u flags %d %s "
129 "node hashval %u, node before %u",
130 MAJOR(__entry->dev), MINOR(__entry->dev),
131 __entry->ino,
132 __entry->hashval,
133 __entry->blkno,
134 __entry->offset,
135 __entry->dupcnt,
136 __entry->alist,
137 __entry->bufsize,
138 __entry->count,
139 __entry->firstu,
140 __entry->flags,
141 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
142 __entry->bt_hashval,
143 __entry->bt_before)
144);
145
146TRACE_EVENT(xfs_iext_insert,
147 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
148 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
149 TP_ARGS(ip, idx, r, state, caller_ip),
150 TP_STRUCT__entry(
151 __field(dev_t, dev)
152 __field(xfs_ino_t, ino)
153 __field(xfs_extnum_t, idx)
154 __field(xfs_fileoff_t, startoff)
155 __field(xfs_fsblock_t, startblock)
156 __field(xfs_filblks_t, blockcount)
157 __field(xfs_exntst_t, state)
158 __field(int, bmap_state)
159 __field(unsigned long, caller_ip)
160 ),
161 TP_fast_assign(
162 __entry->dev = VFS_I(ip)->i_sb->s_dev;
163 __entry->ino = ip->i_ino;
164 __entry->idx = idx;
165 __entry->startoff = r->br_startoff;
166 __entry->startblock = r->br_startblock;
167 __entry->blockcount = r->br_blockcount;
168 __entry->state = r->br_state;
169 __entry->bmap_state = state;
170 __entry->caller_ip = caller_ip;
171 ),
172 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
173 "offset %lld block %s count %lld flag %d caller %pf",
174 MAJOR(__entry->dev), MINOR(__entry->dev),
175 __entry->ino,
176 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
177 (long)__entry->idx,
178 __entry->startoff,
179 xfs_fmtfsblock(__entry->startblock),
180 __entry->blockcount,
181 __entry->state,
182 (char *)__entry->caller_ip)
183);
184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
/* Instantiate a named event from the xfs_bmap_class above. */
#define DEFINE_BMAP_EVENT(name) \
DEFINE_EVENT(xfs_bmap_class, name, \
	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
		 unsigned long caller_ip), \
	TP_ARGS(ip, idx, state, caller_ip))
DEFINE_BMAP_EVENT(xfs_iext_remove);
DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
DEFINE_BMAP_EVENT(xfs_bmap_post_update);
DEFINE_BMAP_EVENT(xfs_extlist);
238
/*
 * Event class for xfs_buf lifecycle/locking events: snapshots the buffer's
 * location, length, hold/pin counts, lock value and flags, plus the
 * caller's return address.
 */
DECLARE_EVENT_CLASS(xfs_buf_class,
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
	TP_ARGS(bp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(size_t, buffer_length)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(unsigned, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = bp->b_target->bt_dev;
		__entry->bno = bp->b_bn;
		__entry->buffer_length = bp->b_buffer_length;
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = xfs_buf_lock_value(bp);
		__entry->flags = bp->b_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  "lock %d flags %s caller %pf",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->buffer_length,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
		  (void *)__entry->caller_ip)
)
273
/* Instantiate a named event from the xfs_buf_class above. */
#define DEFINE_BUF_EVENT(name) \
DEFINE_EVENT(xfs_buf_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
	TP_ARGS(bp, caller_ip))
DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_pin);
DEFINE_BUF_EVENT(xfs_buf_unpin);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_iorequest);
DEFINE_BUF_EVENT(xfs_buf_bawrite);
DEFINE_BUF_EVENT(xfs_buf_bdwrite);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);

/* not really buffer traces, but the buf provides useful information */
DEFINE_BUF_EVENT(xfs_btree_corrupt);
DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
DEFINE_BUF_EVENT(xfs_reset_dqcounts);
DEFINE_BUF_EVENT(xfs_inode_item_push);
312
313/* pass flags explicitly */
/*
 * Like xfs_buf_class, but logs explicitly passed lookup flags rather than
 * the buffer's own b_flags (used by buffer find/get/read paths where the
 * caller's flags matter before they are merged into the buffer).
 */
DECLARE_EVENT_CLASS(xfs_buf_flags_class,
	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
	TP_ARGS(bp, flags, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(size_t, buffer_length)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(unsigned, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = bp->b_target->bt_dev;
		__entry->bno = bp->b_bn;
		__entry->buffer_length = bp->b_buffer_length;
		__entry->flags = flags;
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = xfs_buf_lock_value(bp);
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  "lock %d flags %s caller %pf",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->buffer_length,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
		  (void *)__entry->caller_ip)
)
348
/* Instantiate a named event from the xfs_buf_flags_class above. */
#define DEFINE_BUF_FLAGS_EVENT(name) \
DEFINE_EVENT(xfs_buf_flags_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
	TP_ARGS(bp, flags, caller_ip))
DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
356
/*
 * Trace an I/O error on a buffer: xfs_buf_class-style state plus the
 * error code being recorded against the buffer.
 */
TRACE_EVENT(xfs_buf_ioerror,
	TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
	TP_ARGS(bp, error, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(size_t, buffer_length)
		__field(unsigned, flags)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(int, error)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = bp->b_target->bt_dev;
		__entry->bno = bp->b_bn;
		__entry->buffer_length = bp->b_buffer_length;
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = xfs_buf_lock_value(bp);
		__entry->error = error;
		__entry->flags = bp->b_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  "lock %d error %d flags %s caller %pf",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->buffer_length,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __entry->error,
		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
		  (void *)__entry->caller_ip)
);
394
/*
 * Event class for buffer log item events: snapshots both the underlying
 * buffer state and the log item state (recursion count, refcount, flags
 * and the transaction log item descriptor).
 */
DECLARE_EVENT_CLASS(xfs_buf_item_class,
	TP_PROTO(struct xfs_buf_log_item *bip),
	TP_ARGS(bip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, buf_bno)
		__field(size_t, buf_len)
		__field(int, buf_hold)
		__field(int, buf_pincount)
		__field(int, buf_lockval)
		__field(unsigned, buf_flags)
		__field(unsigned, bli_recur)
		__field(int, bli_refcount)
		__field(unsigned, bli_flags)
		__field(void *, li_desc)
		__field(unsigned, li_flags)
	),
	TP_fast_assign(
		__entry->dev = bip->bli_buf->b_target->bt_dev;
		__entry->bli_flags = bip->bli_flags;
		__entry->bli_recur = bip->bli_recur;
		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
		__entry->buf_bno = bip->bli_buf->b_bn;
		__entry->buf_len = bip->bli_buf->b_buffer_length;
		__entry->buf_flags = bip->bli_buf->b_flags;
		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
		__entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
		__entry->li_desc = bip->bli_item.li_desc;
		__entry->li_flags = bip->bli_item.li_flags;
	),
	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  "lock %d flags %s recur %d refcount %d bliflags %s "
		  "lidesc 0x%p liflags %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->buf_bno,
		  __entry->buf_len,
		  __entry->buf_hold,
		  __entry->buf_pincount,
		  __entry->buf_lockval,
		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
		  __entry->bli_recur,
		  __entry->bli_refcount,
		  __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
		  __entry->li_desc,
		  __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
)
442
/* Instantiate a named event from the xfs_buf_item_class above. */
#define DEFINE_BUF_ITEM_EVENT(name) \
DEFINE_EVENT(xfs_buf_item_class, name, \
	TP_PROTO(struct xfs_buf_log_item *bip), \
	TP_ARGS(bip))
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
471
/*
 * Event class for inode lock/unlock events: inode identity plus the lock
 * flags requested and the caller's return address.
 */
DECLARE_EVENT_CLASS(xfs_lock_class,
	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
		 unsigned long caller_ip),
	TP_ARGS(ip, lock_flags, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, lock_flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->lock_flags = lock_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
		  (void *)__entry->caller_ip)
)
494
/* Instantiate a named event from the xfs_lock_class above. */
#define DEFINE_LOCK_EVENT(name) \
DEFINE_EVENT(xfs_lock_class, name, \
	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
		 unsigned long caller_ip), \
	TP_ARGS(ip, lock_flags, caller_ip))
DEFINE_LOCK_EVENT(xfs_ilock);
DEFINE_LOCK_EVENT(xfs_ilock_nowait);
DEFINE_LOCK_EVENT(xfs_ilock_demote);
DEFINE_LOCK_EVENT(xfs_iunlock);
504
/* Minimal event class identifying an inode by device and inode number. */
DECLARE_EVENT_CLASS(xfs_iget_class,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
	),
	TP_printk("dev %d:%d ino 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino)
)
520
/* Instantiate a named event from the xfs_iget_class above. */
#define DEFINE_IGET_EVENT(name) \
DEFINE_EVENT(xfs_iget_class, name, \
	TP_PROTO(struct xfs_inode *ip), \
	TP_ARGS(ip))
DEFINE_IGET_EVENT(xfs_iget_skip);
DEFINE_IGET_EVENT(xfs_iget_reclaim);
DEFINE_IGET_EVENT(xfs_iget_found);
DEFINE_IGET_EVENT(xfs_iget_alloc);
529
530DECLARE_EVENT_CLASS(xfs_inode_class,
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count);
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (char *)__entry->caller_ip)
550)
551
/* Instantiate a named event from the xfs_inode_class above. */
#define DEFINE_INODE_EVENT(name) \
DEFINE_EVENT(xfs_inode_class, name, \
	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
	TP_ARGS(ip, caller_ip))
DEFINE_INODE_EVENT(xfs_ihold);
DEFINE_INODE_EVENT(xfs_irele);
/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
DEFINE_INODE_EVENT(xfs_inode);
#define xfs_itrace_entry(ip)    \
	trace_xfs_inode(ip, _THIS_IP_)
562
563DECLARE_EVENT_CLASS(xfs_dquot_class,
564 TP_PROTO(struct xfs_dquot *dqp),
565 TP_ARGS(dqp),
566 TP_STRUCT__entry(
567 __field(dev_t, dev)
568 __field(__be32, id)
569 __field(unsigned, flags)
570 __field(unsigned, nrefs)
571 __field(unsigned long long, res_bcount)
572 __field(unsigned long long, bcount)
573 __field(unsigned long long, icount)
574 __field(unsigned long long, blk_hardlimit)
575 __field(unsigned long long, blk_softlimit)
576 __field(unsigned long long, ino_hardlimit)
577 __field(unsigned long long, ino_softlimit)
578 ), \
579 TP_fast_assign(
580 __entry->dev = dqp->q_mount->m_super->s_dev;
581 __entry->id = dqp->q_core.d_id;
582 __entry->flags = dqp->dq_flags;
583 __entry->nrefs = dqp->q_nrefs;
584 __entry->res_bcount = dqp->q_res_bcount;
585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
587 __entry->blk_hardlimit =
588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
589 __entry->blk_softlimit =
590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
591 __entry->ino_hardlimit =
592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 be32_to_cpu(__entry->id),
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
610)
611
/* Instantiate a named event from the xfs_dquot_class above. */
#define DEFINE_DQUOT_EVENT(name) \
DEFINE_EVENT(xfs_dquot_class, name, \
	TP_PROTO(struct xfs_dquot *dqp), \
	TP_ARGS(dqp))
DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get);
DEFINE_DQUOT_EVENT(xfs_dqinit);
DEFINE_DQUOT_EVENT(xfs_dqreuse);
DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqput);
DEFINE_DQUOT_EVENT(xfs_dqput_wait);
DEFINE_DQUOT_EVENT(xfs_dqput_free);
DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
DEFINE_DQUOT_EVENT(xfs_dqflush_force);
DEFINE_DQUOT_EVENT(xfs_dqflush_done);
/* not really iget events, but we re-use the format */
DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
647
/*
 * Event class for log grant space accounting: snapshots the ticket's
 * reservation state and the log's grant heads, current head position and
 * tail LSN at the time of the event.
 */
DECLARE_EVENT_CLASS(xfs_loggrant_class,
	TP_PROTO(struct log *log, struct xlog_ticket *tic),
	TP_ARGS(log, tic),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned, trans_type)
		__field(char, ocnt)
		__field(char, cnt)
		__field(int, curr_res)
		__field(int, unit_res)
		__field(unsigned int, flags)
		__field(void *, reserve_headq)
		__field(void *, write_headq)
		__field(int, grant_reserve_cycle)
		__field(int, grant_reserve_bytes)
		__field(int, grant_write_cycle)
		__field(int, grant_write_bytes)
		__field(int, curr_cycle)
		__field(int, curr_block)
		__field(xfs_lsn_t, tail_lsn)
	),
	TP_fast_assign(
		__entry->dev = log->l_mp->m_super->s_dev;
		__entry->trans_type = tic->t_trans_type;
		__entry->ocnt = tic->t_ocnt;
		__entry->cnt = tic->t_cnt;
		__entry->curr_res = tic->t_curr_res;
		__entry->unit_res = tic->t_unit_res;
		__entry->flags = tic->t_flags;
		__entry->reserve_headq = log->l_reserve_headq;
		__entry->write_headq = log->l_write_headq;
		__entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
		__entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
		__entry->grant_write_cycle = log->l_grant_write_cycle;
		__entry->grant_write_bytes = log->l_grant_write_bytes;
		__entry->curr_cycle = log->l_curr_cycle;
		__entry->curr_block = log->l_curr_block;
		__entry->tail_lsn = log->l_tail_lsn;
	),
	TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
		  "t_unit_res %u t_flags %s reserve_headq 0x%p "
		  "write_headq 0x%p grant_reserve_cycle %d "
		  "grant_reserve_bytes %d grant_write_cycle %d "
		  "grant_write_bytes %d curr_cycle %d curr_block %d "
		  "tail_cycle %d tail_block %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
		  __entry->ocnt,
		  __entry->cnt,
		  __entry->curr_res,
		  __entry->unit_res,
		  __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
		  __entry->reserve_headq,
		  __entry->write_headq,
		  __entry->grant_reserve_cycle,
		  __entry->grant_reserve_bytes,
		  __entry->grant_write_cycle,
		  __entry->grant_write_bytes,
		  __entry->curr_cycle,
		  __entry->curr_block,
		  CYCLE_LSN(__entry->tail_lsn),
		  BLOCK_LSN(__entry->tail_lsn)
	)
)
712
/* Instantiate a named event from the xfs_loggrant_class above. */
#define DEFINE_LOGGRANT_EVENT(name) \
DEFINE_EVENT(xfs_loggrant_class, name, \
	TP_PROTO(struct log *log, struct xlog_ticket *tic), \
	TP_ARGS(log, tic))
DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
741
/*
 * Generate a full TRACE_EVENT for a file read/write path event: inode
 * identity and sizes plus the I/O offset, byte count and ioflags.
 */
#define DEFINE_RW_EVENT(name) \
TRACE_EVENT(name, \
	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
	TP_ARGS(ip, count, offset, flags), \
	TP_STRUCT__entry( \
		__field(dev_t, dev) \
		__field(xfs_ino_t, ino) \
		__field(xfs_fsize_t, size) \
		__field(xfs_fsize_t, new_size) \
		__field(loff_t, offset) \
		__field(size_t, count) \
		__field(int, flags) \
	), \
	TP_fast_assign( \
		__entry->dev = VFS_I(ip)->i_sb->s_dev; \
		__entry->ino = ip->i_ino; \
		__entry->size = ip->i_d.di_size; \
		__entry->new_size = ip->i_new_size; \
		__entry->offset = offset; \
		__entry->count = count; \
		__entry->flags = flags; \
	), \
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
		  "offset 0x%llx count 0x%zx ioflags %s", \
		  MAJOR(__entry->dev), MINOR(__entry->dev), \
		  __entry->ino, \
		  __entry->size, \
		  __entry->new_size, \
		  __entry->offset, \
		  __entry->count, \
		  __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
)
DEFINE_RW_EVENT(xfs_file_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DEFINE_RW_EVENT(xfs_file_splice_write);
779
780
/*
 * Generate a full TRACE_EVENT for a pagecache event: inode identity, page
 * offset and size, plus per-page buffer state (delalloc/unmapped/unwritten
 * counts, -1 each when the page has no buffers attached).
 */
#define DEFINE_PAGE_EVENT(name) \
TRACE_EVENT(name, \
	TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
	TP_ARGS(inode, page, off), \
	TP_STRUCT__entry( \
		__field(dev_t, dev) \
		__field(xfs_ino_t, ino) \
		__field(pgoff_t, pgoff) \
		__field(loff_t, size) \
		__field(unsigned long, offset) \
		__field(int, delalloc) \
		__field(int, unmapped) \
		__field(int, unwritten) \
	), \
	TP_fast_assign( \
		int delalloc = -1, unmapped = -1, unwritten = -1; \
		\
		if (page_has_buffers(page)) \
			xfs_count_page_state(page, &delalloc, \
					     &unmapped, &unwritten); \
		__entry->dev = inode->i_sb->s_dev; \
		__entry->ino = XFS_I(inode)->i_ino; \
		__entry->pgoff = page_offset(page); \
		__entry->size = i_size_read(inode); \
		__entry->offset = off; \
		__entry->delalloc = delalloc; \
		__entry->unmapped = unmapped; \
		__entry->unwritten = unwritten; \
	), \
	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
		  "delalloc %d unmapped %d unwritten %d", \
		  MAJOR(__entry->dev), MINOR(__entry->dev), \
		  __entry->ino, \
		  __entry->pgoff, \
		  __entry->size, \
		  __entry->offset, \
		  __entry->delalloc, \
		  __entry->unmapped, \
		  __entry->unwritten) \
)
DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
824
/*
 * Generate a full TRACE_EVENT for an iomap lookup event: inode identity,
 * I/O range and flags plus the resulting extent mapping (zeroed when
 * @irec is NULL, e.g. on the enter path before a mapping exists).
 */
#define DEFINE_IOMAP_EVENT(name) \
TRACE_EVENT(name, \
	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
		 int flags, struct xfs_bmbt_irec *irec), \
	TP_ARGS(ip, offset, count, flags, irec), \
	TP_STRUCT__entry( \
		__field(dev_t, dev) \
		__field(xfs_ino_t, ino) \
		__field(loff_t, size) \
		__field(loff_t, new_size) \
		__field(loff_t, offset) \
		__field(size_t, count) \
		__field(int, flags) \
		__field(xfs_fileoff_t, startoff) \
		__field(xfs_fsblock_t, startblock) \
		__field(xfs_filblks_t, blockcount) \
	), \
	TP_fast_assign( \
		__entry->dev = VFS_I(ip)->i_sb->s_dev; \
		__entry->ino = ip->i_ino; \
		__entry->size = ip->i_d.di_size; \
		__entry->new_size = ip->i_new_size; \
		__entry->offset = offset; \
		__entry->count = count; \
		__entry->flags = flags; \
		__entry->startoff = irec ? irec->br_startoff : 0; \
		__entry->startblock = irec ? irec->br_startblock : 0; \
		__entry->blockcount = irec ? irec->br_blockcount : 0; \
	), \
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
		  "offset 0x%llx count %zd flags %s " \
		  "startoff 0x%llx startblock %s blockcount 0x%llx", \
		  MAJOR(__entry->dev), MINOR(__entry->dev), \
		  __entry->ino, \
		  __entry->size, \
		  __entry->new_size, \
		  __entry->offset, \
		  __entry->count, \
		  __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
		  __entry->startoff, \
		  xfs_fmtfsblock(__entry->startblock), \
		  __entry->blockcount) \
)
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
DEFINE_IOMAP_EVENT(xfs_iomap_found);
DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
871
872#define DEFINE_SIMPLE_IO_EVENT(name) \
873TRACE_EVENT(name, \
874 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
875 TP_ARGS(ip, offset, count), \
876 TP_STRUCT__entry( \
877 __field(dev_t, dev) \
878 __field(xfs_ino_t, ino) \
879 __field(loff_t, size) \
880 __field(loff_t, new_size) \
881 __field(loff_t, offset) \
882 __field(size_t, count) \
883 ), \
884 TP_fast_assign( \
885 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
886 __entry->ino = ip->i_ino; \
887 __entry->size = ip->i_d.di_size; \
888 __entry->new_size = ip->i_new_size; \
889 __entry->offset = offset; \
890 __entry->count = count; \
891 ), \
892 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
893 "offset 0x%llx count %zd", \
894 MAJOR(__entry->dev), MINOR(__entry->dev), \
895 __entry->ino, \
896 __entry->size, \
897 __entry->new_size, \
898 __entry->offset, \
899 __entry->count) \
900);
901DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
902DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
903
904
/*
 * Trace the start of an inode truncate: old and new sizes, the truncate
 * flags and the pagecache range that will be tossed.
 */
TRACE_EVENT(xfs_itruncate_start,
	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
		 xfs_off_t toss_start, xfs_off_t toss_finish),
	TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsize_t, size)
		__field(xfs_fsize_t, new_size)
		__field(xfs_off_t, toss_start)
		__field(xfs_off_t, toss_finish)
		__field(int, flag)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->size = ip->i_d.di_size;
		__entry->new_size = new_size;
		__entry->toss_start = toss_start;
		__entry->toss_finish = toss_finish;
		__entry->flag = flag;
	),
	TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
		  "toss start 0x%llx toss finish 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
		  __entry->size,
		  __entry->new_size,
		  __entry->toss_start,
		  __entry->toss_finish)
);
937
/* Event class for truncate progress events: inode plus old and new size. */
DECLARE_EVENT_CLASS(xfs_itrunc_class,
	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
	TP_ARGS(ip, new_size),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsize_t, size)
		__field(xfs_fsize_t, new_size)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->size = ip->i_d.di_size;
		__entry->new_size = new_size;
	),
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->size,
		  __entry->new_size)
)
959
/* Instantiate a named event from the xfs_itrunc_class above. */
#define DEFINE_ITRUNC_EVENT(name) \
DEFINE_EVENT(xfs_itrunc_class, name, \
	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
	TP_ARGS(ip, new_size))
DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
966
/* Trace invalidation of a pagecache range [start, finish] on an inode. */
TRACE_EVENT(xfs_pagecache_inval,
	TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
	TP_ARGS(ip, start, finish),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsize_t, size)
		__field(xfs_off_t, start)
		__field(xfs_off_t, finish)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->size = ip->i_d.di_size;
		__entry->start = start;
		__entry->finish = finish;
	),
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->size,
		  __entry->start,
		  __entry->finish)
);
991
992TRACE_EVENT(xfs_bunmap,
993 TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
994 int flags, unsigned long caller_ip),
995 TP_ARGS(ip, bno, len, flags, caller_ip),
996 TP_STRUCT__entry(
997 __field(dev_t, dev)
998 __field(xfs_ino_t, ino)
999 __field(xfs_fsize_t, size)
1000 __field(xfs_fileoff_t, bno)
1001 __field(xfs_filblks_t, len)
1002 __field(unsigned long, caller_ip)
1003 __field(int, flags)
1004 ),
1005 TP_fast_assign(
1006 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1007 __entry->ino = ip->i_ino;
1008 __entry->size = ip->i_d.di_size;
1009 __entry->bno = bno;
1010 __entry->len = len;
1011 __entry->caller_ip = caller_ip;
1012 __entry->flags = flags;
1013 ),
1014 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1015 "flags %s caller %pf",
1016 MAJOR(__entry->dev), MINOR(__entry->dev),
1017 __entry->ino,
1018 __entry->size,
1019 __entry->bno,
1020 __entry->len,
1021 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
1022 (void *)__entry->caller_ip)
1023
1024);
1025
/* Trace marking an extent busy in the per-AG busy extent array. */
TRACE_EVENT(xfs_alloc_busy,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
		 xfs_extlen_t len, int slot),
	TP_ARGS(mp, agno, agbno, len, slot),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(xfs_extlen_t, len)
		__field(int, slot)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agbno = agbno;
		__entry->len = len;
		__entry->slot = slot;
	),
	TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agbno,
		  __entry->len,
		  __entry->slot)

);
1052
/* Symbolic names for the "found" result printed by xfs_alloc_unbusy. */
#define XFS_BUSY_STATES \
	{ 0,	"found" }, \
	{ 1,	"missing" }

/* Trace clearing of a busy extent slot and whether it was still present. */
TRACE_EVENT(xfs_alloc_unbusy,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 int slot, int found),
	TP_ARGS(mp, agno, slot, found),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(int, slot)
		__field(int, found)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->slot = slot;
		__entry->found = found;
	),
	TP_printk("dev %d:%d agno %u slot %d %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->slot,
		  __print_symbolic(__entry->found, XFS_BUSY_STATES))
);
1079
/*
 * Trace a search of the busy extent array for a candidate allocation,
 * including the LSN the log must be forced to before reuse.
 */
TRACE_EVENT(xfs_alloc_busysearch,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
		 xfs_extlen_t len, xfs_lsn_t lsn),
	TP_ARGS(mp, agno, agbno, len, lsn),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(xfs_extlen_t, len)
		__field(xfs_lsn_t, lsn)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agbno = agbno;
		__entry->len = len;
		__entry->lsn = lsn;
	),
	TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agbno,
		  __entry->len,
		  __entry->lsn)
);
1105
1106TRACE_EVENT(xfs_agf,
1107 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
1108 unsigned long caller_ip),
1109 TP_ARGS(mp, agf, flags, caller_ip),
1110 TP_STRUCT__entry(
1111 __field(dev_t, dev)
1112 __field(xfs_agnumber_t, agno)
1113 __field(int, flags)
1114 __field(__u32, length)
1115 __field(__u32, bno_root)
1116 __field(__u32, cnt_root)
1117 __field(__u32, bno_level)
1118 __field(__u32, cnt_level)
1119 __field(__u32, flfirst)
1120 __field(__u32, fllast)
1121 __field(__u32, flcount)
1122 __field(__u32, freeblks)
1123 __field(__u32, longest)
1124 __field(unsigned long, caller_ip)
1125 ),
1126 TP_fast_assign(
1127 __entry->dev = mp->m_super->s_dev;
1128 __entry->agno = be32_to_cpu(agf->agf_seqno),
1129 __entry->flags = flags;
1130 __entry->length = be32_to_cpu(agf->agf_length),
1131 __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
1132 __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
1133 __entry->bno_level =
1134 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
1135 __entry->cnt_level =
1136 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
1137 __entry->flfirst = be32_to_cpu(agf->agf_flfirst),
1138 __entry->fllast = be32_to_cpu(agf->agf_fllast),
1139 __entry->flcount = be32_to_cpu(agf->agf_flcount),
1140 __entry->freeblks = be32_to_cpu(agf->agf_freeblks),
1141 __entry->longest = be32_to_cpu(agf->agf_longest);
1142 __entry->caller_ip = caller_ip;
1143 ),
1144 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1145 "levels b %u c %u flfirst %u fllast %u flcount %u "
1146 "freeblks %u longest %u caller %pf",
1147 MAJOR(__entry->dev), MINOR(__entry->dev),
1148 __entry->agno,
1149 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
1150 __entry->length,
1151 __entry->bno_root,
1152 __entry->cnt_root,
1153 __entry->bno_level,
1154 __entry->cnt_level,
1155 __entry->flfirst,
1156 __entry->fllast,
1157 __entry->flcount,
1158 __entry->freeblks,
1159 __entry->longest,
1160 (void *)__entry->caller_ip)
1161);
1162
1163TRACE_EVENT(xfs_free_extent,
1164 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1165 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
1166 TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
1167 TP_STRUCT__entry(
1168 __field(dev_t, dev)
1169 __field(xfs_agnumber_t, agno)
1170 __field(xfs_agblock_t, agbno)
1171 __field(xfs_extlen_t, len)
1172 __field(int, isfl)
1173 __field(int, haveleft)
1174 __field(int, haveright)
1175 ),
1176 TP_fast_assign(
1177 __entry->dev = mp->m_super->s_dev;
1178 __entry->agno = agno;
1179 __entry->agbno = agbno;
1180 __entry->len = len;
1181 __entry->isfl = isfl;
1182 __entry->haveleft = haveleft;
1183 __entry->haveright = haveright;
1184 ),
1185 TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
1186 MAJOR(__entry->dev), MINOR(__entry->dev),
1187 __entry->agno,
1188 __entry->agbno,
1189 __entry->len,
1190 __entry->isfl,
1191 __entry->haveleft ?
1192 (__entry->haveright ? "both" : "left") :
1193 (__entry->haveright ? "right" : "none"))
1194
1195);
1196
1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1198 TP_PROTO(struct xfs_alloc_arg *args),
1199 TP_ARGS(args),
1200 TP_STRUCT__entry(
1201 __field(dev_t, dev)
1202 __field(xfs_agnumber_t, agno)
1203 __field(xfs_agblock_t, agbno)
1204 __field(xfs_extlen_t, minlen)
1205 __field(xfs_extlen_t, maxlen)
1206 __field(xfs_extlen_t, mod)
1207 __field(xfs_extlen_t, prod)
1208 __field(xfs_extlen_t, minleft)
1209 __field(xfs_extlen_t, total)
1210 __field(xfs_extlen_t, alignment)
1211 __field(xfs_extlen_t, minalignslop)
1212 __field(xfs_extlen_t, len)
1213 __field(short, type)
1214 __field(short, otype)
1215 __field(char, wasdel)
1216 __field(char, wasfromfl)
1217 __field(char, isfl)
1218 __field(char, userdata)
1219 __field(xfs_fsblock_t, firstblock)
1220 ),
1221 TP_fast_assign(
1222 __entry->dev = args->mp->m_super->s_dev;
1223 __entry->agno = args->agno;
1224 __entry->agbno = args->agbno;
1225 __entry->minlen = args->minlen;
1226 __entry->maxlen = args->maxlen;
1227 __entry->mod = args->mod;
1228 __entry->prod = args->prod;
1229 __entry->minleft = args->minleft;
1230 __entry->total = args->total;
1231 __entry->alignment = args->alignment;
1232 __entry->minalignslop = args->minalignslop;
1233 __entry->len = args->len;
1234 __entry->type = args->type;
1235 __entry->otype = args->otype;
1236 __entry->wasdel = args->wasdel;
1237 __entry->wasfromfl = args->wasfromfl;
1238 __entry->isfl = args->isfl;
1239 __entry->userdata = args->userdata;
1240 __entry->firstblock = args->firstblock;
1241 ),
1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1245 "userdata %d firstblock 0x%llx",
1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1247 __entry->agno,
1248 __entry->agbno,
1249 __entry->minlen,
1250 __entry->maxlen,
1251 __entry->mod,
1252 __entry->prod,
1253 __entry->minleft,
1254 __entry->total,
1255 __entry->alignment,
1256 __entry->minalignslop,
1257 __entry->len,
1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1260 __entry->wasdel,
1261 __entry->wasfromfl,
1262 __entry->isfl,
1263 __entry->userdata,
1264 __entry->firstblock)
1265)
1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1274DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1275DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1276DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1277DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1278DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1279DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1280DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1281DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1282DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1283DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1284DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1285DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
1286DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
1287DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
1288DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
1289DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1292
1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1329 TP_PROTO(struct xfs_da_args *args), \
1330 TP_ARGS(args))
1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1346DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1347DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1348DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1349DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1350DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1351DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1352DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1353DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1354DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1355DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1356
1357DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1358 TP_PROTO(struct xfs_da_args *args, int idx),
1359 TP_ARGS(args, idx),
1360 TP_STRUCT__entry(
1361 __field(dev_t, dev)
1362 __field(xfs_ino_t, ino)
1363 __field(int, op_flags)
1364 __field(int, idx)
1365 ),
1366 TP_fast_assign(
1367 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1368 __entry->ino = args->dp->i_ino;
1369 __entry->op_flags = args->op_flags;
1370 __entry->idx = idx;
1371 ),
1372 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1373 MAJOR(__entry->dev), MINOR(__entry->dev),
1374 __entry->ino,
1375 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1376 __entry->idx)
1377)
1378
1379#define DEFINE_DIR2_SPACE_EVENT(name) \
1380DEFINE_EVENT(xfs_dir2_space_class, name, \
1381 TP_PROTO(struct xfs_da_args *args, int idx), \
1382 TP_ARGS(args, idx))
1383DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1384DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1385DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1386DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1387
1388TRACE_EVENT(xfs_dir2_leafn_moveents,
1389 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
1390 TP_ARGS(args, src_idx, dst_idx, count),
1391 TP_STRUCT__entry(
1392 __field(dev_t, dev)
1393 __field(xfs_ino_t, ino)
1394 __field(int, op_flags)
1395 __field(int, src_idx)
1396 __field(int, dst_idx)
1397 __field(int, count)
1398 ),
1399 TP_fast_assign(
1400 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1401 __entry->ino = args->dp->i_ino;
1402 __entry->op_flags = args->op_flags;
1403 __entry->src_idx = src_idx;
1404 __entry->dst_idx = dst_idx;
1405 __entry->count = count;
1406 ),
1407 TP_printk("dev %d:%d ino 0x%llx op_flags %s "
1408 "src_idx %d dst_idx %d count %d",
1409 MAJOR(__entry->dev), MINOR(__entry->dev),
1410 __entry->ino,
1411 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1412 __entry->src_idx,
1413 __entry->dst_idx,
1414 __entry->count)
1415);
1416
1417#endif /* _TRACE_XFS_H */
1418
1419#undef TRACE_INCLUDE_PATH
1420#define TRACE_INCLUDE_PATH .
1421#define TRACE_INCLUDE_FILE xfs_trace
1422#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index ad7fbead4c9..7c220b4227b 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -36,10 +36,13 @@ struct attrlist_cursor_kern;
36/* 36/*
37 * Flags for read/write calls - same values as IRIX 37 * Flags for read/write calls - same values as IRIX
38 */ 38 */
39#define IO_ISAIO 0x00001 /* don't wait for completion */
40#define IO_ISDIRECT 0x00004 /* bypass page cache */ 39#define IO_ISDIRECT 0x00004 /* bypass page cache */
41#define IO_INVIS 0x00020 /* don't update inode timestamps */ 40#define IO_INVIS 0x00020 /* don't update inode timestamps */
42 41
42#define XFS_IO_FLAGS \
43 { IO_ISDIRECT, "DIRECT" }, \
44 { IO_INVIS, "INVIS"}
45
43/* 46/*
44 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 47 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
45 */ 48 */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc..0b1878857fc 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
30 30
31 31
32static int 32static int
33__xfs_xattr_get(struct inode *inode, const char *name, 33xfs_xattr_get(struct dentry *dentry, const char *name,
34 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
35{ 35{
36 struct xfs_inode *ip = XFS_I(inode); 36 struct xfs_inode *ip = XFS_I(dentry->d_inode);
37 int error, asize = size; 37 int error, asize = size;
38 38
39 if (strcmp(name, "") == 0) 39 if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
52} 52}
53 53
54static int 54static int
55__xfs_xattr_set(struct inode *inode, const char *name, const void *value, 55xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
56 size_t size, int flags, int xflags) 56 size_t size, int flags, int xflags)
57{ 57{
58 struct xfs_inode *ip = XFS_I(inode); 58 struct xfs_inode *ip = XFS_I(dentry->d_inode);
59 59
60 if (strcmp(name, "") == 0) 60 if (strcmp(name, "") == 0)
61 return -EINVAL; 61 return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
72} 72}
73 73
74static int
75xfs_xattr_user_get(struct inode *inode, const char *name,
76 void *value, size_t size)
77{
78 return __xfs_xattr_get(inode, name, value, size, 0);
79}
80
81static int
82xfs_xattr_user_set(struct inode *inode, const char *name,
83 const void *value, size_t size, int flags)
84{
85 return __xfs_xattr_set(inode, name, value, size, flags, 0);
86}
87
88static struct xattr_handler xfs_xattr_user_handler = { 74static struct xattr_handler xfs_xattr_user_handler = {
89 .prefix = XATTR_USER_PREFIX, 75 .prefix = XATTR_USER_PREFIX,
90 .get = xfs_xattr_user_get, 76 .flags = 0, /* no flags implies user namespace */
91 .set = xfs_xattr_user_set, 77 .get = xfs_xattr_get,
78 .set = xfs_xattr_set,
92}; 79};
93 80
94
95static int
96xfs_xattr_trusted_get(struct inode *inode, const char *name,
97 void *value, size_t size)
98{
99 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
100}
101
102static int
103xfs_xattr_trusted_set(struct inode *inode, const char *name,
104 const void *value, size_t size, int flags)
105{
106 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
107}
108
109static struct xattr_handler xfs_xattr_trusted_handler = { 81static struct xattr_handler xfs_xattr_trusted_handler = {
110 .prefix = XATTR_TRUSTED_PREFIX, 82 .prefix = XATTR_TRUSTED_PREFIX,
111 .get = xfs_xattr_trusted_get, 83 .flags = ATTR_ROOT,
112 .set = xfs_xattr_trusted_set, 84 .get = xfs_xattr_get,
85 .set = xfs_xattr_set,
113}; 86};
114 87
115
116static int
117xfs_xattr_secure_get(struct inode *inode, const char *name,
118 void *value, size_t size)
119{
120 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
121}
122
123static int
124xfs_xattr_secure_set(struct inode *inode, const char *name,
125 const void *value, size_t size, int flags)
126{
127 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
128}
129
130static struct xattr_handler xfs_xattr_security_handler = { 88static struct xattr_handler xfs_xattr_security_handler = {
131 .prefix = XATTR_SECURITY_PREFIX, 89 .prefix = XATTR_SECURITY_PREFIX,
132 .get = xfs_xattr_secure_get, 90 .flags = ATTR_SECURE,
133 .set = xfs_xattr_secure_set, 91 .get = xfs_xattr_get,
92 .set = xfs_xattr_set,
134}; 93};
135 94
136
137struct xattr_handler *xfs_xattr_handlers[] = { 95struct xattr_handler *xfs_xattr_handlers[] = {
138 &xfs_xattr_user_handler, 96 &xfs_xattr_user_handler,
139 &xfs_xattr_trusted_handler, 97 &xfs_xattr_trusted_handler,
140 &xfs_xattr_security_handler, 98 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL 99#ifdef CONFIG_XFS_POSIX_ACL
142 &xfs_xattr_system_handler, 100 &xfs_xattr_acl_access_handler,
101 &xfs_xattr_acl_default_handler,
143#endif 102#endif
144 NULL 103 NULL
145}; 104};
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 2f3f2229eaa..d7c7eea09fc 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51 52
52/* 53/*
@@ -112,10 +113,7 @@ xfs_qm_dqinit(
112 init_completion(&dqp->q_flush); 113 init_completion(&dqp->q_flush);
113 complete(&dqp->q_flush); 114 complete(&dqp->q_flush);
114 115
115#ifdef XFS_DQUOT_TRACE 116 trace_xfs_dqinit(dqp);
116 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
117 xfs_dqtrace_entry(dqp, "DQINIT");
118#endif
119 } else { 117 } else {
120 /* 118 /*
121 * Only the q_core portion was zeroed in dqreclaim_one(). 119 * Only the q_core portion was zeroed in dqreclaim_one().
@@ -136,10 +134,7 @@ xfs_qm_dqinit(
136 dqp->q_hash = NULL; 134 dqp->q_hash = NULL;
137 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 135 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
138 136
139#ifdef XFS_DQUOT_TRACE 137 trace_xfs_dqreuse(dqp);
140 ASSERT(dqp->q_trace);
141 xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
142#endif
143 } 138 }
144 139
145 /* 140 /*
@@ -167,13 +162,8 @@ xfs_qm_dqdestroy(
167 162
168 mutex_destroy(&dqp->q_qlock); 163 mutex_destroy(&dqp->q_qlock);
169 sv_destroy(&dqp->q_pinwait); 164 sv_destroy(&dqp->q_pinwait);
170
171#ifdef XFS_DQUOT_TRACE
172 if (dqp->q_trace)
173 ktrace_free(dqp->q_trace);
174 dqp->q_trace = NULL;
175#endif
176 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 165 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
166
177 atomic_dec(&xfs_Gqm->qm_totaldquots); 167 atomic_dec(&xfs_Gqm->qm_totaldquots);
178} 168}
179 169
@@ -195,49 +185,6 @@ xfs_qm_dqinit_core(
195 d->dd_diskdq.d_flags = type; 185 d->dd_diskdq.d_flags = type;
196} 186}
197 187
198
199#ifdef XFS_DQUOT_TRACE
200/*
201 * Dquot tracing for debugging.
202 */
203/* ARGSUSED */
204void
205__xfs_dqtrace_entry(
206 xfs_dquot_t *dqp,
207 char *func,
208 void *retaddr,
209 xfs_inode_t *ip)
210{
211 xfs_dquot_t *udqp = NULL;
212 xfs_ino_t ino = 0;
213
214 ASSERT(dqp->q_trace);
215 if (ip) {
216 ino = ip->i_ino;
217 udqp = ip->i_udquot;
218 }
219 ktrace_enter(dqp->q_trace,
220 (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
221 (void *)func,
222 (void *)(__psint_t)dqp->q_nrefs,
223 (void *)(__psint_t)dqp->dq_flags,
224 (void *)(__psint_t)dqp->q_res_bcount,
225 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
226 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
227 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
228 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
229 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
230 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
231 (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
232 (void *)(__psint_t)current_pid(),
233 (void *)(__psint_t)ino,
234 (void *)(__psint_t)retaddr,
235 (void *)(__psint_t)udqp);
236 return;
237}
238#endif
239
240
241/* 188/*
242 * If default limits are in force, push them into the dquot now. 189 * If default limits are in force, push them into the dquot now.
243 * We overwrite the dquot limits only if they are zero and this 190 * We overwrite the dquot limits only if they are zero and this
@@ -425,7 +372,8 @@ xfs_qm_dqalloc(
425 xfs_trans_t *tp = *tpp; 372 xfs_trans_t *tp = *tpp;
426 373
427 ASSERT(tp != NULL); 374 ASSERT(tp != NULL);
428 xfs_dqtrace_entry(dqp, "DQALLOC"); 375
376 trace_xfs_dqalloc(dqp);
429 377
430 /* 378 /*
431 * Initialize the bmap freelist prior to calling bmapi code. 379 * Initialize the bmap freelist prior to calling bmapi code.
@@ -612,7 +560,8 @@ xfs_qm_dqtobp(
612 * (in which case we already have the buf). 560 * (in which case we already have the buf).
613 */ 561 */
614 if (! newdquot) { 562 if (! newdquot) {
615 xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); 563 trace_xfs_dqtobp_read(dqp);
564
616 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 565 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
617 dqp->q_blkno, 566 dqp->q_blkno,
618 XFS_QI_DQCHUNKLEN(mp), 567 XFS_QI_DQCHUNKLEN(mp),
@@ -670,11 +619,12 @@ xfs_qm_dqread(
670 619
671 ASSERT(tpp); 620 ASSERT(tpp);
672 621
622 trace_xfs_dqread(dqp);
623
673 /* 624 /*
674 * get a pointer to the on-disk dquot and the buffer containing it 625 * get a pointer to the on-disk dquot and the buffer containing it
675 * dqp already knows its own type (GROUP/USER). 626 * dqp already knows its own type (GROUP/USER).
676 */ 627 */
677 xfs_dqtrace_entry(dqp, "DQREAD");
678 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { 628 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
679 return (error); 629 return (error);
680 } 630 }
@@ -763,7 +713,7 @@ xfs_qm_idtodq(
763 * or if the dquot didn't exist on disk and we ask to 713 * or if the dquot didn't exist on disk and we ask to
764 * allocate (ENOENT). 714 * allocate (ENOENT).
765 */ 715 */
766 xfs_dqtrace_entry(dqp, "DQREAD FAIL"); 716 trace_xfs_dqread_fail(dqp);
767 cancelflags |= XFS_TRANS_ABORT; 717 cancelflags |= XFS_TRANS_ABORT;
768 goto error0; 718 goto error0;
769 } 719 }
@@ -817,7 +767,8 @@ xfs_qm_dqlookup(
817 * id can't be modified without the hashlock anyway. 767 * id can't be modified without the hashlock anyway.
818 */ 768 */
819 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { 769 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
820 xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP"); 770 trace_xfs_dqlookup_found(dqp);
771
821 /* 772 /*
822 * All in core dquots must be on the dqlist of mp 773 * All in core dquots must be on the dqlist of mp
823 */ 774 */
@@ -827,7 +778,7 @@ xfs_qm_dqlookup(
827 if (dqp->q_nrefs == 0) { 778 if (dqp->q_nrefs == 0) {
828 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); 779 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
829 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 780 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
830 xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT"); 781 trace_xfs_dqlookup_want(dqp);
831 782
832 /* 783 /*
833 * We may have raced with dqreclaim_one() 784 * We may have raced with dqreclaim_one()
@@ -857,8 +808,7 @@ xfs_qm_dqlookup(
857 /* 808 /*
858 * take it off the freelist 809 * take it off the freelist
859 */ 810 */
860 xfs_dqtrace_entry(dqp, 811 trace_xfs_dqlookup_freelist(dqp);
861 "DQLOOKUP: TAKEOFF FL");
862 XQM_FREELIST_REMOVE(dqp); 812 XQM_FREELIST_REMOVE(dqp);
863 /* xfs_qm_freelist_print(&(xfs_Gqm-> 813 /* xfs_qm_freelist_print(&(xfs_Gqm->
864 qm_dqfreelist), 814 qm_dqfreelist),
@@ -878,8 +828,7 @@ xfs_qm_dqlookup(
878 */ 828 */
879 ASSERT(mutex_is_locked(&qh->qh_lock)); 829 ASSERT(mutex_is_locked(&qh->qh_lock));
880 if (dqp->HL_PREVP != &qh->qh_next) { 830 if (dqp->HL_PREVP != &qh->qh_next) {
881 xfs_dqtrace_entry(dqp, 831 trace_xfs_dqlookup_move(dqp);
882 "DQLOOKUP: HASH MOVETOFRONT");
883 if ((d = dqp->HL_NEXT)) 832 if ((d = dqp->HL_NEXT))
884 d->HL_PREVP = dqp->HL_PREVP; 833 d->HL_PREVP = dqp->HL_PREVP;
885 *(dqp->HL_PREVP) = d; 834 *(dqp->HL_PREVP) = d;
@@ -889,7 +838,7 @@ xfs_qm_dqlookup(
889 dqp->HL_PREVP = &qh->qh_next; 838 dqp->HL_PREVP = &qh->qh_next;
890 qh->qh_next = dqp; 839 qh->qh_next = dqp;
891 } 840 }
892 xfs_dqtrace_entry(dqp, "LOOKUP END"); 841 trace_xfs_dqlookup_done(dqp);
893 *O_dqpp = dqp; 842 *O_dqpp = dqp;
894 ASSERT(mutex_is_locked(&qh->qh_lock)); 843 ASSERT(mutex_is_locked(&qh->qh_lock));
895 return (0); 844 return (0);
@@ -971,7 +920,7 @@ xfs_qm_dqget(
971 ASSERT(*O_dqpp); 920 ASSERT(*O_dqpp);
972 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 921 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
973 mutex_unlock(&h->qh_lock); 922 mutex_unlock(&h->qh_lock);
974 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); 923 trace_xfs_dqget_hit(*O_dqpp);
975 return (0); /* success */ 924 return (0); /* success */
976 } 925 }
977 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); 926 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
@@ -1104,7 +1053,7 @@ xfs_qm_dqget(
1104 mutex_unlock(&h->qh_lock); 1053 mutex_unlock(&h->qh_lock);
1105 dqret: 1054 dqret:
1106 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1055 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1107 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1056 trace_xfs_dqget_miss(dqp);
1108 *O_dqpp = dqp; 1057 *O_dqpp = dqp;
1109 return (0); 1058 return (0);
1110} 1059}
@@ -1124,7 +1073,8 @@ xfs_qm_dqput(
1124 1073
1125 ASSERT(dqp->q_nrefs > 0); 1074 ASSERT(dqp->q_nrefs > 0);
1126 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1075 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1127 xfs_dqtrace_entry(dqp, "DQPUT"); 1076
1077 trace_xfs_dqput(dqp);
1128 1078
1129 if (dqp->q_nrefs != 1) { 1079 if (dqp->q_nrefs != 1) {
1130 dqp->q_nrefs--; 1080 dqp->q_nrefs--;
@@ -1137,7 +1087,7 @@ xfs_qm_dqput(
1137 * in the right order; but try to get it out-of-order first 1087 * in the right order; but try to get it out-of-order first
1138 */ 1088 */
1139 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 1089 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
1140 xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT"); 1090 trace_xfs_dqput_wait(dqp);
1141 xfs_dqunlock(dqp); 1091 xfs_dqunlock(dqp);
1142 xfs_qm_freelist_lock(xfs_Gqm); 1092 xfs_qm_freelist_lock(xfs_Gqm);
1143 xfs_dqlock(dqp); 1093 xfs_dqlock(dqp);
@@ -1148,7 +1098,8 @@ xfs_qm_dqput(
1148 1098
1149 /* We can't depend on nrefs being == 1 here */ 1099 /* We can't depend on nrefs being == 1 here */
1150 if (--dqp->q_nrefs == 0) { 1100 if (--dqp->q_nrefs == 0) {
1151 xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST"); 1101 trace_xfs_dqput_free(dqp);
1102
1152 /* 1103 /*
1153 * insert at end of the freelist. 1104 * insert at end of the freelist.
1154 */ 1105 */
@@ -1196,7 +1147,7 @@ xfs_qm_dqrele(
1196 if (!dqp) 1147 if (!dqp)
1197 return; 1148 return;
1198 1149
1199 xfs_dqtrace_entry(dqp, "DQRELE"); 1150 trace_xfs_dqrele(dqp);
1200 1151
1201 xfs_dqlock(dqp); 1152 xfs_dqlock(dqp);
1202 /* 1153 /*
@@ -1229,7 +1180,7 @@ xfs_qm_dqflush(
1229 1180
1230 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1181 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1231 ASSERT(!completion_done(&dqp->q_flush)); 1182 ASSERT(!completion_done(&dqp->q_flush));
1232 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1183 trace_xfs_dqflush(dqp);
1233 1184
1234 /* 1185 /*
1235 * If not dirty, or it's pinned and we are not supposed to 1186 * If not dirty, or it's pinned and we are not supposed to
@@ -1259,7 +1210,6 @@ xfs_qm_dqflush(
1259 * the ondisk-dquot has already been allocated for. 1210 * the ondisk-dquot has already been allocated for.
1260 */ 1211 */
1261 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { 1212 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
1262 xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
1263 ASSERT(error != ENOENT); 1213 ASSERT(error != ENOENT);
1264 /* 1214 /*
1265 * Quotas could have gotten turned off (ESRCH) 1215 * Quotas could have gotten turned off (ESRCH)
@@ -1297,7 +1247,7 @@ xfs_qm_dqflush(
1297 * get stuck waiting in the write for too long. 1247 * get stuck waiting in the write for too long.
1298 */ 1248 */
1299 if (XFS_BUF_ISPINNED(bp)) { 1249 if (XFS_BUF_ISPINNED(bp)) {
1300 xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE"); 1250 trace_xfs_dqflush_force(dqp);
1301 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 1251 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
1302 } 1252 }
1303 1253
@@ -1308,7 +1258,9 @@ xfs_qm_dqflush(
1308 } else { 1258 } else {
1309 error = xfs_bwrite(mp, bp); 1259 error = xfs_bwrite(mp, bp);
1310 } 1260 }
1311 xfs_dqtrace_entry(dqp, "DQFLUSH END"); 1261
1262 trace_xfs_dqflush_done(dqp);
1263
1312 /* 1264 /*
1313 * dqp is still locked, but caller is free to unlock it now. 1265 * dqp is still locked, but caller is free to unlock it now.
1314 */ 1266 */
@@ -1483,7 +1435,7 @@ xfs_qm_dqpurge(
1483 */ 1435 */
1484 if (XFS_DQ_IS_DIRTY(dqp)) { 1436 if (XFS_DQ_IS_DIRTY(dqp)) {
1485 int error; 1437 int error;
1486 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); 1438
1487 /* dqflush unlocks dqflock */ 1439 /* dqflush unlocks dqflock */
1488 /* 1440 /*
1489 * Given that dqpurge is a very rare occurrence, it is OK 1441 * Given that dqpurge is a very rare occurrence, it is OK
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 6533ead9b88..a0f7da586d1 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -85,9 +85,6 @@ typedef struct xfs_dquot {
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 atomic_t q_pincount; /* dquot pin count */ 86 atomic_t q_pincount; /* dquot pin count */
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */
90#endif
91} xfs_dquot_t; 88} xfs_dquot_t;
92 89
93 90
@@ -98,7 +95,7 @@ typedef struct xfs_dquot {
98#define dq_flags q_lists.dqm_flags 95#define dq_flags q_lists.dqm_flags
99 96
100/* 97/*
101 * Lock hierachy for q_qlock: 98 * Lock hierarchy for q_qlock:
102 * XFS_QLOCK_NORMAL is the implicit default, 99 * XFS_QLOCK_NORMAL is the implicit default,
103 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 100 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
104 */ 101 */
@@ -144,24 +141,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
144 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 141 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
145 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 142 (XFS_IS_OQUOTA_ON((d)->q_mount))))
146 143
147#ifdef XFS_DQUOT_TRACE
148/*
149 * Dquot Tracing stuff.
150 */
151#define DQUOT_TRACE_SIZE 64
152#define DQUOT_KTRACE_ENTRY 1
153
154extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func,
155 void *, xfs_inode_t *);
156#define xfs_dqtrace_entry_ino(a,b,ip) \
157 __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip))
158#define xfs_dqtrace_entry(a,b) \
159 __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL)
160#else
161#define xfs_dqtrace_entry(a,b)
162#define xfs_dqtrace_entry_ino(a,b,ip)
163#endif
164
165#ifdef QUOTADEBUG 144#ifdef QUOTADEBUG
166extern void xfs_qm_dqprint(xfs_dquot_t *); 145extern void xfs_qm_dqprint(xfs_dquot_t *);
167#else 146#else
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 45b1bfef738..9e627a8b5b0 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51/* 52/*
52 * The global quota manager. There is only one of these for the entire 53 * The global quota manager. There is only one of these for the entire
@@ -453,7 +454,7 @@ again:
453 xfs_dqunlock(dqp); 454 xfs_dqunlock(dqp);
454 continue; 455 continue;
455 } 456 }
456 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); 457
457 /* XXX a sentinel would be better */ 458 /* XXX a sentinel would be better */
458 recl = XFS_QI_MPLRECLAIMS(mp); 459 recl = XFS_QI_MPLRECLAIMS(mp);
459 if (!xfs_dqflock_nowait(dqp)) { 460 if (!xfs_dqflock_nowait(dqp)) {
@@ -651,7 +652,7 @@ xfs_qm_dqattach_one(
651 */ 652 */
652 dqp = *IO_idqpp; 653 dqp = *IO_idqpp;
653 if (dqp) { 654 if (dqp) {
654 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); 655 trace_xfs_dqattach_found(dqp);
655 return 0; 656 return 0;
656 } 657 }
657 658
@@ -704,7 +705,7 @@ xfs_qm_dqattach_one(
704 if (error) 705 if (error)
705 return error; 706 return error;
706 707
707 xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); 708 trace_xfs_dqattach_get(dqp);
708 709
709 /* 710 /*
710 * dqget may have dropped and re-acquired the ilock, but it guarantees 711 * dqget may have dropped and re-acquired the ilock, but it guarantees
@@ -890,15 +891,15 @@ xfs_qm_dqdetach(
890 if (!(ip->i_udquot || ip->i_gdquot)) 891 if (!(ip->i_udquot || ip->i_gdquot))
891 return; 892 return;
892 893
894 trace_xfs_dquot_dqdetach(ip);
895
893 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); 896 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
894 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); 897 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
895 if (ip->i_udquot) { 898 if (ip->i_udquot) {
896 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
897 xfs_qm_dqrele(ip->i_udquot); 899 xfs_qm_dqrele(ip->i_udquot);
898 ip->i_udquot = NULL; 900 ip->i_udquot = NULL;
899 } 901 }
900 if (ip->i_gdquot) { 902 if (ip->i_gdquot) {
901 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
902 xfs_qm_dqrele(ip->i_gdquot); 903 xfs_qm_dqrele(ip->i_gdquot);
903 ip->i_gdquot = NULL; 904 ip->i_gdquot = NULL;
904 } 905 }
@@ -977,7 +978,6 @@ xfs_qm_sync(
977 * across a disk write 978 * across a disk write
978 */ 979 */
979 xfs_qm_mplist_unlock(mp); 980 xfs_qm_mplist_unlock(mp);
980 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
981 error = xfs_qm_dqflush(dqp, flush_flags); 981 error = xfs_qm_dqflush(dqp, flush_flags);
982 xfs_dqunlock(dqp); 982 xfs_dqunlock(dqp);
983 if (error && XFS_FORCED_SHUTDOWN(mp)) 983 if (error && XFS_FORCED_SHUTDOWN(mp))
@@ -1350,7 +1350,8 @@ xfs_qm_reset_dqcounts(
1350 xfs_disk_dquot_t *ddq; 1350 xfs_disk_dquot_t *ddq;
1351 int j; 1351 int j;
1352 1352
1353 xfs_buftrace("RESET DQUOTS", bp); 1353 trace_xfs_reset_dqcounts(bp, _RET_IP_);
1354
1354 /* 1355 /*
1355 * Reset all counters and timers. They'll be 1356 * Reset all counters and timers. They'll be
1356 * started afresh by xfs_qm_quotacheck. 1357 * started afresh by xfs_qm_quotacheck.
@@ -1543,7 +1544,9 @@ xfs_qm_quotacheck_dqadjust(
1543 xfs_qcnt_t rtblks) 1544 xfs_qcnt_t rtblks)
1544{ 1545{
1545 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1546 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1546 xfs_dqtrace_entry(dqp, "QCHECK DQADJUST"); 1547
1548 trace_xfs_dqadjust(dqp);
1549
1547 /* 1550 /*
1548 * Adjust the inode count and the block count to reflect this inode's 1551 * Adjust the inode count and the block count to reflect this inode's
1549 * resource usage. 1552 * resource usage.
@@ -1994,7 +1997,9 @@ xfs_qm_shake_freelist(
1994 */ 1997 */
1995 if (XFS_DQ_IS_DIRTY(dqp)) { 1998 if (XFS_DQ_IS_DIRTY(dqp)) {
1996 int error; 1999 int error;
1997 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); 2000
2001 trace_xfs_dqshake_dirty(dqp);
2002
1998 /* 2003 /*
1999 * We flush it delayed write, so don't bother 2004 * We flush it delayed write, so don't bother
2000 * releasing the mplock. 2005 * releasing the mplock.
@@ -2038,7 +2043,9 @@ xfs_qm_shake_freelist(
2038 return nreclaimed; 2043 return nreclaimed;
2039 goto tryagain; 2044 goto tryagain;
2040 } 2045 }
2041 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); 2046
2047 trace_xfs_dqshake_unlink(dqp);
2048
2042#ifdef QUOTADEBUG 2049#ifdef QUOTADEBUG
2043 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", 2050 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2044 dqp, be32_to_cpu(dqp->q_core.d_id)); 2051 dqp, be32_to_cpu(dqp->q_core.d_id));
@@ -2125,7 +2132,9 @@ xfs_qm_dqreclaim_one(void)
2125 */ 2132 */
2126 if (dqp->dq_flags & XFS_DQ_WANT) { 2133 if (dqp->dq_flags & XFS_DQ_WANT) {
2127 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 2134 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2128 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT"); 2135
2136 trace_xfs_dqreclaim_want(dqp);
2137
2129 xfs_dqunlock(dqp); 2138 xfs_dqunlock(dqp);
2130 xfs_qm_freelist_unlock(xfs_Gqm); 2139 xfs_qm_freelist_unlock(xfs_Gqm);
2131 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2140 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
@@ -2171,7 +2180,9 @@ xfs_qm_dqreclaim_one(void)
2171 */ 2180 */
2172 if (XFS_DQ_IS_DIRTY(dqp)) { 2181 if (XFS_DQ_IS_DIRTY(dqp)) {
2173 int error; 2182 int error;
2174 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); 2183
2184 trace_xfs_dqreclaim_dirty(dqp);
2185
2175 /* 2186 /*
2176 * We flush it delayed write, so don't bother 2187 * We flush it delayed write, so don't bother
2177 * releasing the freelist lock. 2188 * releasing the freelist lock.
@@ -2194,8 +2205,9 @@ xfs_qm_dqreclaim_one(void)
2194 if (!mutex_trylock(&dqp->q_hash->qh_lock)) 2205 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2195 goto mplistunlock; 2206 goto mplistunlock;
2196 2207
2208 trace_xfs_dqreclaim_unlink(dqp);
2209
2197 ASSERT(dqp->q_nrefs == 0); 2210 ASSERT(dqp->q_nrefs == 0);
2198 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2199 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); 2211 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2200 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); 2212 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2201 XQM_FREELIST_REMOVE(dqp); 2213 XQM_FREELIST_REMOVE(dqp);
@@ -2430,7 +2442,7 @@ xfs_qm_vop_dqalloc(
2430 } 2442 }
2431 } 2443 }
2432 if (uq) 2444 if (uq)
2433 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); 2445 trace_xfs_dquot_dqalloc(ip);
2434 2446
2435 xfs_iunlock(ip, lockflags); 2447 xfs_iunlock(ip, lockflags);
2436 if (O_udqpp) 2448 if (O_udqpp)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d1a3b98a6e..873e07e2907 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -49,6 +49,7 @@
49#include "xfs_buf_item.h" 49#include "xfs_buf_item.h"
50#include "xfs_utils.h" 50#include "xfs_utils.h"
51#include "xfs_qm.h" 51#include "xfs_qm.h"
52#include "xfs_trace.h"
52 53
53#ifdef DEBUG 54#ifdef DEBUG
54# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) 55# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
@@ -496,7 +497,6 @@ xfs_qm_scall_setqlim(
496 ASSERT(error != ENOENT); 497 ASSERT(error != ENOENT);
497 return (error); 498 return (error);
498 } 499 }
499 xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
500 xfs_trans_dqjoin(tp, dqp); 500 xfs_trans_dqjoin(tp, dqp);
501 ddq = &dqp->q_core; 501 ddq = &dqp->q_core;
502 502
@@ -602,7 +602,6 @@ xfs_qm_scall_setqlim(
602 dqp->dq_flags |= XFS_DQ_DIRTY; 602 dqp->dq_flags |= XFS_DQ_DIRTY;
603 xfs_trans_log_dquot(tp, dqp); 603 xfs_trans_log_dquot(tp, dqp);
604 604
605 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
606 error = xfs_trans_commit(tp, 0); 605 error = xfs_trans_commit(tp, 0);
607 xfs_qm_dqprint(dqp); 606 xfs_qm_dqprint(dqp);
608 xfs_qm_dqrele(dqp); 607 xfs_qm_dqrele(dqp);
@@ -630,7 +629,6 @@ xfs_qm_scall_getquota(
630 return (error); 629 return (error);
631 } 630 }
632 631
633 xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
634 /* 632 /*
635 * If everything's NULL, this dquot doesn't quite exist as far as 633 * If everything's NULL, this dquot doesn't quite exist as far as
636 * our utility programs are concerned. 634 * our utility programs are concerned.
@@ -893,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
893 uint flags) 891 uint flags)
894{ 892{
895 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
897} 895}
898 896
899/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 6f4fd37c67a..d2d20462fd4 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -41,10 +41,6 @@ extern void assfail(char *expr, char *f, int l);
41# define STATIC static noinline 41# define STATIC static noinline
42#endif 42#endif
43 43
44#ifndef STATIC_INLINE
45# define STATIC_INLINE static inline
46#endif
47
48#else /* DEBUG */ 44#else /* DEBUG */
49 45
50#define ASSERT(expr) \ 46#define ASSERT(expr) \
@@ -54,19 +50,5 @@ extern void assfail(char *expr, char *f, int l);
54# define STATIC noinline 50# define STATIC noinline
55#endif 51#endif
56 52
57/*
58 * We stop inlining of inline functions in debug mode.
59 * Unfortunately, this means static inline in header files
60 * get multiple definitions, so they need to remain static.
61 * This then gives tonnes of warnings about unused but defined
62 * functions, so we need to add the unused attribute to prevent
63 * these spurious warnings.
64 */
65#ifndef STATIC_INLINE
66# define STATIC_INLINE static __attribute__ ((unused)) noinline
67#endif
68
69#endif /* DEBUG */ 53#endif /* DEBUG */
70
71
72#endif /* __XFS_SUPPORT_DEBUG_H__ */ 54#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
deleted file mode 100644
index 2d494c26717..00000000000
--- a/fs/xfs/support/ktrace.c
+++ /dev/null
@@ -1,323 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19
20static kmem_zone_t *ktrace_hdr_zone;
21static kmem_zone_t *ktrace_ent_zone;
22static int ktrace_zentries;
23
24void __init
25ktrace_init(int zentries)
26{
27 ktrace_zentries = roundup_pow_of_two(zentries);
28
29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
30 "ktrace_hdr");
31 ASSERT(ktrace_hdr_zone);
32
33 ktrace_ent_zone = kmem_zone_init(ktrace_zentries
34 * sizeof(ktrace_entry_t),
35 "ktrace_ent");
36 ASSERT(ktrace_ent_zone);
37}
38
39void __exit
40ktrace_uninit(void)
41{
42 kmem_zone_destroy(ktrace_hdr_zone);
43 kmem_zone_destroy(ktrace_ent_zone);
44}
45
46/*
47 * ktrace_alloc()
48 *
49 * Allocate a ktrace header and enough buffering for the given
50 * number of entries. Round the number of entries up to a
51 * power of 2 so we can do fast masking to get the index from
52 * the atomic index counter.
53 */
54ktrace_t *
55ktrace_alloc(int nentries, unsigned int __nocast sleep)
56{
57 ktrace_t *ktp;
58 ktrace_entry_t *ktep;
59 int entries;
60
61 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
62
63 if (ktp == (ktrace_t*)NULL) {
64 /*
65 * KM_SLEEP callers don't expect failure.
66 */
67 if (sleep & KM_SLEEP)
68 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
69
70 return NULL;
71 }
72
73 /*
74 * Special treatment for buffers with the ktrace_zentries entries
75 */
76 entries = roundup_pow_of_two(nentries);
77 if (entries == ktrace_zentries) {
78 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
79 sleep);
80 } else {
81 ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
82 sleep | KM_LARGE);
83 }
84
85 if (ktep == NULL) {
86 /*
87 * KM_SLEEP callers don't expect failure.
88 */
89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91
92 kmem_free(ktp);
93
94 return NULL;
95 }
96
97 ktp->kt_entries = ktep;
98 ktp->kt_nentries = entries;
99 ASSERT(is_power_of_2(entries));
100 ktp->kt_index_mask = entries - 1;
101 atomic_set(&ktp->kt_index, 0);
102 ktp->kt_rollover = 0;
103 return ktp;
104}
105
106
107/*
108 * ktrace_free()
109 *
110 * Free up the ktrace header and buffer. It is up to the caller
111 * to ensure that no-one is referencing it.
112 */
113void
114ktrace_free(ktrace_t *ktp)
115{
116 if (ktp == (ktrace_t *)NULL)
117 return;
118
119 /*
120 * Special treatment for the Vnode trace buffer.
121 */
122 if (ktp->kt_nentries == ktrace_zentries)
123 kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
124 else
125 kmem_free(ktp->kt_entries);
126
127 kmem_zone_free(ktrace_hdr_zone, ktp);
128}
129
130
131/*
132 * Enter the given values into the "next" entry in the trace buffer.
133 * kt_index is always the index of the next entry to be filled.
134 */
135void
136ktrace_enter(
137 ktrace_t *ktp,
138 void *val0,
139 void *val1,
140 void *val2,
141 void *val3,
142 void *val4,
143 void *val5,
144 void *val6,
145 void *val7,
146 void *val8,
147 void *val9,
148 void *val10,
149 void *val11,
150 void *val12,
151 void *val13,
152 void *val14,
153 void *val15)
154{
155 int index;
156 ktrace_entry_t *ktep;
157
158 ASSERT(ktp != NULL);
159
160 /*
161 * Grab an entry by pushing the index up to the next one.
162 */
163 index = atomic_add_return(1, &ktp->kt_index);
164 index = (index - 1) & ktp->kt_index_mask;
165 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
166 ktp->kt_rollover = 1;
167
168 ASSERT((index >= 0) && (index < ktp->kt_nentries));
169
170 ktep = &(ktp->kt_entries[index]);
171
172 ktep->val[0] = val0;
173 ktep->val[1] = val1;
174 ktep->val[2] = val2;
175 ktep->val[3] = val3;
176 ktep->val[4] = val4;
177 ktep->val[5] = val5;
178 ktep->val[6] = val6;
179 ktep->val[7] = val7;
180 ktep->val[8] = val8;
181 ktep->val[9] = val9;
182 ktep->val[10] = val10;
183 ktep->val[11] = val11;
184 ktep->val[12] = val12;
185 ktep->val[13] = val13;
186 ktep->val[14] = val14;
187 ktep->val[15] = val15;
188}
189
190/*
191 * Return the number of entries in the trace buffer.
192 */
193int
194ktrace_nentries(
195 ktrace_t *ktp)
196{
197 int index;
198 if (ktp == NULL)
199 return 0;
200
201 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
202 return (ktp->kt_rollover ? ktp->kt_nentries : index);
203}
204
205/*
206 * ktrace_first()
207 *
208 * This is used to find the start of the trace buffer.
209 * In conjunction with ktrace_next() it can be used to
210 * iterate through the entire trace buffer. This code does
211 * not do any locking because it is assumed that it is called
212 * from the debugger.
213 *
214 * The caller must pass in a pointer to a ktrace_snap
215 * structure in which we will keep some state used to
216 * iterate through the buffer. This state must not touched
217 * by any code outside of this module.
218 */
219ktrace_entry_t *
220ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp)
221{
222 ktrace_entry_t *ktep;
223 int index;
224 int nentries;
225
226 if (ktp->kt_rollover)
227 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
228 else
229 index = 0;
230
231 ktsp->ks_start = index;
232 ktep = &(ktp->kt_entries[index]);
233
234 nentries = ktrace_nentries(ktp);
235 index++;
236 if (index < nentries) {
237 ktsp->ks_index = index;
238 } else {
239 ktsp->ks_index = 0;
240 if (index > nentries)
241 ktep = NULL;
242 }
243 return ktep;
244}
245
246/*
247 * ktrace_next()
248 *
249 * This is used to iterate through the entries of the given
250 * trace buffer. The caller must pass in the ktrace_snap_t
251 * structure initialized by ktrace_first(). The return value
252 * will be either a pointer to the next ktrace_entry or NULL
253 * if all of the entries have been traversed.
254 */
255ktrace_entry_t *
256ktrace_next(
257 ktrace_t *ktp,
258 ktrace_snap_t *ktsp)
259{
260 int index;
261 ktrace_entry_t *ktep;
262
263 index = ktsp->ks_index;
264 if (index == ktsp->ks_start) {
265 ktep = NULL;
266 } else {
267 ktep = &ktp->kt_entries[index];
268 }
269
270 index++;
271 if (index == ktrace_nentries(ktp)) {
272 ktsp->ks_index = 0;
273 } else {
274 ktsp->ks_index = index;
275 }
276
277 return ktep;
278}
279
280/*
281 * ktrace_skip()
282 *
283 * Skip the next "count" entries and return the entry after that.
284 * Return NULL if this causes us to iterate past the beginning again.
285 */
286ktrace_entry_t *
287ktrace_skip(
288 ktrace_t *ktp,
289 int count,
290 ktrace_snap_t *ktsp)
291{
292 int index;
293 int new_index;
294 ktrace_entry_t *ktep;
295 int nentries = ktrace_nentries(ktp);
296
297 index = ktsp->ks_index;
298 new_index = index + count;
299 while (new_index >= nentries) {
300 new_index -= nentries;
301 }
302 if (index == ktsp->ks_start) {
303 /*
304 * We've iterated around to the start, so we're done.
305 */
306 ktep = NULL;
307 } else if ((new_index < index) && (index < ktsp->ks_index)) {
308 /*
309 * We've skipped past the start again, so we're done.
310 */
311 ktep = NULL;
312 ktsp->ks_index = ktsp->ks_start;
313 } else {
314 ktep = &(ktp->kt_entries[new_index]);
315 new_index++;
316 if (new_index == nentries) {
317 ktsp->ks_index = 0;
318 } else {
319 ktsp->ks_index = new_index;
320 }
321 }
322 return ktep;
323}
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
deleted file mode 100644
index 741d6947ca6..00000000000
--- a/fs/xfs/support/ktrace.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_KTRACE_H__
19#define __XFS_SUPPORT_KTRACE_H__
20
21/*
22 * Trace buffer entry structure.
23 */
24typedef struct ktrace_entry {
25 void *val[16];
26} ktrace_entry_t;
27
28/*
29 * Trace buffer header structure.
30 */
31typedef struct ktrace {
32 int kt_nentries; /* number of entries in trace buf */
33 atomic_t kt_index; /* current index in entries */
34 unsigned int kt_index_mask;
35 int kt_rollover;
36 ktrace_entry_t *kt_entries; /* buffer of entries */
37} ktrace_t;
38
39/*
40 * Trace buffer snapshot structure.
41 */
42typedef struct ktrace_snap {
43 int ks_start; /* kt_index at time of snap */
44 int ks_index; /* current index */
45} ktrace_snap_t;
46
47
48#ifdef CONFIG_XFS_TRACE
49
50extern void ktrace_init(int zentries);
51extern void ktrace_uninit(void);
52
53extern ktrace_t *ktrace_alloc(int, unsigned int __nocast);
54extern void ktrace_free(ktrace_t *);
55
56extern void ktrace_enter(
57 ktrace_t *,
58 void *,
59 void *,
60 void *,
61 void *,
62 void *,
63 void *,
64 void *,
65 void *,
66 void *,
67 void *,
68 void *,
69 void *,
70 void *,
71 void *,
72 void *,
73 void *);
74
75extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *);
76extern int ktrace_nentries(ktrace_t *);
77extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *);
78extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *);
79
80#else
81#define ktrace_init(x) do { } while (0)
82#define ktrace_uninit() do { } while (0)
83#endif /* CONFIG_XFS_TRACE */
84
85#endif /* __XFS_SUPPORT_KTRACE_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 17254b529c5..5ad8ad3a1dc 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -25,21 +25,5 @@
25/* #define QUOTADEBUG 1 */ 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE
29#define XFS_ALLOC_TRACE 1
30#define XFS_ATTR_TRACE 1
31#define XFS_BLI_TRACE 1
32#define XFS_BMAP_TRACE 1
33#define XFS_BTREE_TRACE 1
34#define XFS_DIR2_TRACE 1
35#define XFS_DQUOT_TRACE 1
36#define XFS_ILOCK_TRACE 1
37#define XFS_LOG_TRACE 1
38#define XFS_RW_TRACE 1
39#define XFS_BUF_TRACE 1
40#define XFS_INODE_TRACE 1
41#define XFS_FILESTREAMS_TRACE 1
42#endif
43
44#include <linux-2.6/xfs_linux.h> 28#include <linux-2.6/xfs_linux.h>
45#endif /* __XFS_H__ */ 29#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8e..00fd357c3e4 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_system_handler; 52extern struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler;
53#else 54#else
54# define xfs_check_acl NULL 55# define xfs_check_acl NULL
55# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf4931..6702bd86581 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -86,6 +86,20 @@ typedef struct xfs_agf {
86#define XFS_AGF_NUM_BITS 12 86#define XFS_AGF_NUM_BITS 12
87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
88 88
89#define XFS_AGF_FLAGS \
90 { XFS_AGF_MAGICNUM, "MAGICNUM" }, \
91 { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \
92 { XFS_AGF_SEQNO, "SEQNO" }, \
93 { XFS_AGF_LENGTH, "LENGTH" }, \
94 { XFS_AGF_ROOTS, "ROOTS" }, \
95 { XFS_AGF_LEVELS, "LEVELS" }, \
96 { XFS_AGF_FLFIRST, "FLFIRST" }, \
97 { XFS_AGF_FLLAST, "FLLAST" }, \
98 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
99 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
100 { XFS_AGF_LONGEST, "LONGEST" }, \
101 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }
102
89/* disk block (xfs_daddr_t) in the AG */ 103/* disk block (xfs_daddr_t) in the AG */
90#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 104#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
91#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) 105#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 2cf944eb796..275b1f4f943 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -38,6 +38,7 @@
38#include "xfs_ialloc.h" 38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 39#include "xfs_alloc.h"
40#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
41 42
42 43
43#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 44#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
@@ -51,30 +52,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
51 xfs_agblock_t bno, 52 xfs_agblock_t bno,
52 xfs_extlen_t len); 53 xfs_extlen_t len);
53 54
54#if defined(XFS_ALLOC_TRACE)
55ktrace_t *xfs_alloc_trace_buf;
56
57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \
68 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else
70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f)
72#define TRACE_MODAGF(s,a,f)
73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
74#define TRACE_UNBUSY(fname,s,ag,sl,tp)
75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
76#endif /* XFS_ALLOC_TRACE */
77
78/* 55/*
79 * Prototypes for per-ag allocation routines 56 * Prototypes for per-ag allocation routines
80 */ 57 */
@@ -498,124 +475,6 @@ xfs_alloc_read_agfl(
498 return 0; 475 return 0;
499} 476}
500 477
501#if defined(XFS_ALLOC_TRACE)
502/*
503 * Add an allocation trace entry for an alloc call.
504 */
505STATIC void
506xfs_alloc_trace_alloc(
507 const char *name, /* function tag string */
508 char *str, /* additional string */
509 xfs_alloc_arg_t *args, /* allocation argument structure */
510 int line) /* source line number */
511{
512 ktrace_enter(xfs_alloc_trace_buf,
513 (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
514 (void *)name,
515 (void *)str,
516 (void *)args->mp,
517 (void *)(__psunsigned_t)args->agno,
518 (void *)(__psunsigned_t)args->agbno,
519 (void *)(__psunsigned_t)args->minlen,
520 (void *)(__psunsigned_t)args->maxlen,
521 (void *)(__psunsigned_t)args->mod,
522 (void *)(__psunsigned_t)args->prod,
523 (void *)(__psunsigned_t)args->minleft,
524 (void *)(__psunsigned_t)args->total,
525 (void *)(__psunsigned_t)args->alignment,
526 (void *)(__psunsigned_t)args->len,
527 (void *)((((__psint_t)args->type) << 16) |
528 (__psint_t)args->otype),
529 (void *)(__psint_t)((args->wasdel << 3) |
530 (args->wasfromfl << 2) |
531 (args->isfl << 1) |
532 (args->userdata << 0)));
533}
534
535/*
536 * Add an allocation trace entry for a free call.
537 */
538STATIC void
539xfs_alloc_trace_free(
540 const char *name, /* function tag string */
541 char *str, /* additional string */
542 xfs_mount_t *mp, /* file system mount point */
543 xfs_agnumber_t agno, /* allocation group number */
544 xfs_agblock_t agbno, /* a.g. relative block number */
545 xfs_extlen_t len, /* length of extent */
546 int isfl, /* set if is freelist allocation/free */
547 int line) /* source line number */
548{
549 ktrace_enter(xfs_alloc_trace_buf,
550 (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
551 (void *)name,
552 (void *)str,
553 (void *)mp,
554 (void *)(__psunsigned_t)agno,
555 (void *)(__psunsigned_t)agbno,
556 (void *)(__psunsigned_t)len,
557 (void *)(__psint_t)isfl,
558 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
559}
560
561/*
562 * Add an allocation trace entry for modifying an agf.
563 */
564STATIC void
565xfs_alloc_trace_modagf(
566 const char *name, /* function tag string */
567 char *str, /* additional string */
568 xfs_mount_t *mp, /* file system mount point */
569 xfs_agf_t *agf, /* new agf value */
570 int flags, /* logging flags for agf */
571 int line) /* source line number */
572{
573 ktrace_enter(xfs_alloc_trace_buf,
574 (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
575 (void *)name,
576 (void *)str,
577 (void *)mp,
578 (void *)(__psint_t)flags,
579 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
580 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
581 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
582 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
583 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
584 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
585 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
586 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
587 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
588 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
589 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
590}
591
592STATIC void
593xfs_alloc_trace_busy(
594 const char *name, /* function tag string */
595 char *str, /* additional string */
596 xfs_mount_t *mp, /* file system mount point */
597 xfs_agnumber_t agno, /* allocation group number */
598 xfs_agblock_t agbno, /* a.g. relative block number */
599 xfs_extlen_t len, /* length of extent */
600 int slot, /* perag Busy slot */
601 xfs_trans_t *tp,
602 int trtype, /* type: add, delete, search */
603 int line) /* source line number */
604{
605 ktrace_enter(xfs_alloc_trace_buf,
606 (void *)(__psint_t)(trtype | (line << 16)),
607 (void *)name,
608 (void *)str,
609 (void *)mp,
610 (void *)(__psunsigned_t)agno,
611 (void *)(__psunsigned_t)agbno,
612 (void *)(__psunsigned_t)len,
613 (void *)(__psint_t)slot,
614 (void *)tp,
615 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
616}
617#endif /* XFS_ALLOC_TRACE */
618
619/* 478/*
620 * Allocation group level functions. 479 * Allocation group level functions.
621 */ 480 */
@@ -665,9 +524,6 @@ xfs_alloc_ag_vextent(
665 */ 524 */
666 if (args->agbno != NULLAGBLOCK) { 525 if (args->agbno != NULLAGBLOCK) {
667 xfs_agf_t *agf; /* allocation group freelist header */ 526 xfs_agf_t *agf; /* allocation group freelist header */
668#ifdef XFS_ALLOC_TRACE
669 xfs_mount_t *mp = args->mp;
670#endif
671 long slen = (long)args->len; 527 long slen = (long)args->len;
672 528
673 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 529 ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
@@ -682,7 +538,6 @@ xfs_alloc_ag_vextent(
682 args->pag->pagf_freeblks -= args->len; 538 args->pag->pagf_freeblks -= args->len;
683 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 539 ASSERT(be32_to_cpu(agf->agf_freeblks) <=
684 be32_to_cpu(agf->agf_length)); 540 be32_to_cpu(agf->agf_length));
685 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
686 xfs_alloc_log_agf(args->tp, args->agbp, 541 xfs_alloc_log_agf(args->tp, args->agbp,
687 XFS_AGF_FREEBLKS); 542 XFS_AGF_FREEBLKS);
688 /* search the busylist for these blocks */ 543 /* search the busylist for these blocks */
@@ -792,13 +647,14 @@ xfs_alloc_ag_vextent_exact(
792 } 647 }
793 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 648 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
794 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 649 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
795 TRACE_ALLOC("normal", args); 650
651 trace_xfs_alloc_exact_done(args);
796 args->wasfromfl = 0; 652 args->wasfromfl = 0;
797 return 0; 653 return 0;
798 654
799error0: 655error0:
800 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 656 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
801 TRACE_ALLOC("error", args); 657 trace_xfs_alloc_exact_error(args);
802 return error; 658 return error;
803} 659}
804 660
@@ -958,7 +814,7 @@ xfs_alloc_ag_vextent_near(
958 args->len = blen; 814 args->len = blen;
959 if (!xfs_alloc_fix_minleft(args)) { 815 if (!xfs_alloc_fix_minleft(args)) {
960 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 816 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
961 TRACE_ALLOC("nominleft", args); 817 trace_xfs_alloc_near_nominleft(args);
962 return 0; 818 return 0;
963 } 819 }
964 blen = args->len; 820 blen = args->len;
@@ -981,7 +837,8 @@ xfs_alloc_ag_vextent_near(
981 goto error0; 837 goto error0;
982 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 838 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
983 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 839 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
984 TRACE_ALLOC("first", args); 840
841 trace_xfs_alloc_near_first(args);
985 return 0; 842 return 0;
986 } 843 }
987 /* 844 /*
@@ -1272,7 +1129,7 @@ xfs_alloc_ag_vextent_near(
1272 * If we couldn't get anything, give up. 1129 * If we couldn't get anything, give up.
1273 */ 1130 */
1274 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1131 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1275 TRACE_ALLOC("neither", args); 1132 trace_xfs_alloc_size_neither(args);
1276 args->agbno = NULLAGBLOCK; 1133 args->agbno = NULLAGBLOCK;
1277 return 0; 1134 return 0;
1278 } 1135 }
@@ -1299,7 +1156,7 @@ xfs_alloc_ag_vextent_near(
1299 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1156 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1300 xfs_alloc_fix_len(args); 1157 xfs_alloc_fix_len(args);
1301 if (!xfs_alloc_fix_minleft(args)) { 1158 if (!xfs_alloc_fix_minleft(args)) {
1302 TRACE_ALLOC("nominleft", args); 1159 trace_xfs_alloc_near_nominleft(args);
1303 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1160 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1304 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1161 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1305 return 0; 1162 return 0;
@@ -1314,13 +1171,18 @@ xfs_alloc_ag_vextent_near(
1314 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1171 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1315 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1172 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1316 goto error0; 1173 goto error0;
1317 TRACE_ALLOC(j ? "gt" : "lt", args); 1174
1175 if (j)
1176 trace_xfs_alloc_near_greater(args);
1177 else
1178 trace_xfs_alloc_near_lesser(args);
1179
1318 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1319 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1181 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1320 return 0; 1182 return 0;
1321 1183
1322 error0: 1184 error0:
1323 TRACE_ALLOC("error", args); 1185 trace_xfs_alloc_near_error(args);
1324 if (cnt_cur != NULL) 1186 if (cnt_cur != NULL)
1325 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1187 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1326 if (bno_cur_lt != NULL) 1188 if (bno_cur_lt != NULL)
@@ -1371,7 +1233,7 @@ xfs_alloc_ag_vextent_size(
1371 goto error0; 1233 goto error0;
1372 if (i == 0 || flen == 0) { 1234 if (i == 0 || flen == 0) {
1373 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1235 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1374 TRACE_ALLOC("noentry", args); 1236 trace_xfs_alloc_size_noentry(args);
1375 return 0; 1237 return 0;
1376 } 1238 }
1377 ASSERT(i == 1); 1239 ASSERT(i == 1);
@@ -1448,7 +1310,7 @@ xfs_alloc_ag_vextent_size(
1448 xfs_alloc_fix_len(args); 1310 xfs_alloc_fix_len(args);
1449 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1311 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1450 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1312 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1451 TRACE_ALLOC("nominleft", args); 1313 trace_xfs_alloc_size_nominleft(args);
1452 args->agbno = NULLAGBLOCK; 1314 args->agbno = NULLAGBLOCK;
1453 return 0; 1315 return 0;
1454 } 1316 }
@@ -1471,11 +1333,11 @@ xfs_alloc_ag_vextent_size(
1471 args->agbno + args->len <= 1333 args->agbno + args->len <=
1472 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1334 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1473 error0); 1335 error0);
1474 TRACE_ALLOC("normal", args); 1336 trace_xfs_alloc_size_done(args);
1475 return 0; 1337 return 0;
1476 1338
1477error0: 1339error0:
1478 TRACE_ALLOC("error", args); 1340 trace_xfs_alloc_size_error(args);
1479 if (cnt_cur) 1341 if (cnt_cur)
1480 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1342 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1481 if (bno_cur) 1343 if (bno_cur)
@@ -1534,7 +1396,7 @@ xfs_alloc_ag_vextent_small(
1534 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1396 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1535 error0); 1397 error0);
1536 args->wasfromfl = 1; 1398 args->wasfromfl = 1;
1537 TRACE_ALLOC("freelist", args); 1399 trace_xfs_alloc_small_freelist(args);
1538 *stat = 0; 1400 *stat = 0;
1539 return 0; 1401 return 0;
1540 } 1402 }
@@ -1556,17 +1418,17 @@ xfs_alloc_ag_vextent_small(
1556 */ 1418 */
1557 if (flen < args->minlen) { 1419 if (flen < args->minlen) {
1558 args->agbno = NULLAGBLOCK; 1420 args->agbno = NULLAGBLOCK;
1559 TRACE_ALLOC("notenough", args); 1421 trace_xfs_alloc_small_notenough(args);
1560 flen = 0; 1422 flen = 0;
1561 } 1423 }
1562 *fbnop = fbno; 1424 *fbnop = fbno;
1563 *flenp = flen; 1425 *flenp = flen;
1564 *stat = 1; 1426 *stat = 1;
1565 TRACE_ALLOC("normal", args); 1427 trace_xfs_alloc_small_done(args);
1566 return 0; 1428 return 0;
1567 1429
1568error0: 1430error0:
1569 TRACE_ALLOC("error", args); 1431 trace_xfs_alloc_small_error(args);
1570 return error; 1432 return error;
1571} 1433}
1572 1434
@@ -1809,17 +1671,14 @@ xfs_free_ag_extent(
1809 be32_to_cpu(agf->agf_freeblks) <= 1671 be32_to_cpu(agf->agf_freeblks) <=
1810 be32_to_cpu(agf->agf_length), 1672 be32_to_cpu(agf->agf_length),
1811 error0); 1673 error0);
1812 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1813 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 1674 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1814 if (!isfl) 1675 if (!isfl)
1815 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1676 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1816 XFS_STATS_INC(xs_freex); 1677 XFS_STATS_INC(xs_freex);
1817 XFS_STATS_ADD(xs_freeb, len); 1678 XFS_STATS_ADD(xs_freeb, len);
1818 } 1679 }
1819 TRACE_FREE(haveleft ? 1680
1820 (haveright ? "both" : "left") : 1681 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1821 (haveright ? "right" : "none"),
1822 agno, bno, len, isfl);
1823 1682
1824 /* 1683 /*
1825 * Since blocks move to the free list without the coordination 1684 * Since blocks move to the free list without the coordination
@@ -1836,7 +1695,7 @@ xfs_free_ag_extent(
1836 return 0; 1695 return 0;
1837 1696
1838 error0: 1697 error0:
1839 TRACE_FREE("error", agno, bno, len, isfl); 1698 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1840 if (bno_cur) 1699 if (bno_cur)
1841 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1700 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1842 if (cnt_cur) 1701 if (cnt_cur)
@@ -2122,7 +1981,6 @@ xfs_alloc_get_freelist(
2122 logflags |= XFS_AGF_BTREEBLKS; 1981 logflags |= XFS_AGF_BTREEBLKS;
2123 } 1982 }
2124 1983
2125 TRACE_MODAGF(NULL, agf, logflags);
2126 xfs_alloc_log_agf(tp, agbp, logflags); 1984 xfs_alloc_log_agf(tp, agbp, logflags);
2127 *bnop = bno; 1985 *bnop = bno;
2128 1986
@@ -2165,6 +2023,8 @@ xfs_alloc_log_agf(
2165 sizeof(xfs_agf_t) 2023 sizeof(xfs_agf_t)
2166 }; 2024 };
2167 2025
2026 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2027
2168 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2028 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2169 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2029 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2170} 2030}
@@ -2230,13 +2090,11 @@ xfs_alloc_put_freelist(
2230 logflags |= XFS_AGF_BTREEBLKS; 2090 logflags |= XFS_AGF_BTREEBLKS;
2231 } 2091 }
2232 2092
2233 TRACE_MODAGF(NULL, agf, logflags);
2234 xfs_alloc_log_agf(tp, agbp, logflags); 2093 xfs_alloc_log_agf(tp, agbp, logflags);
2235 2094
2236 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2095 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2237 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2096 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2238 *blockp = cpu_to_be32(bno); 2097 *blockp = cpu_to_be32(bno);
2239 TRACE_MODAGF(NULL, agf, logflags);
2240 xfs_alloc_log_agf(tp, agbp, logflags); 2098 xfs_alloc_log_agf(tp, agbp, logflags);
2241 xfs_trans_log_buf(tp, agflbp, 2099 xfs_trans_log_buf(tp, agflbp,
2242 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2100 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
@@ -2399,7 +2257,7 @@ xfs_alloc_vextent(
2399 args->minlen > args->maxlen || args->minlen > agsize || 2257 args->minlen > args->maxlen || args->minlen > agsize ||
2400 args->mod >= args->prod) { 2258 args->mod >= args->prod) {
2401 args->fsbno = NULLFSBLOCK; 2259 args->fsbno = NULLFSBLOCK;
2402 TRACE_ALLOC("badargs", args); 2260 trace_xfs_alloc_vextent_badargs(args);
2403 return 0; 2261 return 0;
2404 } 2262 }
2405 minleft = args->minleft; 2263 minleft = args->minleft;
@@ -2418,12 +2276,12 @@ xfs_alloc_vextent(
2418 error = xfs_alloc_fix_freelist(args, 0); 2276 error = xfs_alloc_fix_freelist(args, 0);
2419 args->minleft = minleft; 2277 args->minleft = minleft;
2420 if (error) { 2278 if (error) {
2421 TRACE_ALLOC("nofix", args); 2279 trace_xfs_alloc_vextent_nofix(args);
2422 goto error0; 2280 goto error0;
2423 } 2281 }
2424 if (!args->agbp) { 2282 if (!args->agbp) {
2425 up_read(&mp->m_peraglock); 2283 up_read(&mp->m_peraglock);
2426 TRACE_ALLOC("noagbp", args); 2284 trace_xfs_alloc_vextent_noagbp(args);
2427 break; 2285 break;
2428 } 2286 }
2429 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2287 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
@@ -2488,7 +2346,7 @@ xfs_alloc_vextent(
2488 error = xfs_alloc_fix_freelist(args, flags); 2346 error = xfs_alloc_fix_freelist(args, flags);
2489 args->minleft = minleft; 2347 args->minleft = minleft;
2490 if (error) { 2348 if (error) {
2491 TRACE_ALLOC("nofix", args); 2349 trace_xfs_alloc_vextent_nofix(args);
2492 goto error0; 2350 goto error0;
2493 } 2351 }
2494 /* 2352 /*
@@ -2499,7 +2357,9 @@ xfs_alloc_vextent(
2499 goto error0; 2357 goto error0;
2500 break; 2358 break;
2501 } 2359 }
2502 TRACE_ALLOC("loopfailed", args); 2360
2361 trace_xfs_alloc_vextent_loopfailed(args);
2362
2503 /* 2363 /*
2504 * Didn't work, figure out the next iteration. 2364 * Didn't work, figure out the next iteration.
2505 */ 2365 */
@@ -2526,7 +2386,7 @@ xfs_alloc_vextent(
2526 if (args->agno == sagno) { 2386 if (args->agno == sagno) {
2527 if (no_min == 1) { 2387 if (no_min == 1) {
2528 args->agbno = NULLAGBLOCK; 2388 args->agbno = NULLAGBLOCK;
2529 TRACE_ALLOC("allfailed", args); 2389 trace_xfs_alloc_vextent_allfailed(args);
2530 break; 2390 break;
2531 } 2391 }
2532 if (flags == 0) { 2392 if (flags == 0) {
@@ -2642,16 +2502,16 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2642 } 2502 }
2643 } 2503 }
2644 2504
2505 trace_xfs_alloc_busy(mp, agno, bno, len, n);
2506
2645 if (n < XFS_PAGB_NUM_SLOTS) { 2507 if (n < XFS_PAGB_NUM_SLOTS) {
2646 bsy = &mp->m_perag[agno].pagb_list[n]; 2508 bsy = &mp->m_perag[agno].pagb_list[n];
2647 mp->m_perag[agno].pagb_count++; 2509 mp->m_perag[agno].pagb_count++;
2648 TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
2649 bsy->busy_start = bno; 2510 bsy->busy_start = bno;
2650 bsy->busy_length = len; 2511 bsy->busy_length = len;
2651 bsy->busy_tp = tp; 2512 bsy->busy_tp = tp;
2652 xfs_trans_add_busy(tp, agno, n); 2513 xfs_trans_add_busy(tp, agno, n);
2653 } else { 2514 } else {
2654 TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
2655 /* 2515 /*
2656 * The busy list is full! Since it is now not possible to 2516 * The busy list is full! Since it is now not possible to
2657 * track the free block, make this a synchronous transaction 2517 * track the free block, make this a synchronous transaction
@@ -2678,12 +2538,12 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
2678 list = mp->m_perag[agno].pagb_list; 2538 list = mp->m_perag[agno].pagb_list;
2679 2539
2680 ASSERT(idx < XFS_PAGB_NUM_SLOTS); 2540 ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2541
2542 trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
2543
2681 if (list[idx].busy_tp == tp) { 2544 if (list[idx].busy_tp == tp) {
2682 TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
2683 list[idx].busy_tp = NULL; 2545 list[idx].busy_tp = NULL;
2684 mp->m_perag[agno].pagb_count--; 2546 mp->m_perag[agno].pagb_count--;
2685 } else {
2686 TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
2687 } 2547 }
2688 2548
2689 spin_unlock(&mp->m_perag[agno].pagb_lock); 2549 spin_unlock(&mp->m_perag[agno].pagb_lock);
@@ -2703,45 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2703 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2704 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2705 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2706 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2707 int cnt; 2567 int cnt;
2708 2568
2709 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2710 2570
2711 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2712 cnt = mp->m_perag[agno].pagb_count;
2713 2572
2714 uend = bno + len - 1; 2573 uend = bno + len - 1;
2715 2574
2716 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2717 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2718 2585
2719 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2720 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2721 */ 2588 continue;
2722 if (bsy->busy_tp != NULL) { 2589
2723 bend = bsy->busy_start + bsy->busy_length - 1; 2590 /* (start1,length1) within (start2, length2) */
2724 if ((bno > bend) || (uend < bsy->busy_start)) { 2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2725 cnt--; 2592 lsn = bsy->busy_tp->t_commit_lsn;
2726 } else {
2727 TRACE_BUSYSEARCH("xfs_alloc_search_busy",
2728 "found1", agno, bno, len, tp);
2729 break;
2730 }
2731 }
2732 } 2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2733 2596
2734 /* 2597 /*
2735 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2736 * transaction that freed the block 2599 * transaction that freed the block
2737 */ 2600 */
2738 if (cnt) { 2601 if (lsn)
2739 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
2740 lsn = bsy->busy_tp->t_commit_lsn;
2741 spin_unlock(&mp->m_perag[agno].pagb_lock);
2742 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2743 } else {
2744 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
2745 spin_unlock(&mp->m_perag[agno].pagb_lock);
2746 }
2747} 2603}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index e704caee10d..599bffa3978 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -37,6 +37,15 @@ typedef enum xfs_alloctype
37 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ 37 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */
38} xfs_alloctype_t; 38} xfs_alloctype_t;
39 39
40#define XFS_ALLOC_TYPES \
41 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
42 { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
43 { XFS_ALLOCTYPE_START_AG, "START_AG" }, \
44 { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
45 { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
46 { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
47 { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" }
48
40/* 49/*
41 * Flags for xfs_alloc_fix_freelist. 50 * Flags for xfs_alloc_fix_freelist.
42 */ 51 */
@@ -109,24 +118,6 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
109 118
110#ifdef __KERNEL__ 119#ifdef __KERNEL__
111 120
112#if defined(XFS_ALLOC_TRACE)
113/*
114 * Allocation tracing buffer size.
115 */
116#define XFS_ALLOC_TRACE_SIZE 4096
117extern ktrace_t *xfs_alloc_trace_buf;
118
119/*
120 * Types for alloc tracing.
121 */
122#define XFS_ALLOC_KTRACE_ALLOC 1
123#define XFS_ALLOC_KTRACE_FREE 2
124#define XFS_ALLOC_KTRACE_MODAGF 3
125#define XFS_ALLOC_KTRACE_BUSY 4
126#define XFS_ALLOC_KTRACE_UNBUSY 5
127#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
128#endif
129
130void 121void
131xfs_alloc_mark_busy(xfs_trans_t *tp, 122xfs_alloc_mark_busy(xfs_trans_t *tp,
132 xfs_agnumber_t agno, 123 xfs_agnumber_t agno,
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index c10c3a292d3..adbd9141aea 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_alloc.h" 40#include "xfs_alloc.h"
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
42 43
43 44
44STATIC struct xfs_btree_cur * 45STATIC struct xfs_btree_cur *
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 4ece1906bd4..e953b6cfb2a 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_rw.h" 48#include "xfs_rw.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51/* 52/*
52 * xfs_attr.c 53 * xfs_attr.c
@@ -89,10 +90,6 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
89 90
90#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 91#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
91 92
92#if defined(XFS_ATTR_TRACE)
93ktrace_t *xfs_attr_trace_buf;
94#endif
95
96STATIC int 93STATIC int
97xfs_attr_name_to_xname( 94xfs_attr_name_to_xname(
98 struct xfs_name *xname, 95 struct xfs_name *xname,
@@ -123,9 +120,13 @@ xfs_inode_hasattr(
123 * Overall external interface routines. 120 * Overall external interface routines.
124 *========================================================================*/ 121 *========================================================================*/
125 122
126int 123STATIC int
127xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, 124xfs_attr_get_int(
128 char *value, int *valuelenp, int flags) 125 struct xfs_inode *ip,
126 struct xfs_name *name,
127 char *value,
128 int *valuelenp,
129 int flags)
129{ 130{
130 xfs_da_args_t args; 131 xfs_da_args_t args;
131 int error; 132 int error;
@@ -188,7 +189,7 @@ xfs_attr_get(
188 return error; 189 return error;
189 190
190 xfs_ilock(ip, XFS_ILOCK_SHARED); 191 xfs_ilock(ip, XFS_ILOCK_SHARED);
191 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags); 192 error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
192 xfs_iunlock(ip, XFS_ILOCK_SHARED); 193 xfs_iunlock(ip, XFS_ILOCK_SHARED);
193 return(error); 194 return(error);
194} 195}
@@ -636,7 +637,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
636 return EIO; 637 return EIO;
637 638
638 xfs_ilock(dp, XFS_ILOCK_SHARED); 639 xfs_ilock(dp, XFS_ILOCK_SHARED);
639 xfs_attr_trace_l_c("syscall start", context);
640 640
641 /* 641 /*
642 * Decide on what work routines to call based on the inode size. 642 * Decide on what work routines to call based on the inode size.
@@ -652,7 +652,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
652 } 652 }
653 653
654 xfs_iunlock(dp, XFS_ILOCK_SHARED); 654 xfs_iunlock(dp, XFS_ILOCK_SHARED);
655 xfs_attr_trace_l_c("syscall end", context);
656 655
657 return error; 656 return error;
658} 657}
@@ -698,7 +697,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
698 context->count * sizeof(alist->al_offset[0]); 697 context->count * sizeof(alist->al_offset[0]);
699 context->firstu -= ATTR_ENTSIZE(namelen); 698 context->firstu -= ATTR_ENTSIZE(namelen);
700 if (context->firstu < arraytop) { 699 if (context->firstu < arraytop) {
701 xfs_attr_trace_l_c("buffer full", context); 700 trace_xfs_attr_list_full(context);
702 alist->al_more = 1; 701 alist->al_more = 1;
703 context->seen_enough = 1; 702 context->seen_enough = 1;
704 return 1; 703 return 1;
@@ -710,7 +709,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
710 aep->a_name[namelen] = 0; 709 aep->a_name[namelen] = 0;
711 alist->al_offset[context->count++] = context->firstu; 710 alist->al_offset[context->count++] = context->firstu;
712 alist->al_count = context->count; 711 alist->al_count = context->count;
713 xfs_attr_trace_l_c("add", context); 712 trace_xfs_attr_list_add(context);
714 return 0; 713 return 0;
715} 714}
716 715
@@ -1849,7 +1848,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1849 node = bp->data; 1848 node = bp->data;
1850 switch (be16_to_cpu(node->hdr.info.magic)) { 1849 switch (be16_to_cpu(node->hdr.info.magic)) {
1851 case XFS_DA_NODE_MAGIC: 1850 case XFS_DA_NODE_MAGIC:
1852 xfs_attr_trace_l_cn("wrong blk", context, node); 1851 trace_xfs_attr_list_wrong_blk(context);
1853 xfs_da_brelse(NULL, bp); 1852 xfs_da_brelse(NULL, bp);
1854 bp = NULL; 1853 bp = NULL;
1855 break; 1854 break;
@@ -1857,20 +1856,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1857 leaf = bp->data; 1856 leaf = bp->data;
1858 if (cursor->hashval > be32_to_cpu(leaf->entries[ 1857 if (cursor->hashval > be32_to_cpu(leaf->entries[
1859 be16_to_cpu(leaf->hdr.count)-1].hashval)) { 1858 be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1860 xfs_attr_trace_l_cl("wrong blk", 1859 trace_xfs_attr_list_wrong_blk(context);
1861 context, leaf);
1862 xfs_da_brelse(NULL, bp); 1860 xfs_da_brelse(NULL, bp);
1863 bp = NULL; 1861 bp = NULL;
1864 } else if (cursor->hashval <= 1862 } else if (cursor->hashval <=
1865 be32_to_cpu(leaf->entries[0].hashval)) { 1863 be32_to_cpu(leaf->entries[0].hashval)) {
1866 xfs_attr_trace_l_cl("maybe wrong blk", 1864 trace_xfs_attr_list_wrong_blk(context);
1867 context, leaf);
1868 xfs_da_brelse(NULL, bp); 1865 xfs_da_brelse(NULL, bp);
1869 bp = NULL; 1866 bp = NULL;
1870 } 1867 }
1871 break; 1868 break;
1872 default: 1869 default:
1873 xfs_attr_trace_l_c("wrong blk - ??", context); 1870 trace_xfs_attr_list_wrong_blk(context);
1874 xfs_da_brelse(NULL, bp); 1871 xfs_da_brelse(NULL, bp);
1875 bp = NULL; 1872 bp = NULL;
1876 } 1873 }
@@ -1915,8 +1912,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1915 if (cursor->hashval 1912 if (cursor->hashval
1916 <= be32_to_cpu(btree->hashval)) { 1913 <= be32_to_cpu(btree->hashval)) {
1917 cursor->blkno = be32_to_cpu(btree->before); 1914 cursor->blkno = be32_to_cpu(btree->before);
1918 xfs_attr_trace_l_cb("descending", 1915 trace_xfs_attr_list_node_descend(context,
1919 context, btree); 1916 btree);
1920 break; 1917 break;
1921 } 1918 }
1922 } 1919 }
@@ -2143,8 +2140,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2140 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2141 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2145 2142
2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, 2143 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
2147 XFS_BUF_LOCK | XBF_DONT_BLOCK); 2144 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2148 ASSERT(bp); 2145 ASSERT(bp);
2149 ASSERT(!XFS_BUF_GETERROR(bp)); 2146 ASSERT(!XFS_BUF_GETERROR(bp));
2150 2147
@@ -2266,85 +2263,3 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2266 } 2263 }
2267 return(0); 2264 return(0);
2268} 2265}
2269
2270#if defined(XFS_ATTR_TRACE)
2271/*
2272 * Add a trace buffer entry for an attr_list context structure.
2273 */
2274void
2275xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2276{
2277 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
2278 (__psunsigned_t)NULL,
2279 (__psunsigned_t)NULL,
2280 (__psunsigned_t)NULL);
2281}
2282
2283/*
2284 * Add a trace buffer entry for a context structure and a Btree node.
2285 */
2286void
2287xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2288 struct xfs_da_intnode *node)
2289{
2290 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
2291 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2292 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2293 (__psunsigned_t)be32_to_cpu(node->btree[
2294 be16_to_cpu(node->hdr.count)-1].hashval));
2295}
2296
2297/*
2298 * Add a trace buffer entry for a context structure and a Btree element.
2299 */
2300void
2301xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2302 struct xfs_da_node_entry *btree)
2303{
2304 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
2305 (__psunsigned_t)be32_to_cpu(btree->hashval),
2306 (__psunsigned_t)be32_to_cpu(btree->before),
2307 (__psunsigned_t)NULL);
2308}
2309
2310/*
2311 * Add a trace buffer entry for a context structure and a leaf block.
2312 */
2313void
2314xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2315 struct xfs_attr_leafblock *leaf)
2316{
2317 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
2318 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2319 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2320 (__psunsigned_t)be32_to_cpu(leaf->entries[
2321 be16_to_cpu(leaf->hdr.count)-1].hashval));
2322}
2323
2324/*
2325 * Add a trace buffer entry for the arguments given to the routine,
2326 * generic form.
2327 */
2328void
2329xfs_attr_trace_enter(int type, char *where,
2330 struct xfs_attr_list_context *context,
2331 __psunsigned_t a13, __psunsigned_t a14,
2332 __psunsigned_t a15)
2333{
2334 ASSERT(xfs_attr_trace_buf);
2335 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2336 (void *)((__psunsigned_t)where),
2337 (void *)((__psunsigned_t)context->dp),
2338 (void *)((__psunsigned_t)context->cursor->hashval),
2339 (void *)((__psunsigned_t)context->cursor->blkno),
2340 (void *)((__psunsigned_t)context->cursor->offset),
2341 (void *)((__psunsigned_t)context->alist),
2342 (void *)((__psunsigned_t)context->bufsize),
2343 (void *)((__psunsigned_t)context->count),
2344 (void *)((__psunsigned_t)context->firstu),
2345 NULL,
2346 (void *)((__psunsigned_t)context->dupcnt),
2347 (void *)((__psunsigned_t)context->flags),
2348 (void *)a13, (void *)a14, (void *)a15);
2349}
2350#endif /* XFS_ATTR_TRACE */
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index fb3b2a68b9b..59b410ce69a 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -48,6 +48,16 @@ struct xfs_attr_list_context;
48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
50 50
51#define XFS_ATTR_FLAGS \
52 { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
53 { ATTR_ROOT, "ROOT" }, \
54 { ATTR_TRUST, "TRUST" }, \
55 { ATTR_SECURE, "SECURE" }, \
56 { ATTR_CREATE, "CREATE" }, \
57 { ATTR_REPLACE, "REPLACE" }, \
58 { ATTR_KERNOTIME, "KERNOTIME" }, \
59 { ATTR_KERNOVAL, "KERNOVAL" }
60
51/* 61/*
52 * The maximum size (into the kernel or returned from the kernel) of an 62 * The maximum size (into the kernel or returned from the kernel) of an
53 * attribute value or the buffer used for an attr_list() call. Larger 63 * attribute value or the buffer used for an attr_list() call. Larger
@@ -131,7 +141,6 @@ typedef struct xfs_attr_list_context {
131 */ 141 */
132int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); 142int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
133int xfs_attr_inactive(struct xfs_inode *dp); 143int xfs_attr_inactive(struct xfs_inode *dp);
134int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
135int xfs_attr_rmtval_get(struct xfs_da_args *args); 144int xfs_attr_rmtval_get(struct xfs_da_args *args);
136int xfs_attr_list_int(struct xfs_attr_list_context *); 145int xfs_attr_list_int(struct xfs_attr_list_context *);
137 146
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index afdc8911637..baf41b5af75 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -42,6 +42,7 @@
42#include "xfs_attr.h" 42#include "xfs_attr.h"
43#include "xfs_attr_leaf.h" 43#include "xfs_attr_leaf.h"
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_trace.h"
45 46
46/* 47/*
47 * xfs_attr_leaf.c 48 * xfs_attr_leaf.c
@@ -98,7 +99,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
98 * If namespace bits don't match return 0. 99 * If namespace bits don't match return 0.
99 * If all match then return 1. 100 * If all match then return 1.
100 */ 101 */
101STATIC_INLINE int 102STATIC int
102xfs_attr_namesp_match(int arg_flags, int ondisk_flags) 103xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
103{ 104{
104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 105 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
@@ -594,7 +595,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
594 cursor = context->cursor; 595 cursor = context->cursor;
595 ASSERT(cursor != NULL); 596 ASSERT(cursor != NULL);
596 597
597 xfs_attr_trace_l_c("sf start", context); 598 trace_xfs_attr_list_sf(context);
598 599
599 /* 600 /*
600 * If the buffer is large enough and the cursor is at the start, 601 * If the buffer is large enough and the cursor is at the start,
@@ -627,7 +628,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
627 return error; 628 return error;
628 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 629 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
629 } 630 }
630 xfs_attr_trace_l_c("sf big-gulp", context); 631 trace_xfs_attr_list_sf_all(context);
631 return(0); 632 return(0);
632 } 633 }
633 634
@@ -653,7 +654,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
653 XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", 654 XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
654 XFS_ERRLEVEL_LOW, 655 XFS_ERRLEVEL_LOW,
655 context->dp->i_mount, sfe); 656 context->dp->i_mount, sfe);
656 xfs_attr_trace_l_c("sf corrupted", context);
657 kmem_free(sbuf); 657 kmem_free(sbuf);
658 return XFS_ERROR(EFSCORRUPTED); 658 return XFS_ERROR(EFSCORRUPTED);
659 } 659 }
@@ -693,7 +693,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
693 } 693 }
694 if (i == nsbuf) { 694 if (i == nsbuf) {
695 kmem_free(sbuf); 695 kmem_free(sbuf);
696 xfs_attr_trace_l_c("blk end", context);
697 return(0); 696 return(0);
698 } 697 }
699 698
@@ -719,7 +718,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
719 } 718 }
720 719
721 kmem_free(sbuf); 720 kmem_free(sbuf);
722 xfs_attr_trace_l_c("sf E-O-F", context);
723 return(0); 721 return(0);
724} 722}
725 723
@@ -2323,7 +2321,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2323 cursor = context->cursor; 2321 cursor = context->cursor;
2324 cursor->initted = 1; 2322 cursor->initted = 1;
2325 2323
2326 xfs_attr_trace_l_cl("blk start", context, leaf); 2324 trace_xfs_attr_list_leaf(context);
2327 2325
2328 /* 2326 /*
2329 * Re-find our place in the leaf block if this is a new syscall. 2327 * Re-find our place in the leaf block if this is a new syscall.
@@ -2344,7 +2342,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2344 } 2342 }
2345 } 2343 }
2346 if (i == be16_to_cpu(leaf->hdr.count)) { 2344 if (i == be16_to_cpu(leaf->hdr.count)) {
2347 xfs_attr_trace_l_c("not found", context); 2345 trace_xfs_attr_list_notfound(context);
2348 return(0); 2346 return(0);
2349 } 2347 }
2350 } else { 2348 } else {
@@ -2419,7 +2417,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2419 break; 2417 break;
2420 cursor->offset++; 2418 cursor->offset++;
2421 } 2419 }
2422 xfs_attr_trace_l_cl("blk end", context, leaf); 2420 trace_xfs_attr_list_leaf_end(context);
2423 return(retval); 2421 return(retval);
2424} 2422}
2425 2423
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index ea22839caed..76ab7b0cbb3 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -25,8 +25,6 @@
25 * to fit into the literal area of the inode. 25 * to fit into the literal area of the inode.
26 */ 26 */
27 27
28struct xfs_inode;
29
30/* 28/*
31 * Entries are packed toward the top as tight as possible. 29 * Entries are packed toward the top as tight as possible.
32 */ 30 */
@@ -69,42 +67,4 @@ typedef struct xfs_attr_sf_sort {
69 (be16_to_cpu(((xfs_attr_shortform_t *) \ 67 (be16_to_cpu(((xfs_attr_shortform_t *) \
70 ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) 68 ((dp)->i_afp->if_u1.if_data))->hdr.totsize))
71 69
72#if defined(XFS_ATTR_TRACE)
73/*
74 * Kernel tracing support for attribute lists
75 */
76struct xfs_attr_list_context;
77struct xfs_da_intnode;
78struct xfs_da_node_entry;
79struct xfs_attr_leafblock;
80
81#define XFS_ATTR_TRACE_SIZE 4096 /* size of global trace buffer */
82extern ktrace_t *xfs_attr_trace_buf;
83
84/*
85 * Trace record types.
86 */
87#define XFS_ATTR_KTRACE_L_C 1 /* context */
88#define XFS_ATTR_KTRACE_L_CN 2 /* context, node */
89#define XFS_ATTR_KTRACE_L_CB 3 /* context, btree */
90#define XFS_ATTR_KTRACE_L_CL 4 /* context, leaf */
91
92void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context);
93void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
94 struct xfs_da_intnode *node);
95void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
96 struct xfs_da_node_entry *btree);
97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
98 struct xfs_attr_leafblock *leaf);
99void xfs_attr_trace_enter(int type, char *where,
100 struct xfs_attr_list_context *context,
101 __psunsigned_t a13, __psunsigned_t a14,
102 __psunsigned_t a15);
103#else
104#define xfs_attr_trace_l_c(w,c)
105#define xfs_attr_trace_l_cn(w,c,n)
106#define xfs_attr_trace_l_cb(w,c,b)
107#define xfs_attr_trace_l_cl(w,c,l)
108#endif /* XFS_ATTR_TRACE */
109
110#endif /* __XFS_ATTR_SF_H__ */ 70#endif /* __XFS_ATTR_SF_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8971fb09d38..98251cdc52a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -54,6 +54,7 @@
54#include "xfs_buf_item.h" 54#include "xfs_buf_item.h"
55#include "xfs_filestream.h" 55#include "xfs_filestream.h"
56#include "xfs_vnodeops.h" 56#include "xfs_vnodeops.h"
57#include "xfs_trace.h"
57 58
58 59
59#ifdef DEBUG 60#ifdef DEBUG
@@ -272,71 +273,6 @@ xfs_bmap_isaeof(
272 int whichfork, /* data or attribute fork */ 273 int whichfork, /* data or attribute fork */
273 char *aeof); /* return value */ 274 char *aeof); /* return value */
274 275
275#ifdef XFS_BMAP_TRACE
276/*
277 * Add bmap trace entry prior to a call to xfs_iext_remove.
278 */
279STATIC void
280xfs_bmap_trace_delete(
281 const char *fname, /* function name */
282 char *desc, /* operation description */
283 xfs_inode_t *ip, /* incore inode pointer */
284 xfs_extnum_t idx, /* index of entry(entries) deleted */
285 xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */
286 int whichfork); /* data or attr fork */
287
288/*
289 * Add bmap trace entry prior to a call to xfs_iext_insert, or
290 * reading in the extents list from the disk (in the btree).
291 */
292STATIC void
293xfs_bmap_trace_insert(
294 const char *fname, /* function name */
295 char *desc, /* operation description */
296 xfs_inode_t *ip, /* incore inode pointer */
297 xfs_extnum_t idx, /* index of entry(entries) inserted */
298 xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */
299 xfs_bmbt_irec_t *r1, /* inserted record 1 */
300 xfs_bmbt_irec_t *r2, /* inserted record 2 or null */
301 int whichfork); /* data or attr fork */
302
303/*
304 * Add bmap trace entry after updating an extent record in place.
305 */
306STATIC void
307xfs_bmap_trace_post_update(
308 const char *fname, /* function name */
309 char *desc, /* operation description */
310 xfs_inode_t *ip, /* incore inode pointer */
311 xfs_extnum_t idx, /* index of entry updated */
312 int whichfork); /* data or attr fork */
313
314/*
315 * Add bmap trace entry prior to updating an extent record in place.
316 */
317STATIC void
318xfs_bmap_trace_pre_update(
319 const char *fname, /* function name */
320 char *desc, /* operation description */
321 xfs_inode_t *ip, /* incore inode pointer */
322 xfs_extnum_t idx, /* index of entry to be updated */
323 int whichfork); /* data or attr fork */
324
325#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \
326 xfs_bmap_trace_delete(__func__,d,ip,i,c,w)
327#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
328 xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w)
329#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \
330 xfs_bmap_trace_post_update(__func__,d,ip,i,w)
331#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \
332 xfs_bmap_trace_pre_update(__func__,d,ip,i,w)
333#else
334#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
335#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
336#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
337#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
338#endif /* XFS_BMAP_TRACE */
339
340/* 276/*
341 * Compute the worst-case number of indirect blocks that will be used 277 * Compute the worst-case number of indirect blocks that will be used
342 * for ip's delayed extent of length "len". 278 * for ip's delayed extent of length "len".
@@ -363,18 +299,6 @@ xfs_bmap_validate_ret(
363#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) 299#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
364#endif /* DEBUG */ 300#endif /* DEBUG */
365 301
366#if defined(XFS_RW_TRACE)
367STATIC void
368xfs_bunmap_trace(
369 xfs_inode_t *ip,
370 xfs_fileoff_t bno,
371 xfs_filblks_t len,
372 int flags,
373 inst_t *ra);
374#else
375#define xfs_bunmap_trace(ip, bno, len, flags, ra)
376#endif /* XFS_RW_TRACE */
377
378STATIC int 302STATIC int
379xfs_bmap_count_tree( 303xfs_bmap_count_tree(
380 xfs_mount_t *mp, 304 xfs_mount_t *mp,
@@ -590,9 +514,9 @@ xfs_bmap_add_extent(
590 * already extents in the list. 514 * already extents in the list.
591 */ 515 */
592 if (nextents == 0) { 516 if (nextents == 0) {
593 XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL, 517 xfs_iext_insert(ip, 0, 1, new,
594 whichfork); 518 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
595 xfs_iext_insert(ifp, 0, 1, new); 519
596 ASSERT(cur == NULL); 520 ASSERT(cur == NULL);
597 ifp->if_lastex = 0; 521 ifp->if_lastex = 0;
598 if (!isnullstartblock(new->br_startblock)) { 522 if (!isnullstartblock(new->br_startblock)) {
@@ -759,26 +683,10 @@ xfs_bmap_add_extent_delay_real(
759 xfs_filblks_t temp=0; /* value for dnew calculations */ 683 xfs_filblks_t temp=0; /* value for dnew calculations */
760 xfs_filblks_t temp2=0;/* value for dnew calculations */ 684 xfs_filblks_t temp2=0;/* value for dnew calculations */
761 int tmp_rval; /* partial logging flags */ 685 int tmp_rval; /* partial logging flags */
762 enum { /* bit number definitions for state */
763 LEFT_CONTIG, RIGHT_CONTIG,
764 LEFT_FILLING, RIGHT_FILLING,
765 LEFT_DELAY, RIGHT_DELAY,
766 LEFT_VALID, RIGHT_VALID
767 };
768 686
769#define LEFT r[0] 687#define LEFT r[0]
770#define RIGHT r[1] 688#define RIGHT r[1]
771#define PREV r[2] 689#define PREV r[2]
772#define MASK(b) (1 << (b))
773#define MASK2(a,b) (MASK(a) | MASK(b))
774#define MASK3(a,b,c) (MASK2(a,b) | MASK(c))
775#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d))
776#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
777#define STATE_TEST(b) (state & MASK(b))
778#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
779 ((state &= ~MASK(b)), 0))
780#define SWITCH_STATE \
781 (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
782 690
783 /* 691 /*
784 * Set up a bunch of variables to make the tests simpler. 692 * Set up a bunch of variables to make the tests simpler.
@@ -790,69 +698,80 @@ xfs_bmap_add_extent_delay_real(
790 new_endoff = new->br_startoff + new->br_blockcount; 698 new_endoff = new->br_startoff + new->br_blockcount;
791 ASSERT(PREV.br_startoff <= new->br_startoff); 699 ASSERT(PREV.br_startoff <= new->br_startoff);
792 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 700 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
701
793 /* 702 /*
794 * Set flags determining what part of the previous delayed allocation 703 * Set flags determining what part of the previous delayed allocation
795 * extent is being replaced by a real allocation. 704 * extent is being replaced by a real allocation.
796 */ 705 */
797 STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); 706 if (PREV.br_startoff == new->br_startoff)
798 STATE_SET(RIGHT_FILLING, 707 state |= BMAP_LEFT_FILLING;
799 PREV.br_startoff + PREV.br_blockcount == new_endoff); 708 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
709 state |= BMAP_RIGHT_FILLING;
710
800 /* 711 /*
801 * Check and set flags if this segment has a left neighbor. 712 * Check and set flags if this segment has a left neighbor.
802 * Don't set contiguous if the combined extent would be too large. 713 * Don't set contiguous if the combined extent would be too large.
803 */ 714 */
804 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 715 if (idx > 0) {
716 state |= BMAP_LEFT_VALID;
805 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); 717 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
806 STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); 718
719 if (isnullstartblock(LEFT.br_startblock))
720 state |= BMAP_LEFT_DELAY;
807 } 721 }
808 STATE_SET(LEFT_CONTIG, 722
809 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 723 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
810 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 724 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
811 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 725 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
812 LEFT.br_state == new->br_state && 726 LEFT.br_state == new->br_state &&
813 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); 727 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
728 state |= BMAP_LEFT_CONTIG;
729
814 /* 730 /*
815 * Check and set flags if this segment has a right neighbor. 731 * Check and set flags if this segment has a right neighbor.
816 * Don't set contiguous if the combined extent would be too large. 732 * Don't set contiguous if the combined extent would be too large.
817 * Also check for all-three-contiguous being too large. 733 * Also check for all-three-contiguous being too large.
818 */ 734 */
819 if (STATE_SET_TEST(RIGHT_VALID, 735 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
820 idx < 736 state |= BMAP_RIGHT_VALID;
821 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
822 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); 737 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
823 STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); 738
739 if (isnullstartblock(RIGHT.br_startblock))
740 state |= BMAP_RIGHT_DELAY;
824 } 741 }
825 STATE_SET(RIGHT_CONTIG, 742
826 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 743 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
827 new_endoff == RIGHT.br_startoff && 744 new_endoff == RIGHT.br_startoff &&
828 new->br_startblock + new->br_blockcount == 745 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
829 RIGHT.br_startblock && 746 new->br_state == RIGHT.br_state &&
830 new->br_state == RIGHT.br_state && 747 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
831 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 748 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
832 ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != 749 BMAP_RIGHT_FILLING)) !=
833 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || 750 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
834 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 751 BMAP_RIGHT_FILLING) ||
835 <= MAXEXTLEN)); 752 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
753 <= MAXEXTLEN))
754 state |= BMAP_RIGHT_CONTIG;
755
836 error = 0; 756 error = 0;
837 /* 757 /*
838 * Switch out based on the FILLING and CONTIG state bits. 758 * Switch out based on the FILLING and CONTIG state bits.
839 */ 759 */
840 switch (SWITCH_STATE) { 760 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
841 761 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
842 case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 762 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
763 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
843 /* 764 /*
844 * Filling in all of a previously delayed allocation extent. 765 * Filling in all of a previously delayed allocation extent.
845 * The left and right neighbors are both contiguous with new. 766 * The left and right neighbors are both contiguous with new.
846 */ 767 */
847 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, 768 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
848 XFS_DATA_FORK);
849 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 769 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
850 LEFT.br_blockcount + PREV.br_blockcount + 770 LEFT.br_blockcount + PREV.br_blockcount +
851 RIGHT.br_blockcount); 771 RIGHT.br_blockcount);
852 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, 772 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
853 XFS_DATA_FORK); 773
854 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); 774 xfs_iext_remove(ip, idx, 2, state);
855 xfs_iext_remove(ifp, idx, 2);
856 ip->i_df.if_lastex = idx - 1; 775 ip->i_df.if_lastex = idx - 1;
857 ip->i_d.di_nextents--; 776 ip->i_d.di_nextents--;
858 if (cur == NULL) 777 if (cur == NULL)
@@ -885,20 +804,18 @@ xfs_bmap_add_extent_delay_real(
885 RIGHT.br_blockcount; 804 RIGHT.br_blockcount;
886 break; 805 break;
887 806
888 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 807 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
889 /* 808 /*
890 * Filling in all of a previously delayed allocation extent. 809 * Filling in all of a previously delayed allocation extent.
891 * The left neighbor is contiguous, the right is not. 810 * The left neighbor is contiguous, the right is not.
892 */ 811 */
893 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, 812 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
894 XFS_DATA_FORK);
895 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 813 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
896 LEFT.br_blockcount + PREV.br_blockcount); 814 LEFT.br_blockcount + PREV.br_blockcount);
897 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, 815 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
898 XFS_DATA_FORK); 816
899 ip->i_df.if_lastex = idx - 1; 817 ip->i_df.if_lastex = idx - 1;
900 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); 818 xfs_iext_remove(ip, idx, 1, state);
901 xfs_iext_remove(ifp, idx, 1);
902 if (cur == NULL) 819 if (cur == NULL)
903 rval = XFS_ILOG_DEXT; 820 rval = XFS_ILOG_DEXT;
904 else { 821 else {
@@ -921,19 +838,19 @@ xfs_bmap_add_extent_delay_real(
921 PREV.br_blockcount; 838 PREV.br_blockcount;
922 break; 839 break;
923 840
924 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 841 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
925 /* 842 /*
926 * Filling in all of a previously delayed allocation extent. 843 * Filling in all of a previously delayed allocation extent.
927 * The right neighbor is contiguous, the left is not. 844 * The right neighbor is contiguous, the left is not.
928 */ 845 */
929 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); 846 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
930 xfs_bmbt_set_startblock(ep, new->br_startblock); 847 xfs_bmbt_set_startblock(ep, new->br_startblock);
931 xfs_bmbt_set_blockcount(ep, 848 xfs_bmbt_set_blockcount(ep,
932 PREV.br_blockcount + RIGHT.br_blockcount); 849 PREV.br_blockcount + RIGHT.br_blockcount);
933 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); 850 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
851
934 ip->i_df.if_lastex = idx; 852 ip->i_df.if_lastex = idx;
935 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); 853 xfs_iext_remove(ip, idx + 1, 1, state);
936 xfs_iext_remove(ifp, idx + 1, 1);
937 if (cur == NULL) 854 if (cur == NULL)
938 rval = XFS_ILOG_DEXT; 855 rval = XFS_ILOG_DEXT;
939 else { 856 else {
@@ -956,15 +873,16 @@ xfs_bmap_add_extent_delay_real(
956 RIGHT.br_blockcount; 873 RIGHT.br_blockcount;
957 break; 874 break;
958 875
959 case MASK2(LEFT_FILLING, RIGHT_FILLING): 876 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
960 /* 877 /*
961 * Filling in all of a previously delayed allocation extent. 878 * Filling in all of a previously delayed allocation extent.
962 * Neither the left nor right neighbors are contiguous with 879 * Neither the left nor right neighbors are contiguous with
963 * the new one. 880 * the new one.
964 */ 881 */
965 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); 882 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
966 xfs_bmbt_set_startblock(ep, new->br_startblock); 883 xfs_bmbt_set_startblock(ep, new->br_startblock);
967 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); 884 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
885
968 ip->i_df.if_lastex = idx; 886 ip->i_df.if_lastex = idx;
969 ip->i_d.di_nextents++; 887 ip->i_d.di_nextents++;
970 if (cur == NULL) 888 if (cur == NULL)
@@ -987,19 +905,20 @@ xfs_bmap_add_extent_delay_real(
987 temp2 = new->br_blockcount; 905 temp2 = new->br_blockcount;
988 break; 906 break;
989 907
990 case MASK2(LEFT_FILLING, LEFT_CONTIG): 908 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
991 /* 909 /*
992 * Filling in the first part of a previous delayed allocation. 910 * Filling in the first part of a previous delayed allocation.
993 * The left neighbor is contiguous. 911 * The left neighbor is contiguous.
994 */ 912 */
995 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); 913 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
996 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 914 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
997 LEFT.br_blockcount + new->br_blockcount); 915 LEFT.br_blockcount + new->br_blockcount);
998 xfs_bmbt_set_startoff(ep, 916 xfs_bmbt_set_startoff(ep,
999 PREV.br_startoff + new->br_blockcount); 917 PREV.br_startoff + new->br_blockcount);
1000 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); 918 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
919
1001 temp = PREV.br_blockcount - new->br_blockcount; 920 temp = PREV.br_blockcount - new->br_blockcount;
1002 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); 921 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1003 xfs_bmbt_set_blockcount(ep, temp); 922 xfs_bmbt_set_blockcount(ep, temp);
1004 ip->i_df.if_lastex = idx - 1; 923 ip->i_df.if_lastex = idx - 1;
1005 if (cur == NULL) 924 if (cur == NULL)
@@ -1021,7 +940,7 @@ xfs_bmap_add_extent_delay_real(
1021 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 940 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1022 startblockval(PREV.br_startblock)); 941 startblockval(PREV.br_startblock));
1023 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 942 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1024 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); 943 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1025 *dnew = temp; 944 *dnew = temp;
1026 /* DELTA: The boundary between two in-core extents moved. */ 945 /* DELTA: The boundary between two in-core extents moved. */
1027 temp = LEFT.br_startoff; 946 temp = LEFT.br_startoff;
@@ -1029,18 +948,16 @@ xfs_bmap_add_extent_delay_real(
1029 PREV.br_blockcount; 948 PREV.br_blockcount;
1030 break; 949 break;
1031 950
1032 case MASK(LEFT_FILLING): 951 case BMAP_LEFT_FILLING:
1033 /* 952 /*
1034 * Filling in the first part of a previous delayed allocation. 953 * Filling in the first part of a previous delayed allocation.
1035 * The left neighbor is not contiguous. 954 * The left neighbor is not contiguous.
1036 */ 955 */
1037 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); 956 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1038 xfs_bmbt_set_startoff(ep, new_endoff); 957 xfs_bmbt_set_startoff(ep, new_endoff);
1039 temp = PREV.br_blockcount - new->br_blockcount; 958 temp = PREV.br_blockcount - new->br_blockcount;
1040 xfs_bmbt_set_blockcount(ep, temp); 959 xfs_bmbt_set_blockcount(ep, temp);
1041 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, 960 xfs_iext_insert(ip, idx, 1, new, state);
1042 XFS_DATA_FORK);
1043 xfs_iext_insert(ifp, idx, 1, new);
1044 ip->i_df.if_lastex = idx; 961 ip->i_df.if_lastex = idx;
1045 ip->i_d.di_nextents++; 962 ip->i_d.di_nextents++;
1046 if (cur == NULL) 963 if (cur == NULL)
@@ -1071,27 +988,27 @@ xfs_bmap_add_extent_delay_real(
1071 (cur ? cur->bc_private.b.allocated : 0)); 988 (cur ? cur->bc_private.b.allocated : 0));
1072 ep = xfs_iext_get_ext(ifp, idx + 1); 989 ep = xfs_iext_get_ext(ifp, idx + 1);
1073 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 990 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1074 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK); 991 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1075 *dnew = temp; 992 *dnew = temp;
1076 /* DELTA: One in-core extent is split in two. */ 993 /* DELTA: One in-core extent is split in two. */
1077 temp = PREV.br_startoff; 994 temp = PREV.br_startoff;
1078 temp2 = PREV.br_blockcount; 995 temp2 = PREV.br_blockcount;
1079 break; 996 break;
1080 997
1081 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 998 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1082 /* 999 /*
1083 * Filling in the last part of a previous delayed allocation. 1000 * Filling in the last part of a previous delayed allocation.
1084 * The right neighbor is contiguous with the new allocation. 1001 * The right neighbor is contiguous with the new allocation.
1085 */ 1002 */
1086 temp = PREV.br_blockcount - new->br_blockcount; 1003 temp = PREV.br_blockcount - new->br_blockcount;
1087 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); 1004 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1088 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); 1005 trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
1089 xfs_bmbt_set_blockcount(ep, temp); 1006 xfs_bmbt_set_blockcount(ep, temp);
1090 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1007 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1091 new->br_startoff, new->br_startblock, 1008 new->br_startoff, new->br_startblock,
1092 new->br_blockcount + RIGHT.br_blockcount, 1009 new->br_blockcount + RIGHT.br_blockcount,
1093 RIGHT.br_state); 1010 RIGHT.br_state);
1094 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); 1011 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1095 ip->i_df.if_lastex = idx + 1; 1012 ip->i_df.if_lastex = idx + 1;
1096 if (cur == NULL) 1013 if (cur == NULL)
1097 rval = XFS_ILOG_DEXT; 1014 rval = XFS_ILOG_DEXT;
@@ -1112,7 +1029,7 @@ xfs_bmap_add_extent_delay_real(
1112 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 1029 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1113 startblockval(PREV.br_startblock)); 1030 startblockval(PREV.br_startblock));
1114 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1115 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); 1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1116 *dnew = temp; 1033 *dnew = temp;
1117 /* DELTA: The boundary between two in-core extents moved. */ 1034 /* DELTA: The boundary between two in-core extents moved. */
1118 temp = PREV.br_startoff; 1035 temp = PREV.br_startoff;
@@ -1120,17 +1037,15 @@ xfs_bmap_add_extent_delay_real(
1120 RIGHT.br_blockcount; 1037 RIGHT.br_blockcount;
1121 break; 1038 break;
1122 1039
1123 case MASK(RIGHT_FILLING): 1040 case BMAP_RIGHT_FILLING:
1124 /* 1041 /*
1125 * Filling in the last part of a previous delayed allocation. 1042 * Filling in the last part of a previous delayed allocation.
1126 * The right neighbor is not contiguous. 1043 * The right neighbor is not contiguous.
1127 */ 1044 */
1128 temp = PREV.br_blockcount - new->br_blockcount; 1045 temp = PREV.br_blockcount - new->br_blockcount;
1129 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1046 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1130 xfs_bmbt_set_blockcount(ep, temp); 1047 xfs_bmbt_set_blockcount(ep, temp);
1131 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, 1048 xfs_iext_insert(ip, idx + 1, 1, new, state);
1132 XFS_DATA_FORK);
1133 xfs_iext_insert(ifp, idx + 1, 1, new);
1134 ip->i_df.if_lastex = idx + 1; 1049 ip->i_df.if_lastex = idx + 1;
1135 ip->i_d.di_nextents++; 1050 ip->i_d.di_nextents++;
1136 if (cur == NULL) 1051 if (cur == NULL)
@@ -1161,7 +1076,7 @@ xfs_bmap_add_extent_delay_real(
1161 (cur ? cur->bc_private.b.allocated : 0)); 1076 (cur ? cur->bc_private.b.allocated : 0));
1162 ep = xfs_iext_get_ext(ifp, idx); 1077 ep = xfs_iext_get_ext(ifp, idx);
1163 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1078 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1164 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1079 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1165 *dnew = temp; 1080 *dnew = temp;
1166 /* DELTA: One in-core extent is split in two. */ 1081 /* DELTA: One in-core extent is split in two. */
1167 temp = PREV.br_startoff; 1082 temp = PREV.br_startoff;
@@ -1175,7 +1090,7 @@ xfs_bmap_add_extent_delay_real(
1175 * This case is avoided almost all the time. 1090 * This case is avoided almost all the time.
1176 */ 1091 */
1177 temp = new->br_startoff - PREV.br_startoff; 1092 temp = new->br_startoff - PREV.br_startoff;
1178 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); 1093 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1179 xfs_bmbt_set_blockcount(ep, temp); 1094 xfs_bmbt_set_blockcount(ep, temp);
1180 r[0] = *new; 1095 r[0] = *new;
1181 r[1].br_state = PREV.br_state; 1096 r[1].br_state = PREV.br_state;
@@ -1183,9 +1098,7 @@ xfs_bmap_add_extent_delay_real(
1183 r[1].br_startoff = new_endoff; 1098 r[1].br_startoff = new_endoff;
1184 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1099 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1185 r[1].br_blockcount = temp2; 1100 r[1].br_blockcount = temp2;
1186 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], 1101 xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
1187 XFS_DATA_FORK);
1188 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1189 ip->i_df.if_lastex = idx + 1; 1102 ip->i_df.if_lastex = idx + 1;
1190 ip->i_d.di_nextents++; 1103 ip->i_d.di_nextents++;
1191 if (cur == NULL) 1104 if (cur == NULL)
@@ -1242,24 +1155,24 @@ xfs_bmap_add_extent_delay_real(
1242 } 1155 }
1243 ep = xfs_iext_get_ext(ifp, idx); 1156 ep = xfs_iext_get_ext(ifp, idx);
1244 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1157 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1245 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); 1158 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1246 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); 1159 trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_);
1247 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), 1160 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
1248 nullstartblock((int)temp2)); 1161 nullstartblock((int)temp2));
1249 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); 1162 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
1250 *dnew = temp + temp2; 1163 *dnew = temp + temp2;
1251 /* DELTA: One in-core extent is split in three. */ 1164 /* DELTA: One in-core extent is split in three. */
1252 temp = PREV.br_startoff; 1165 temp = PREV.br_startoff;
1253 temp2 = PREV.br_blockcount; 1166 temp2 = PREV.br_blockcount;
1254 break; 1167 break;
1255 1168
1256 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1169 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1257 case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1170 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1258 case MASK2(LEFT_FILLING, RIGHT_CONTIG): 1171 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1259 case MASK2(RIGHT_FILLING, LEFT_CONTIG): 1172 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1260 case MASK2(LEFT_CONTIG, RIGHT_CONTIG): 1173 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1261 case MASK(LEFT_CONTIG): 1174 case BMAP_LEFT_CONTIG:
1262 case MASK(RIGHT_CONTIG): 1175 case BMAP_RIGHT_CONTIG:
1263 /* 1176 /*
1264 * These cases are all impossible. 1177 * These cases are all impossible.
1265 */ 1178 */
@@ -1279,14 +1192,6 @@ done:
1279#undef LEFT 1192#undef LEFT
1280#undef RIGHT 1193#undef RIGHT
1281#undef PREV 1194#undef PREV
1282#undef MASK
1283#undef MASK2
1284#undef MASK3
1285#undef MASK4
1286#undef STATE_SET
1287#undef STATE_TEST
1288#undef STATE_SET_TEST
1289#undef SWITCH_STATE
1290} 1195}
1291 1196
1292/* 1197/*
@@ -1316,27 +1221,10 @@ xfs_bmap_add_extent_unwritten_real(
1316 int state = 0;/* state bits, accessed thru macros */ 1221 int state = 0;/* state bits, accessed thru macros */
1317 xfs_filblks_t temp=0; 1222 xfs_filblks_t temp=0;
1318 xfs_filblks_t temp2=0; 1223 xfs_filblks_t temp2=0;
1319 enum { /* bit number definitions for state */
1320 LEFT_CONTIG, RIGHT_CONTIG,
1321 LEFT_FILLING, RIGHT_FILLING,
1322 LEFT_DELAY, RIGHT_DELAY,
1323 LEFT_VALID, RIGHT_VALID
1324 };
1325 1224
1326#define LEFT r[0] 1225#define LEFT r[0]
1327#define RIGHT r[1] 1226#define RIGHT r[1]
1328#define PREV r[2] 1227#define PREV r[2]
1329#define MASK(b) (1 << (b))
1330#define MASK2(a,b) (MASK(a) | MASK(b))
1331#define MASK3(a,b,c) (MASK2(a,b) | MASK(c))
1332#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d))
1333#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1334#define STATE_TEST(b) (state & MASK(b))
1335#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
1336 ((state &= ~MASK(b)), 0))
1337#define SWITCH_STATE \
1338 (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
1339
1340 /* 1228 /*
1341 * Set up a bunch of variables to make the tests simpler. 1229 * Set up a bunch of variables to make the tests simpler.
1342 */ 1230 */
@@ -1352,68 +1240,78 @@ xfs_bmap_add_extent_unwritten_real(
1352 new_endoff = new->br_startoff + new->br_blockcount; 1240 new_endoff = new->br_startoff + new->br_blockcount;
1353 ASSERT(PREV.br_startoff <= new->br_startoff); 1241 ASSERT(PREV.br_startoff <= new->br_startoff);
1354 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 1242 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1243
1355 /* 1244 /*
1356 * Set flags determining what part of the previous oldext allocation 1245 * Set flags determining what part of the previous oldext allocation
1357 * extent is being replaced by a newext allocation. 1246 * extent is being replaced by a newext allocation.
1358 */ 1247 */
1359 STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); 1248 if (PREV.br_startoff == new->br_startoff)
1360 STATE_SET(RIGHT_FILLING, 1249 state |= BMAP_LEFT_FILLING;
1361 PREV.br_startoff + PREV.br_blockcount == new_endoff); 1250 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1251 state |= BMAP_RIGHT_FILLING;
1252
1362 /* 1253 /*
1363 * Check and set flags if this segment has a left neighbor. 1254 * Check and set flags if this segment has a left neighbor.
1364 * Don't set contiguous if the combined extent would be too large. 1255 * Don't set contiguous if the combined extent would be too large.
1365 */ 1256 */
1366 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1257 if (idx > 0) {
1258 state |= BMAP_LEFT_VALID;
1367 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); 1259 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
1368 STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); 1260
1261 if (isnullstartblock(LEFT.br_startblock))
1262 state |= BMAP_LEFT_DELAY;
1369 } 1263 }
1370 STATE_SET(LEFT_CONTIG, 1264
1371 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 1265 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1372 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 1266 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1373 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 1267 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1374 LEFT.br_state == newext && 1268 LEFT.br_state == newext &&
1375 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1269 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1270 state |= BMAP_LEFT_CONTIG;
1271
1376 /* 1272 /*
1377 * Check and set flags if this segment has a right neighbor. 1273 * Check and set flags if this segment has a right neighbor.
1378 * Don't set contiguous if the combined extent would be too large. 1274 * Don't set contiguous if the combined extent would be too large.
1379 * Also check for all-three-contiguous being too large. 1275 * Also check for all-three-contiguous being too large.
1380 */ 1276 */
1381 if (STATE_SET_TEST(RIGHT_VALID, 1277 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1382 idx < 1278 state |= BMAP_RIGHT_VALID;
1383 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
1384 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); 1279 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
1385 STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); 1280 if (isnullstartblock(RIGHT.br_startblock))
1281 state |= BMAP_RIGHT_DELAY;
1386 } 1282 }
1387 STATE_SET(RIGHT_CONTIG, 1283
1388 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 1284 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1389 new_endoff == RIGHT.br_startoff && 1285 new_endoff == RIGHT.br_startoff &&
1390 new->br_startblock + new->br_blockcount == 1286 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1391 RIGHT.br_startblock && 1287 newext == RIGHT.br_state &&
1392 newext == RIGHT.br_state && 1288 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1393 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 1289 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1394 ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != 1290 BMAP_RIGHT_FILLING)) !=
1395 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || 1291 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1396 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 1292 BMAP_RIGHT_FILLING) ||
1397 <= MAXEXTLEN)); 1293 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1294 <= MAXEXTLEN))
1295 state |= BMAP_RIGHT_CONTIG;
1296
1398 /* 1297 /*
1399 * Switch out based on the FILLING and CONTIG state bits. 1298 * Switch out based on the FILLING and CONTIG state bits.
1400 */ 1299 */
1401 switch (SWITCH_STATE) { 1300 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1402 1301 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1403 case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1302 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1303 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1404 /* 1304 /*
1405 * Setting all of a previous oldext extent to newext. 1305 * Setting all of a previous oldext extent to newext.
1406 * The left and right neighbors are both contiguous with new. 1306 * The left and right neighbors are both contiguous with new.
1407 */ 1307 */
1408 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, 1308 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1409 XFS_DATA_FORK);
1410 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1309 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1411 LEFT.br_blockcount + PREV.br_blockcount + 1310 LEFT.br_blockcount + PREV.br_blockcount +
1412 RIGHT.br_blockcount); 1311 RIGHT.br_blockcount);
1413 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, 1312 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1414 XFS_DATA_FORK); 1313
1415 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); 1314 xfs_iext_remove(ip, idx, 2, state);
1416 xfs_iext_remove(ifp, idx, 2);
1417 ip->i_df.if_lastex = idx - 1; 1315 ip->i_df.if_lastex = idx - 1;
1418 ip->i_d.di_nextents -= 2; 1316 ip->i_d.di_nextents -= 2;
1419 if (cur == NULL) 1317 if (cur == NULL)
@@ -1450,20 +1348,18 @@ xfs_bmap_add_extent_unwritten_real(
1450 RIGHT.br_blockcount; 1348 RIGHT.br_blockcount;
1451 break; 1349 break;
1452 1350
1453 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 1351 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1454 /* 1352 /*
1455 * Setting all of a previous oldext extent to newext. 1353 * Setting all of a previous oldext extent to newext.
1456 * The left neighbor is contiguous, the right is not. 1354 * The left neighbor is contiguous, the right is not.
1457 */ 1355 */
1458 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, 1356 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1459 XFS_DATA_FORK);
1460 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1357 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1461 LEFT.br_blockcount + PREV.br_blockcount); 1358 LEFT.br_blockcount + PREV.br_blockcount);
1462 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, 1359 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1463 XFS_DATA_FORK); 1360
1464 ip->i_df.if_lastex = idx - 1; 1361 ip->i_df.if_lastex = idx - 1;
1465 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); 1362 xfs_iext_remove(ip, idx, 1, state);
1466 xfs_iext_remove(ifp, idx, 1);
1467 ip->i_d.di_nextents--; 1363 ip->i_d.di_nextents--;
1468 if (cur == NULL) 1364 if (cur == NULL)
1469 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1365 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1492,21 +1388,18 @@ xfs_bmap_add_extent_unwritten_real(
1492 PREV.br_blockcount; 1388 PREV.br_blockcount;
1493 break; 1389 break;
1494 1390
1495 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 1391 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1496 /* 1392 /*
1497 * Setting all of a previous oldext extent to newext. 1393 * Setting all of a previous oldext extent to newext.
1498 * The right neighbor is contiguous, the left is not. 1394 * The right neighbor is contiguous, the left is not.
1499 */ 1395 */
1500 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, 1396 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1501 XFS_DATA_FORK);
1502 xfs_bmbt_set_blockcount(ep, 1397 xfs_bmbt_set_blockcount(ep,
1503 PREV.br_blockcount + RIGHT.br_blockcount); 1398 PREV.br_blockcount + RIGHT.br_blockcount);
1504 xfs_bmbt_set_state(ep, newext); 1399 xfs_bmbt_set_state(ep, newext);
1505 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, 1400 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1506 XFS_DATA_FORK);
1507 ip->i_df.if_lastex = idx; 1401 ip->i_df.if_lastex = idx;
1508 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); 1402 xfs_iext_remove(ip, idx + 1, 1, state);
1509 xfs_iext_remove(ifp, idx + 1, 1);
1510 ip->i_d.di_nextents--; 1403 ip->i_d.di_nextents--;
1511 if (cur == NULL) 1404 if (cur == NULL)
1512 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1405 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1535,17 +1428,16 @@ xfs_bmap_add_extent_unwritten_real(
1535 RIGHT.br_blockcount; 1428 RIGHT.br_blockcount;
1536 break; 1429 break;
1537 1430
1538 case MASK2(LEFT_FILLING, RIGHT_FILLING): 1431 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1539 /* 1432 /*
1540 * Setting all of a previous oldext extent to newext. 1433 * Setting all of a previous oldext extent to newext.
1541 * Neither the left nor right neighbors are contiguous with 1434 * Neither the left nor right neighbors are contiguous with
1542 * the new one. 1435 * the new one.
1543 */ 1436 */
1544 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, 1437 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1545 XFS_DATA_FORK);
1546 xfs_bmbt_set_state(ep, newext); 1438 xfs_bmbt_set_state(ep, newext);
1547 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, 1439 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1548 XFS_DATA_FORK); 1440
1549 ip->i_df.if_lastex = idx; 1441 ip->i_df.if_lastex = idx;
1550 if (cur == NULL) 1442 if (cur == NULL)
1551 rval = XFS_ILOG_DEXT; 1443 rval = XFS_ILOG_DEXT;
@@ -1566,27 +1458,25 @@ xfs_bmap_add_extent_unwritten_real(
1566 temp2 = new->br_blockcount; 1458 temp2 = new->br_blockcount;
1567 break; 1459 break;
1568 1460
1569 case MASK2(LEFT_FILLING, LEFT_CONTIG): 1461 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1570 /* 1462 /*
1571 * Setting the first part of a previous oldext extent to newext. 1463 * Setting the first part of a previous oldext extent to newext.
1572 * The left neighbor is contiguous. 1464 * The left neighbor is contiguous.
1573 */ 1465 */
1574 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, 1466 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1575 XFS_DATA_FORK);
1576 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1467 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1577 LEFT.br_blockcount + new->br_blockcount); 1468 LEFT.br_blockcount + new->br_blockcount);
1578 xfs_bmbt_set_startoff(ep, 1469 xfs_bmbt_set_startoff(ep,
1579 PREV.br_startoff + new->br_blockcount); 1470 PREV.br_startoff + new->br_blockcount);
1580 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, 1471 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1581 XFS_DATA_FORK); 1472
1582 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, 1473 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1583 XFS_DATA_FORK);
1584 xfs_bmbt_set_startblock(ep, 1474 xfs_bmbt_set_startblock(ep,
1585 new->br_startblock + new->br_blockcount); 1475 new->br_startblock + new->br_blockcount);
1586 xfs_bmbt_set_blockcount(ep, 1476 xfs_bmbt_set_blockcount(ep,
1587 PREV.br_blockcount - new->br_blockcount); 1477 PREV.br_blockcount - new->br_blockcount);
1588 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, 1478 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1589 XFS_DATA_FORK); 1479
1590 ip->i_df.if_lastex = idx - 1; 1480 ip->i_df.if_lastex = idx - 1;
1591 if (cur == NULL) 1481 if (cur == NULL)
1592 rval = XFS_ILOG_DEXT; 1482 rval = XFS_ILOG_DEXT;
@@ -1617,22 +1507,21 @@ xfs_bmap_add_extent_unwritten_real(
1617 PREV.br_blockcount; 1507 PREV.br_blockcount;
1618 break; 1508 break;
1619 1509
1620 case MASK(LEFT_FILLING): 1510 case BMAP_LEFT_FILLING:
1621 /* 1511 /*
1622 * Setting the first part of a previous oldext extent to newext. 1512 * Setting the first part of a previous oldext extent to newext.
1623 * The left neighbor is not contiguous. 1513 * The left neighbor is not contiguous.
1624 */ 1514 */
1625 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); 1515 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1626 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 1516 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1627 xfs_bmbt_set_startoff(ep, new_endoff); 1517 xfs_bmbt_set_startoff(ep, new_endoff);
1628 xfs_bmbt_set_blockcount(ep, 1518 xfs_bmbt_set_blockcount(ep,
1629 PREV.br_blockcount - new->br_blockcount); 1519 PREV.br_blockcount - new->br_blockcount);
1630 xfs_bmbt_set_startblock(ep, 1520 xfs_bmbt_set_startblock(ep,
1631 new->br_startblock + new->br_blockcount); 1521 new->br_startblock + new->br_blockcount);
1632 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK); 1522 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1633 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, 1523
1634 XFS_DATA_FORK); 1524 xfs_iext_insert(ip, idx, 1, new, state);
1635 xfs_iext_insert(ifp, idx, 1, new);
1636 ip->i_df.if_lastex = idx; 1525 ip->i_df.if_lastex = idx;
1637 ip->i_d.di_nextents++; 1526 ip->i_d.di_nextents++;
1638 if (cur == NULL) 1527 if (cur == NULL)
@@ -1660,24 +1549,21 @@ xfs_bmap_add_extent_unwritten_real(
1660 temp2 = PREV.br_blockcount; 1549 temp2 = PREV.br_blockcount;
1661 break; 1550 break;
1662 1551
1663 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 1552 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664 /* 1553 /*
1665 * Setting the last part of a previous oldext extent to newext. 1554 * Setting the last part of a previous oldext extent to newext.
1666 * The right neighbor is contiguous with the new allocation. 1555 * The right neighbor is contiguous with the new allocation.
1667 */ 1556 */
1668 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, 1557 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1669 XFS_DATA_FORK); 1558 trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
1670 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
1671 XFS_DATA_FORK);
1672 xfs_bmbt_set_blockcount(ep, 1559 xfs_bmbt_set_blockcount(ep,
1673 PREV.br_blockcount - new->br_blockcount); 1560 PREV.br_blockcount - new->br_blockcount);
1674 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, 1561 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1675 XFS_DATA_FORK);
1676 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1562 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1677 new->br_startoff, new->br_startblock, 1563 new->br_startoff, new->br_startblock,
1678 new->br_blockcount + RIGHT.br_blockcount, newext); 1564 new->br_blockcount + RIGHT.br_blockcount, newext);
1679 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, 1565 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1680 XFS_DATA_FORK); 1566
1681 ip->i_df.if_lastex = idx + 1; 1567 ip->i_df.if_lastex = idx + 1;
1682 if (cur == NULL) 1568 if (cur == NULL)
1683 rval = XFS_ILOG_DEXT; 1569 rval = XFS_ILOG_DEXT;
@@ -1707,18 +1593,17 @@ xfs_bmap_add_extent_unwritten_real(
1707 RIGHT.br_blockcount; 1593 RIGHT.br_blockcount;
1708 break; 1594 break;
1709 1595
1710 case MASK(RIGHT_FILLING): 1596 case BMAP_RIGHT_FILLING:
1711 /* 1597 /*
1712 * Setting the last part of a previous oldext extent to newext. 1598 * Setting the last part of a previous oldext extent to newext.
1713 * The right neighbor is not contiguous. 1599 * The right neighbor is not contiguous.
1714 */ 1600 */
1715 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1601 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1716 xfs_bmbt_set_blockcount(ep, 1602 xfs_bmbt_set_blockcount(ep,
1717 PREV.br_blockcount - new->br_blockcount); 1603 PREV.br_blockcount - new->br_blockcount);
1718 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1604 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1719 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, 1605
1720 XFS_DATA_FORK); 1606 xfs_iext_insert(ip, idx + 1, 1, new, state);
1721 xfs_iext_insert(ifp, idx + 1, 1, new);
1722 ip->i_df.if_lastex = idx + 1; 1607 ip->i_df.if_lastex = idx + 1;
1723 ip->i_d.di_nextents++; 1608 ip->i_d.di_nextents++;
1724 if (cur == NULL) 1609 if (cur == NULL)
@@ -1756,19 +1641,18 @@ xfs_bmap_add_extent_unwritten_real(
1756 * newext. Contiguity is impossible here. 1641 * newext. Contiguity is impossible here.
1757 * One extent becomes three extents. 1642 * One extent becomes three extents.
1758 */ 1643 */
1759 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); 1644 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1760 xfs_bmbt_set_blockcount(ep, 1645 xfs_bmbt_set_blockcount(ep,
1761 new->br_startoff - PREV.br_startoff); 1646 new->br_startoff - PREV.br_startoff);
1762 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); 1647 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1648
1763 r[0] = *new; 1649 r[0] = *new;
1764 r[1].br_startoff = new_endoff; 1650 r[1].br_startoff = new_endoff;
1765 r[1].br_blockcount = 1651 r[1].br_blockcount =
1766 PREV.br_startoff + PREV.br_blockcount - new_endoff; 1652 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1767 r[1].br_startblock = new->br_startblock + new->br_blockcount; 1653 r[1].br_startblock = new->br_startblock + new->br_blockcount;
1768 r[1].br_state = oldext; 1654 r[1].br_state = oldext;
1769 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], 1655 xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
1770 XFS_DATA_FORK);
1771 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1772 ip->i_df.if_lastex = idx + 1; 1656 ip->i_df.if_lastex = idx + 1;
1773 ip->i_d.di_nextents += 2; 1657 ip->i_d.di_nextents += 2;
1774 if (cur == NULL) 1658 if (cur == NULL)
@@ -1813,13 +1697,13 @@ xfs_bmap_add_extent_unwritten_real(
1813 temp2 = PREV.br_blockcount; 1697 temp2 = PREV.br_blockcount;
1814 break; 1698 break;
1815 1699
1816 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1700 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1817 case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1701 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1818 case MASK2(LEFT_FILLING, RIGHT_CONTIG): 1702 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1819 case MASK2(RIGHT_FILLING, LEFT_CONTIG): 1703 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1820 case MASK2(LEFT_CONTIG, RIGHT_CONTIG): 1704 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1821 case MASK(LEFT_CONTIG): 1705 case BMAP_LEFT_CONTIG:
1822 case MASK(RIGHT_CONTIG): 1706 case BMAP_RIGHT_CONTIG:
1823 /* 1707 /*
1824 * These cases are all impossible. 1708 * These cases are all impossible.
1825 */ 1709 */
@@ -1839,14 +1723,6 @@ done:
1839#undef LEFT 1723#undef LEFT
1840#undef RIGHT 1724#undef RIGHT
1841#undef PREV 1725#undef PREV
1842#undef MASK
1843#undef MASK2
1844#undef MASK3
1845#undef MASK4
1846#undef STATE_SET
1847#undef STATE_TEST
1848#undef STATE_SET_TEST
1849#undef SWITCH_STATE
1850} 1726}
1851 1727
1852/* 1728/*
@@ -1872,62 +1748,57 @@ xfs_bmap_add_extent_hole_delay(
1872 int state; /* state bits, accessed thru macros */ 1748 int state; /* state bits, accessed thru macros */
1873 xfs_filblks_t temp=0; /* temp for indirect calculations */ 1749 xfs_filblks_t temp=0; /* temp for indirect calculations */
1874 xfs_filblks_t temp2=0; 1750 xfs_filblks_t temp2=0;
1875 enum { /* bit number definitions for state */
1876 LEFT_CONTIG, RIGHT_CONTIG,
1877 LEFT_DELAY, RIGHT_DELAY,
1878 LEFT_VALID, RIGHT_VALID
1879 };
1880
1881#define MASK(b) (1 << (b))
1882#define MASK2(a,b) (MASK(a) | MASK(b))
1883#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1884#define STATE_TEST(b) (state & MASK(b))
1885#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
1886 ((state &= ~MASK(b)), 0))
1887#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
1888 1751
1889 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1752 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1890 ep = xfs_iext_get_ext(ifp, idx); 1753 ep = xfs_iext_get_ext(ifp, idx);
1891 state = 0; 1754 state = 0;
1892 ASSERT(isnullstartblock(new->br_startblock)); 1755 ASSERT(isnullstartblock(new->br_startblock));
1756
1893 /* 1757 /*
1894 * Check and set flags if this segment has a left neighbor 1758 * Check and set flags if this segment has a left neighbor
1895 */ 1759 */
1896 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1760 if (idx > 0) {
1761 state |= BMAP_LEFT_VALID;
1897 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); 1762 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
1898 STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); 1763
1764 if (isnullstartblock(left.br_startblock))
1765 state |= BMAP_LEFT_DELAY;
1899 } 1766 }
1767
1900 /* 1768 /*
1901 * Check and set flags if the current (right) segment exists. 1769 * Check and set flags if the current (right) segment exists.
1902 * If it doesn't exist, we're converting the hole at end-of-file. 1770 * If it doesn't exist, we're converting the hole at end-of-file.
1903 */ 1771 */
1904 if (STATE_SET_TEST(RIGHT_VALID, 1772 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
1905 idx < 1773 state |= BMAP_RIGHT_VALID;
1906 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1907 xfs_bmbt_get_all(ep, &right); 1774 xfs_bmbt_get_all(ep, &right);
1908 STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); 1775
1776 if (isnullstartblock(right.br_startblock))
1777 state |= BMAP_RIGHT_DELAY;
1909 } 1778 }
1779
1910 /* 1780 /*
1911 * Set contiguity flags on the left and right neighbors. 1781 * Set contiguity flags on the left and right neighbors.
1912 * Don't let extents get too large, even if the pieces are contiguous. 1782 * Don't let extents get too large, even if the pieces are contiguous.
1913 */ 1783 */
1914 STATE_SET(LEFT_CONTIG, 1784 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
1915 STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) && 1785 left.br_startoff + left.br_blockcount == new->br_startoff &&
1916 left.br_startoff + left.br_blockcount == new->br_startoff && 1786 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1917 left.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1787 state |= BMAP_LEFT_CONTIG;
1918 STATE_SET(RIGHT_CONTIG, 1788
1919 STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) && 1789 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
1920 new->br_startoff + new->br_blockcount == right.br_startoff && 1790 new->br_startoff + new->br_blockcount == right.br_startoff &&
1921 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 1791 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
1922 (!STATE_TEST(LEFT_CONTIG) || 1792 (!(state & BMAP_LEFT_CONTIG) ||
1923 (left.br_blockcount + new->br_blockcount + 1793 (left.br_blockcount + new->br_blockcount +
1924 right.br_blockcount <= MAXEXTLEN))); 1794 right.br_blockcount <= MAXEXTLEN)))
1795 state |= BMAP_RIGHT_CONTIG;
1796
1925 /* 1797 /*
1926 * Switch out based on the contiguity flags. 1798 * Switch out based on the contiguity flags.
1927 */ 1799 */
1928 switch (SWITCH_STATE) { 1800 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
1929 1801 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1930 case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
1931 /* 1802 /*
1932 * New allocation is contiguous with delayed allocations 1803 * New allocation is contiguous with delayed allocations
1933 * on the left and on the right. 1804 * on the left and on the right.
@@ -1935,8 +1806,8 @@ xfs_bmap_add_extent_hole_delay(
1935 */ 1806 */
1936 temp = left.br_blockcount + new->br_blockcount + 1807 temp = left.br_blockcount + new->br_blockcount +
1937 right.br_blockcount; 1808 right.br_blockcount;
1938 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, 1809
1939 XFS_DATA_FORK); 1810 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1940 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1811 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1941 oldlen = startblockval(left.br_startblock) + 1812 oldlen = startblockval(left.br_startblock) +
1942 startblockval(new->br_startblock) + 1813 startblockval(new->br_startblock) +
@@ -1944,53 +1815,52 @@ xfs_bmap_add_extent_hole_delay(
1944 newlen = xfs_bmap_worst_indlen(ip, temp); 1815 newlen = xfs_bmap_worst_indlen(ip, temp);
1945 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1816 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1946 nullstartblock((int)newlen)); 1817 nullstartblock((int)newlen));
1947 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, 1818 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1948 XFS_DATA_FORK); 1819
1949 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK); 1820 xfs_iext_remove(ip, idx, 1, state);
1950 xfs_iext_remove(ifp, idx, 1);
1951 ip->i_df.if_lastex = idx - 1; 1821 ip->i_df.if_lastex = idx - 1;
1952 /* DELTA: Two in-core extents were replaced by one. */ 1822 /* DELTA: Two in-core extents were replaced by one. */
1953 temp2 = temp; 1823 temp2 = temp;
1954 temp = left.br_startoff; 1824 temp = left.br_startoff;
1955 break; 1825 break;
1956 1826
1957 case MASK(LEFT_CONTIG): 1827 case BMAP_LEFT_CONTIG:
1958 /* 1828 /*
1959 * New allocation is contiguous with a delayed allocation 1829 * New allocation is contiguous with a delayed allocation
1960 * on the left. 1830 * on the left.
1961 * Merge the new allocation with the left neighbor. 1831 * Merge the new allocation with the left neighbor.
1962 */ 1832 */
1963 temp = left.br_blockcount + new->br_blockcount; 1833 temp = left.br_blockcount + new->br_blockcount;
1964 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, 1834 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1965 XFS_DATA_FORK);
1966 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1835 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1967 oldlen = startblockval(left.br_startblock) + 1836 oldlen = startblockval(left.br_startblock) +
1968 startblockval(new->br_startblock); 1837 startblockval(new->br_startblock);
1969 newlen = xfs_bmap_worst_indlen(ip, temp); 1838 newlen = xfs_bmap_worst_indlen(ip, temp);
1970 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1839 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1971 nullstartblock((int)newlen)); 1840 nullstartblock((int)newlen));
1972 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, 1841 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1973 XFS_DATA_FORK); 1842
1974 ip->i_df.if_lastex = idx - 1; 1843 ip->i_df.if_lastex = idx - 1;
1975 /* DELTA: One in-core extent grew into a hole. */ 1844 /* DELTA: One in-core extent grew into a hole. */
1976 temp2 = temp; 1845 temp2 = temp;
1977 temp = left.br_startoff; 1846 temp = left.br_startoff;
1978 break; 1847 break;
1979 1848
1980 case MASK(RIGHT_CONTIG): 1849 case BMAP_RIGHT_CONTIG:
1981 /* 1850 /*
1982 * New allocation is contiguous with a delayed allocation 1851 * New allocation is contiguous with a delayed allocation
1983 * on the right. 1852 * on the right.
1984 * Merge the new allocation with the right neighbor. 1853 * Merge the new allocation with the right neighbor.
1985 */ 1854 */
1986 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK); 1855 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1987 temp = new->br_blockcount + right.br_blockcount; 1856 temp = new->br_blockcount + right.br_blockcount;
1988 oldlen = startblockval(new->br_startblock) + 1857 oldlen = startblockval(new->br_startblock) +
1989 startblockval(right.br_startblock); 1858 startblockval(right.br_startblock);
1990 newlen = xfs_bmap_worst_indlen(ip, temp); 1859 newlen = xfs_bmap_worst_indlen(ip, temp);
1991 xfs_bmbt_set_allf(ep, new->br_startoff, 1860 xfs_bmbt_set_allf(ep, new->br_startoff,
1992 nullstartblock((int)newlen), temp, right.br_state); 1861 nullstartblock((int)newlen), temp, right.br_state);
1993 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK); 1862 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1863
1994 ip->i_df.if_lastex = idx; 1864 ip->i_df.if_lastex = idx;
1995 /* DELTA: One in-core extent grew into a hole. */ 1865 /* DELTA: One in-core extent grew into a hole. */
1996 temp2 = temp; 1866 temp2 = temp;
@@ -2004,9 +1874,7 @@ xfs_bmap_add_extent_hole_delay(
2004 * Insert a new entry. 1874 * Insert a new entry.
2005 */ 1875 */
2006 oldlen = newlen = 0; 1876 oldlen = newlen = 0;
2007 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, 1877 xfs_iext_insert(ip, idx, 1, new, state);
2008 XFS_DATA_FORK);
2009 xfs_iext_insert(ifp, idx, 1, new);
2010 ip->i_df.if_lastex = idx; 1878 ip->i_df.if_lastex = idx;
2011 /* DELTA: A new in-core extent was added in a hole. */ 1879 /* DELTA: A new in-core extent was added in a hole. */
2012 temp2 = new->br_blockcount; 1880 temp2 = new->br_blockcount;
@@ -2030,12 +1898,6 @@ xfs_bmap_add_extent_hole_delay(
2030 } 1898 }
2031 *logflagsp = 0; 1899 *logflagsp = 0;
2032 return 0; 1900 return 0;
2033#undef MASK
2034#undef MASK2
2035#undef STATE_SET
2036#undef STATE_TEST
2037#undef STATE_SET_TEST
2038#undef SWITCH_STATE
2039} 1901}
2040 1902
2041/* 1903/*
@@ -2062,83 +1924,75 @@ xfs_bmap_add_extent_hole_real(
2062 int state; /* state bits, accessed thru macros */ 1924 int state; /* state bits, accessed thru macros */
2063 xfs_filblks_t temp=0; 1925 xfs_filblks_t temp=0;
2064 xfs_filblks_t temp2=0; 1926 xfs_filblks_t temp2=0;
2065 enum { /* bit number definitions for state */
2066 LEFT_CONTIG, RIGHT_CONTIG,
2067 LEFT_DELAY, RIGHT_DELAY,
2068 LEFT_VALID, RIGHT_VALID
2069 };
2070
2071#define MASK(b) (1 << (b))
2072#define MASK2(a,b) (MASK(a) | MASK(b))
2073#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
2074#define STATE_TEST(b) (state & MASK(b))
2075#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
2076 ((state &= ~MASK(b)), 0))
2077#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
2078 1927
2079 ifp = XFS_IFORK_PTR(ip, whichfork); 1928 ifp = XFS_IFORK_PTR(ip, whichfork);
2080 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 1929 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
2081 ep = xfs_iext_get_ext(ifp, idx); 1930 ep = xfs_iext_get_ext(ifp, idx);
2082 state = 0; 1931 state = 0;
1932
1933 if (whichfork == XFS_ATTR_FORK)
1934 state |= BMAP_ATTRFORK;
1935
2083 /* 1936 /*
2084 * Check and set flags if this segment has a left neighbor. 1937 * Check and set flags if this segment has a left neighbor.
2085 */ 1938 */
2086 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1939 if (idx > 0) {
1940 state |= BMAP_LEFT_VALID;
2087 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); 1941 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
2088 STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); 1942 if (isnullstartblock(left.br_startblock))
1943 state |= BMAP_LEFT_DELAY;
2089 } 1944 }
1945
2090 /* 1946 /*
2091 * Check and set flags if this segment has a current value. 1947 * Check and set flags if this segment has a current value.
2092 * Not true if we're inserting into the "hole" at eof. 1948 * Not true if we're inserting into the "hole" at eof.
2093 */ 1949 */
2094 if (STATE_SET_TEST(RIGHT_VALID, 1950 if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2095 idx < 1951 state |= BMAP_RIGHT_VALID;
2096 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
2097 xfs_bmbt_get_all(ep, &right); 1952 xfs_bmbt_get_all(ep, &right);
2098 STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); 1953 if (isnullstartblock(right.br_startblock))
1954 state |= BMAP_RIGHT_DELAY;
2099 } 1955 }
1956
2100 /* 1957 /*
2101 * We're inserting a real allocation between "left" and "right". 1958 * We're inserting a real allocation between "left" and "right".
2102 * Set the contiguity flags. Don't let extents get too large. 1959 * Set the contiguity flags. Don't let extents get too large.
2103 */ 1960 */
2104 STATE_SET(LEFT_CONTIG, 1961 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2105 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 1962 left.br_startoff + left.br_blockcount == new->br_startoff &&
2106 left.br_startoff + left.br_blockcount == new->br_startoff && 1963 left.br_startblock + left.br_blockcount == new->br_startblock &&
2107 left.br_startblock + left.br_blockcount == new->br_startblock && 1964 left.br_state == new->br_state &&
2108 left.br_state == new->br_state && 1965 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2109 left.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1966 state |= BMAP_LEFT_CONTIG;
2110 STATE_SET(RIGHT_CONTIG, 1967
2111 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 1968 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2112 new->br_startoff + new->br_blockcount == right.br_startoff && 1969 new->br_startoff + new->br_blockcount == right.br_startoff &&
2113 new->br_startblock + new->br_blockcount == 1970 new->br_startblock + new->br_blockcount == right.br_startblock &&
2114 right.br_startblock && 1971 new->br_state == right.br_state &&
2115 new->br_state == right.br_state && 1972 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2116 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 1973 (!(state & BMAP_LEFT_CONTIG) ||
2117 (!STATE_TEST(LEFT_CONTIG) || 1974 left.br_blockcount + new->br_blockcount +
2118 left.br_blockcount + new->br_blockcount + 1975 right.br_blockcount <= MAXEXTLEN))
2119 right.br_blockcount <= MAXEXTLEN)); 1976 state |= BMAP_RIGHT_CONTIG;
2120 1977
2121 error = 0; 1978 error = 0;
2122 /* 1979 /*
2123 * Select which case we're in here, and implement it. 1980 * Select which case we're in here, and implement it.
2124 */ 1981 */
2125 switch (SWITCH_STATE) { 1982 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2126 1983 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2127 case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
2128 /* 1984 /*
2129 * New allocation is contiguous with real allocations on the 1985 * New allocation is contiguous with real allocations on the
2130 * left and on the right. 1986 * left and on the right.
2131 * Merge all three into a single extent record. 1987 * Merge all three into a single extent record.
2132 */ 1988 */
2133 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, 1989 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2134 whichfork);
2135 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1990 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2136 left.br_blockcount + new->br_blockcount + 1991 left.br_blockcount + new->br_blockcount +
2137 right.br_blockcount); 1992 right.br_blockcount);
2138 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, 1993 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2139 whichfork); 1994
2140 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork); 1995 xfs_iext_remove(ip, idx, 1, state);
2141 xfs_iext_remove(ifp, idx, 1);
2142 ifp->if_lastex = idx - 1; 1996 ifp->if_lastex = idx - 1;
2143 XFS_IFORK_NEXT_SET(ip, whichfork, 1997 XFS_IFORK_NEXT_SET(ip, whichfork,
2144 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 1998 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2173,16 +2027,17 @@ xfs_bmap_add_extent_hole_real(
2173 right.br_blockcount; 2027 right.br_blockcount;
2174 break; 2028 break;
2175 2029
2176 case MASK(LEFT_CONTIG): 2030 case BMAP_LEFT_CONTIG:
2177 /* 2031 /*
2178 * New allocation is contiguous with a real allocation 2032 * New allocation is contiguous with a real allocation
2179 * on the left. 2033 * on the left.
2180 * Merge the new allocation with the left neighbor. 2034 * Merge the new allocation with the left neighbor.
2181 */ 2035 */
2182 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork); 2036 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2183 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2037 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2184 left.br_blockcount + new->br_blockcount); 2038 left.br_blockcount + new->br_blockcount);
2185 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork); 2039 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2040
2186 ifp->if_lastex = idx - 1; 2041 ifp->if_lastex = idx - 1;
2187 if (cur == NULL) { 2042 if (cur == NULL) {
2188 rval = xfs_ilog_fext(whichfork); 2043 rval = xfs_ilog_fext(whichfork);
@@ -2207,17 +2062,18 @@ xfs_bmap_add_extent_hole_real(
2207 new->br_blockcount; 2062 new->br_blockcount;
2208 break; 2063 break;
2209 2064
2210 case MASK(RIGHT_CONTIG): 2065 case BMAP_RIGHT_CONTIG:
2211 /* 2066 /*
2212 * New allocation is contiguous with a real allocation 2067 * New allocation is contiguous with a real allocation
2213 * on the right. 2068 * on the right.
2214 * Merge the new allocation with the right neighbor. 2069 * Merge the new allocation with the right neighbor.
2215 */ 2070 */
2216 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork); 2071 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
2217 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, 2072 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2218 new->br_blockcount + right.br_blockcount, 2073 new->br_blockcount + right.br_blockcount,
2219 right.br_state); 2074 right.br_state);
2220 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork); 2075 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
2076
2221 ifp->if_lastex = idx; 2077 ifp->if_lastex = idx;
2222 if (cur == NULL) { 2078 if (cur == NULL) {
2223 rval = xfs_ilog_fext(whichfork); 2079 rval = xfs_ilog_fext(whichfork);
@@ -2248,8 +2104,7 @@ xfs_bmap_add_extent_hole_real(
2248 * real allocation. 2104 * real allocation.
2249 * Insert a new entry. 2105 * Insert a new entry.
2250 */ 2106 */
2251 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork); 2107 xfs_iext_insert(ip, idx, 1, new, state);
2252 xfs_iext_insert(ifp, idx, 1, new);
2253 ifp->if_lastex = idx; 2108 ifp->if_lastex = idx;
2254 XFS_IFORK_NEXT_SET(ip, whichfork, 2109 XFS_IFORK_NEXT_SET(ip, whichfork,
2255 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2110 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2283,12 +2138,6 @@ xfs_bmap_add_extent_hole_real(
2283done: 2138done:
2284 *logflagsp = rval; 2139 *logflagsp = rval;
2285 return error; 2140 return error;
2286#undef MASK
2287#undef MASK2
2288#undef STATE_SET
2289#undef STATE_TEST
2290#undef STATE_SET_TEST
2291#undef SWITCH_STATE
2292} 2141}
2293 2142
2294/* 2143/*
@@ -3115,8 +2964,13 @@ xfs_bmap_del_extent(
3115 uint qfield; /* quota field to update */ 2964 uint qfield; /* quota field to update */
3116 xfs_filblks_t temp; /* for indirect length calculations */ 2965 xfs_filblks_t temp; /* for indirect length calculations */
3117 xfs_filblks_t temp2; /* for indirect length calculations */ 2966 xfs_filblks_t temp2; /* for indirect length calculations */
2967 int state = 0;
3118 2968
3119 XFS_STATS_INC(xs_del_exlist); 2969 XFS_STATS_INC(xs_del_exlist);
2970
2971 if (whichfork == XFS_ATTR_FORK)
2972 state |= BMAP_ATTRFORK;
2973
3120 mp = ip->i_mount; 2974 mp = ip->i_mount;
3121 ifp = XFS_IFORK_PTR(ip, whichfork); 2975 ifp = XFS_IFORK_PTR(ip, whichfork);
3122 ASSERT((idx >= 0) && (idx < ifp->if_bytes / 2976 ASSERT((idx >= 0) && (idx < ifp->if_bytes /
@@ -3196,8 +3050,8 @@ xfs_bmap_del_extent(
3196 /* 3050 /*
3197 * Matches the whole extent. Delete the entry. 3051 * Matches the whole extent. Delete the entry.
3198 */ 3052 */
3199 XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork); 3053 xfs_iext_remove(ip, idx, 1,
3200 xfs_iext_remove(ifp, idx, 1); 3054 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
3201 ifp->if_lastex = idx; 3055 ifp->if_lastex = idx;
3202 if (delay) 3056 if (delay)
3203 break; 3057 break;
@@ -3217,7 +3071,7 @@ xfs_bmap_del_extent(
3217 /* 3071 /*
3218 * Deleting the first part of the extent. 3072 * Deleting the first part of the extent.
3219 */ 3073 */
3220 XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork); 3074 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3221 xfs_bmbt_set_startoff(ep, del_endoff); 3075 xfs_bmbt_set_startoff(ep, del_endoff);
3222 temp = got.br_blockcount - del->br_blockcount; 3076 temp = got.br_blockcount - del->br_blockcount;
3223 xfs_bmbt_set_blockcount(ep, temp); 3077 xfs_bmbt_set_blockcount(ep, temp);
@@ -3226,13 +3080,12 @@ xfs_bmap_del_extent(
3226 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3080 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3227 da_old); 3081 da_old);
3228 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3082 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3229 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, 3083 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3230 whichfork);
3231 da_new = temp; 3084 da_new = temp;
3232 break; 3085 break;
3233 } 3086 }
3234 xfs_bmbt_set_startblock(ep, del_endblock); 3087 xfs_bmbt_set_startblock(ep, del_endblock);
3235 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); 3088 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3236 if (!cur) { 3089 if (!cur) {
3237 flags |= xfs_ilog_fext(whichfork); 3090 flags |= xfs_ilog_fext(whichfork);
3238 break; 3091 break;
@@ -3248,19 +3101,18 @@ xfs_bmap_del_extent(
3248 * Deleting the last part of the extent. 3101 * Deleting the last part of the extent.
3249 */ 3102 */
3250 temp = got.br_blockcount - del->br_blockcount; 3103 temp = got.br_blockcount - del->br_blockcount;
3251 XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork); 3104 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3252 xfs_bmbt_set_blockcount(ep, temp); 3105 xfs_bmbt_set_blockcount(ep, temp);
3253 ifp->if_lastex = idx; 3106 ifp->if_lastex = idx;
3254 if (delay) { 3107 if (delay) {
3255 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3108 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3256 da_old); 3109 da_old);
3257 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3110 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3258 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, 3111 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3259 whichfork);
3260 da_new = temp; 3112 da_new = temp;
3261 break; 3113 break;
3262 } 3114 }
3263 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); 3115 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3264 if (!cur) { 3116 if (!cur) {
3265 flags |= xfs_ilog_fext(whichfork); 3117 flags |= xfs_ilog_fext(whichfork);
3266 break; 3118 break;
@@ -3277,7 +3129,7 @@ xfs_bmap_del_extent(
3277 * Deleting the middle of the extent. 3129 * Deleting the middle of the extent.
3278 */ 3130 */
3279 temp = del->br_startoff - got.br_startoff; 3131 temp = del->br_startoff - got.br_startoff;
3280 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork); 3132 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3281 xfs_bmbt_set_blockcount(ep, temp); 3133 xfs_bmbt_set_blockcount(ep, temp);
3282 new.br_startoff = del_endoff; 3134 new.br_startoff = del_endoff;
3283 temp2 = got_endoff - del_endoff; 3135 temp2 = got_endoff - del_endoff;
@@ -3364,10 +3216,8 @@ xfs_bmap_del_extent(
3364 } 3216 }
3365 } 3217 }
3366 } 3218 }
3367 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork); 3219 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3368 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL, 3220 xfs_iext_insert(ip, idx + 1, 1, &new, state);
3369 whichfork);
3370 xfs_iext_insert(ifp, idx + 1, 1, &new);
3371 ifp->if_lastex = idx + 1; 3221 ifp->if_lastex = idx + 1;
3372 break; 3222 break;
3373 } 3223 }
@@ -3687,7 +3537,9 @@ xfs_bmap_local_to_extents(
3687 xfs_iext_add(ifp, 0, 1); 3537 xfs_iext_add(ifp, 0, 1);
3688 ep = xfs_iext_get_ext(ifp, 0); 3538 ep = xfs_iext_get_ext(ifp, 0);
3689 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3539 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3690 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); 3540 trace_xfs_bmap_post_update(ip, 0,
3541 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
3542 _THIS_IP_);
3691 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3543 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3692 ip->i_d.di_nblocks = 1; 3544 ip->i_d.di_nblocks = 1;
3693 xfs_trans_mod_dquot_byino(tp, ip, 3545 xfs_trans_mod_dquot_byino(tp, ip,
@@ -3800,158 +3652,6 @@ xfs_bmap_search_extents(
3800 return ep; 3652 return ep;
3801} 3653}
3802 3654
3803
3804#ifdef XFS_BMAP_TRACE
3805ktrace_t *xfs_bmap_trace_buf;
3806
3807/*
3808 * Add a bmap trace buffer entry. Base routine for the others.
3809 */
3810STATIC void
3811xfs_bmap_trace_addentry(
3812 int opcode, /* operation */
3813 const char *fname, /* function name */
3814 char *desc, /* operation description */
3815 xfs_inode_t *ip, /* incore inode pointer */
3816 xfs_extnum_t idx, /* index of entry(ies) */
3817 xfs_extnum_t cnt, /* count of entries, 1 or 2 */
3818 xfs_bmbt_rec_host_t *r1, /* first record */
3819 xfs_bmbt_rec_host_t *r2, /* second record or null */
3820 int whichfork) /* data or attr fork */
3821{
3822 xfs_bmbt_rec_host_t tr2;
3823
3824 ASSERT(cnt == 1 || cnt == 2);
3825 ASSERT(r1 != NULL);
3826 if (cnt == 1) {
3827 ASSERT(r2 == NULL);
3828 r2 = &tr2;
3829 memset(&tr2, 0, sizeof(tr2));
3830 } else
3831 ASSERT(r2 != NULL);
3832 ktrace_enter(xfs_bmap_trace_buf,
3833 (void *)(__psint_t)(opcode | (whichfork << 16)),
3834 (void *)fname, (void *)desc, (void *)ip,
3835 (void *)(__psint_t)idx,
3836 (void *)(__psint_t)cnt,
3837 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3838 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3839 (void *)(__psunsigned_t)(r1->l0 >> 32),
3840 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3841 (void *)(__psunsigned_t)(r1->l1 >> 32),
3842 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3843 (void *)(__psunsigned_t)(r2->l0 >> 32),
3844 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3845 (void *)(__psunsigned_t)(r2->l1 >> 32),
3846 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3847 );
3848 ASSERT(ip->i_xtrace);
3849 ktrace_enter(ip->i_xtrace,
3850 (void *)(__psint_t)(opcode | (whichfork << 16)),
3851 (void *)fname, (void *)desc, (void *)ip,
3852 (void *)(__psint_t)idx,
3853 (void *)(__psint_t)cnt,
3854 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3855 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3856 (void *)(__psunsigned_t)(r1->l0 >> 32),
3857 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3858 (void *)(__psunsigned_t)(r1->l1 >> 32),
3859 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3860 (void *)(__psunsigned_t)(r2->l0 >> 32),
3861 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3862 (void *)(__psunsigned_t)(r2->l1 >> 32),
3863 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3864 );
3865}
3866
3867/*
3868 * Add bmap trace entry prior to a call to xfs_iext_remove.
3869 */
3870STATIC void
3871xfs_bmap_trace_delete(
3872 const char *fname, /* function name */
3873 char *desc, /* operation description */
3874 xfs_inode_t *ip, /* incore inode pointer */
3875 xfs_extnum_t idx, /* index of entry(entries) deleted */
3876 xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */
3877 int whichfork) /* data or attr fork */
3878{
3879 xfs_ifork_t *ifp; /* inode fork pointer */
3880
3881 ifp = XFS_IFORK_PTR(ip, whichfork);
3882 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx,
3883 cnt, xfs_iext_get_ext(ifp, idx),
3884 cnt == 2 ? xfs_iext_get_ext(ifp, idx + 1) : NULL,
3885 whichfork);
3886}
3887
3888/*
3889 * Add bmap trace entry prior to a call to xfs_iext_insert, or
3890 * reading in the extents list from the disk (in the btree).
3891 */
3892STATIC void
3893xfs_bmap_trace_insert(
3894 const char *fname, /* function name */
3895 char *desc, /* operation description */
3896 xfs_inode_t *ip, /* incore inode pointer */
3897 xfs_extnum_t idx, /* index of entry(entries) inserted */
3898 xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */
3899 xfs_bmbt_irec_t *r1, /* inserted record 1 */
3900 xfs_bmbt_irec_t *r2, /* inserted record 2 or null */
3901 int whichfork) /* data or attr fork */
3902{
3903 xfs_bmbt_rec_host_t tr1; /* compressed record 1 */
3904 xfs_bmbt_rec_host_t tr2; /* compressed record 2 if needed */
3905
3906 xfs_bmbt_set_all(&tr1, r1);
3907 if (cnt == 2) {
3908 ASSERT(r2 != NULL);
3909 xfs_bmbt_set_all(&tr2, r2);
3910 } else {
3911 ASSERT(cnt == 1);
3912 ASSERT(r2 == NULL);
3913 }
3914 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_INSERT, fname, desc, ip, idx,
3915 cnt, &tr1, cnt == 2 ? &tr2 : NULL, whichfork);
3916}
3917
3918/*
3919 * Add bmap trace entry after updating an extent record in place.
3920 */
3921STATIC void
3922xfs_bmap_trace_post_update(
3923 const char *fname, /* function name */
3924 char *desc, /* operation description */
3925 xfs_inode_t *ip, /* incore inode pointer */
3926 xfs_extnum_t idx, /* index of entry updated */
3927 int whichfork) /* data or attr fork */
3928{
3929 xfs_ifork_t *ifp; /* inode fork pointer */
3930
3931 ifp = XFS_IFORK_PTR(ip, whichfork);
3932 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx,
3933 1, xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3934}
3935
3936/*
3937 * Add bmap trace entry prior to updating an extent record in place.
3938 */
3939STATIC void
3940xfs_bmap_trace_pre_update(
3941 const char *fname, /* function name */
3942 char *desc, /* operation description */
3943 xfs_inode_t *ip, /* incore inode pointer */
3944 xfs_extnum_t idx, /* index of entry to be updated */
3945 int whichfork) /* data or attr fork */
3946{
3947 xfs_ifork_t *ifp; /* inode fork pointer */
3948
3949 ifp = XFS_IFORK_PTR(ip, whichfork);
3950 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1,
3951 xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3952}
3953#endif /* XFS_BMAP_TRACE */
3954
3955/* 3655/*
3956 * Compute the worst-case number of indirect blocks that will be used 3656 * Compute the worst-case number of indirect blocks that will be used
3957 * for ip's delayed extent of length "len". 3657 * for ip's delayed extent of length "len".
@@ -3983,37 +3683,6 @@ xfs_bmap_worst_indlen(
3983 return rval; 3683 return rval;
3984} 3684}
3985 3685
3986#if defined(XFS_RW_TRACE)
3987STATIC void
3988xfs_bunmap_trace(
3989 xfs_inode_t *ip,
3990 xfs_fileoff_t bno,
3991 xfs_filblks_t len,
3992 int flags,
3993 inst_t *ra)
3994{
3995 if (ip->i_rwtrace == NULL)
3996 return;
3997 ktrace_enter(ip->i_rwtrace,
3998 (void *)(__psint_t)XFS_BUNMAP,
3999 (void *)ip,
4000 (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
4001 (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
4002 (void *)(__psint_t)(((xfs_dfiloff_t)bno >> 32) & 0xffffffff),
4003 (void *)(__psint_t)((xfs_dfiloff_t)bno & 0xffffffff),
4004 (void *)(__psint_t)len,
4005 (void *)(__psint_t)flags,
4006 (void *)(unsigned long)current_cpu(),
4007 (void *)ra,
4008 (void *)0,
4009 (void *)0,
4010 (void *)0,
4011 (void *)0,
4012 (void *)0,
4013 (void *)0);
4014}
4015#endif
4016
4017/* 3686/*
4018 * Convert inode from non-attributed to attributed. 3687 * Convert inode from non-attributed to attributed.
4019 * Must not be in a transaction, ip must not be locked. 3688 * Must not be in a transaction, ip must not be locked.
@@ -4702,34 +4371,30 @@ error0:
4702 return XFS_ERROR(EFSCORRUPTED); 4371 return XFS_ERROR(EFSCORRUPTED);
4703} 4372}
4704 4373
4705#ifdef XFS_BMAP_TRACE 4374#ifdef DEBUG
4706/* 4375/*
4707 * Add bmap trace insert entries for all the contents of the extent records. 4376 * Add bmap trace insert entries for all the contents of the extent records.
4708 */ 4377 */
4709void 4378void
4710xfs_bmap_trace_exlist( 4379xfs_bmap_trace_exlist(
4711 const char *fname, /* function name */
4712 xfs_inode_t *ip, /* incore inode pointer */ 4380 xfs_inode_t *ip, /* incore inode pointer */
4713 xfs_extnum_t cnt, /* count of entries in the list */ 4381 xfs_extnum_t cnt, /* count of entries in the list */
4714 int whichfork) /* data or attr fork */ 4382 int whichfork, /* data or attr fork */
4383 unsigned long caller_ip)
4715{ 4384{
4716 xfs_bmbt_rec_host_t *ep; /* current extent record */
4717 xfs_extnum_t idx; /* extent record index */ 4385 xfs_extnum_t idx; /* extent record index */
4718 xfs_ifork_t *ifp; /* inode fork pointer */ 4386 xfs_ifork_t *ifp; /* inode fork pointer */
4719 xfs_bmbt_irec_t s; /* file extent record */ 4387 int state = 0;
4388
4389 if (whichfork == XFS_ATTR_FORK)
4390 state |= BMAP_ATTRFORK;
4720 4391
4721 ifp = XFS_IFORK_PTR(ip, whichfork); 4392 ifp = XFS_IFORK_PTR(ip, whichfork);
4722 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4393 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4723 for (idx = 0; idx < cnt; idx++) { 4394 for (idx = 0; idx < cnt; idx++)
4724 ep = xfs_iext_get_ext(ifp, idx); 4395 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4725 xfs_bmbt_get_all(ep, &s);
4726 XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
4727 whichfork);
4728 }
4729} 4396}
4730#endif
4731 4397
4732#ifdef DEBUG
4733/* 4398/*
4734 * Validate that the bmbt_irecs being returned from bmapi are valid 4399 * Validate that the bmbt_irecs being returned from bmapi are valid
4735 * given the callers original parameters. Specifically check the 4400 * given the callers original parameters. Specifically check the
@@ -5478,7 +5143,8 @@ xfs_bunmapi(
5478 int rsvd; /* OK to allocate reserved blocks */ 5143 int rsvd; /* OK to allocate reserved blocks */
5479 xfs_fsblock_t sum; 5144 xfs_fsblock_t sum;
5480 5145
5481 xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address); 5146 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5147
5482 whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 5148 whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5483 XFS_ATTR_FORK : XFS_DATA_FORK; 5149 XFS_ATTR_FORK : XFS_DATA_FORK;
5484 ifp = XFS_IFORK_PTR(ip, whichfork); 5150 ifp = XFS_IFORK_PTR(ip, whichfork);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 56f62d2edc3..419dafb9d87 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -95,6 +95,21 @@ typedef struct xfs_bmap_free
95 /* need write cache flushing and no */ 95 /* need write cache flushing and no */
96 /* additional allocation alignments */ 96 /* additional allocation alignments */
97 97
98#define XFS_BMAPI_FLAGS \
99 { XFS_BMAPI_WRITE, "WRITE" }, \
100 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
109 { XFS_BMAPI_CONTIG, "CONTIG" }, \
110 { XFS_BMAPI_CONVERT, "CONVERT" }
111
112
98static inline int xfs_bmapi_aflag(int w) 113static inline int xfs_bmapi_aflag(int w)
99{ 114{
100 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); 115 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
@@ -135,36 +150,43 @@ typedef struct xfs_bmalloca {
135 char conv; /* overwriting unwritten extents */ 150 char conv; /* overwriting unwritten extents */
136} xfs_bmalloca_t; 151} xfs_bmalloca_t;
137 152
138#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
139/* 153/*
140 * Trace operations for bmap extent tracing 154 * Flags for xfs_bmap_add_extent*.
141 */ 155 */
142#define XFS_BMAP_KTRACE_DELETE 1 156#define BMAP_LEFT_CONTIG (1 << 0)
143#define XFS_BMAP_KTRACE_INSERT 2 157#define BMAP_RIGHT_CONTIG (1 << 1)
144#define XFS_BMAP_KTRACE_PRE_UP 3 158#define BMAP_LEFT_FILLING (1 << 2)
145#define XFS_BMAP_KTRACE_POST_UP 4 159#define BMAP_RIGHT_FILLING (1 << 3)
146 160#define BMAP_LEFT_DELAY (1 << 4)
147#define XFS_BMAP_TRACE_SIZE 4096 /* size of global trace buffer */ 161#define BMAP_RIGHT_DELAY (1 << 5)
148#define XFS_BMAP_KTRACE_SIZE 32 /* size of per-inode trace buffer */ 162#define BMAP_LEFT_VALID (1 << 6)
149extern ktrace_t *xfs_bmap_trace_buf; 163#define BMAP_RIGHT_VALID (1 << 7)
164#define BMAP_ATTRFORK (1 << 8)
165
166#define XFS_BMAP_EXT_FLAGS \
167 { BMAP_LEFT_CONTIG, "LC" }, \
168 { BMAP_RIGHT_CONTIG, "RC" }, \
169 { BMAP_LEFT_FILLING, "LF" }, \
170 { BMAP_RIGHT_FILLING, "RF" }, \
171 { BMAP_ATTRFORK, "ATTR" }
150 172
151/* 173/*
152 * Add bmap trace insert entries for all the contents of the extent list. 174 * Add bmap trace insert entries for all the contents of the extent list.
175 *
176 * Quite excessive tracing. Only do this for debug builds.
153 */ 177 */
178#if defined(__KERNEL) && defined(DEBUG)
154void 179void
155xfs_bmap_trace_exlist( 180xfs_bmap_trace_exlist(
156 const char *fname, /* function name */
157 struct xfs_inode *ip, /* incore inode pointer */ 181 struct xfs_inode *ip, /* incore inode pointer */
158 xfs_extnum_t cnt, /* count of entries in list */ 182 xfs_extnum_t cnt, /* count of entries in list */
159 int whichfork); /* data or attr fork */ 183 int whichfork,
184 unsigned long caller_ip); /* data or attr fork */
160#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 185#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
161 xfs_bmap_trace_exlist(__func__,ip,c,w) 186 xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
162 187#else
163#else /* __KERNEL__ && XFS_BMAP_TRACE */
164
165#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 188#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
166 189#endif
167#endif /* __KERNEL__ && XFS_BMAP_TRACE */
168 190
169/* 191/*
170 * Convert inode from non-attributed to attributed. 192 * Convert inode from non-attributed to attributed.
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index eb7b702d069..38751d5fac6 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -98,8 +98,7 @@ xfs_bmdr_to_bmbt(
98 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
100 */ 100 */
101 101STATIC void
102STATIC_INLINE void
103__xfs_bmbt_get_all( 102__xfs_bmbt_get_all(
104 __uint64_t l0, 103 __uint64_t l0,
105 __uint64_t l1, 104 __uint64_t l1,
@@ -769,12 +768,6 @@ xfs_bmbt_trace_enter(
769 (void *)a0, (void *)a1, (void *)a2, (void *)a3, 768 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
770 (void *)a4, (void *)a5, (void *)a6, (void *)a7, 769 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
771 (void *)a8, (void *)a9, (void *)a10); 770 (void *)a8, (void *)a9, (void *)a10);
772 ktrace_enter(ip->i_btrace,
773 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
774 (void *)func, (void *)s, (void *)ip, (void *)cur,
775 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
776 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
777 (void *)a8, (void *)a9, (void *)a10);
778} 771}
779 772
780STATIC void 773STATIC void
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947..cf07ca7c22e 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
46#define BMBT_STARTBLOCK_BITLEN 52 46#define BMBT_STARTBLOCK_BITLEN 52
47#define BMBT_BLOCKCOUNT_BITLEN 21 47#define BMBT_BLOCKCOUNT_BITLEN 21
48 48
49 49typedef struct xfs_bmbt_rec {
50#define BMBT_USE_64 1
51
52typedef struct xfs_bmbt_rec_32
53{
54 __uint32_t l0, l1, l2, l3;
55} xfs_bmbt_rec_32_t;
56typedef struct xfs_bmbt_rec_64
57{
58 __be64 l0, l1; 50 __be64 l0, l1;
59} xfs_bmbt_rec_64_t; 51} xfs_bmbt_rec_t;
60 52
61typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 53typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
62typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; 54typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
63 55
64typedef struct xfs_bmbt_rec_host { 56typedef struct xfs_bmbt_rec_host {
65 __uint64_t l0, l1; 57 __uint64_t l0, l1;
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 52b5f14d0c3..36a0992dd66 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_btree_trace.h" 39#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
42 43
43/* 44/*
44 * Cursor allocation zone. 45 * Cursor allocation zone.
@@ -81,7 +82,7 @@ xfs_btree_check_lblock(
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 82 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 83 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83 if (bp) 84 if (bp)
84 xfs_buftrace("LBTREE ERROR", bp); 85 trace_xfs_btree_corrupt(bp, _RET_IP_);
85 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, 86 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86 mp); 87 mp);
87 return XFS_ERROR(EFSCORRUPTED); 88 return XFS_ERROR(EFSCORRUPTED);
@@ -119,7 +120,7 @@ xfs_btree_check_sblock(
119 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 120 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 121 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 122 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 123 trace_xfs_btree_corrupt(bp, _RET_IP_);
123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", 124 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 XFS_ERRLEVEL_LOW, cur->bc_mp, block); 125 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 126 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
index b3f5eb3c3c6..2d8a309873e 100644
--- a/fs/xfs/xfs_btree_trace.h
+++ b/fs/xfs/xfs_btree_trace.h
@@ -58,8 +58,6 @@ void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int); 58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, 59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int); 60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
62 xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
63void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); 61void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
64void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, 62void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_key *, int); 63 union xfs_btree_ptr, union xfs_btree_key *, int);
@@ -71,24 +69,10 @@ void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
71 union xfs_btree_rec *, int); 69 union xfs_btree_rec *, int);
72void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); 70void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
73 71
74
75#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
76extern ktrace_t *xfs_allocbt_trace_buf;
77
78#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
79extern ktrace_t *xfs_inobt_trace_buf;
80
81#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
82#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
83extern ktrace_t *xfs_bmbt_trace_buf;
84
85
86#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ 72#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
87 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) 73 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
88#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ 74#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
89 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) 75 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
90#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
91 xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
92#define XFS_BTREE_TRACE_ARGI(c, i) \ 76#define XFS_BTREE_TRACE_ARGI(c, i) \
93 xfs_btree_trace_argi(__func__, c, i, __LINE__) 77 xfs_btree_trace_argi(__func__, c, i, __LINE__)
94#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ 78#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
@@ -104,7 +88,6 @@ extern ktrace_t *xfs_bmbt_trace_buf;
104#else 88#else
105#define XFS_BTREE_TRACE_ARGBI(c, b, i) 89#define XFS_BTREE_TRACE_ARGBI(c, b, i)
106#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) 90#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
107#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
108#define XFS_BTREE_TRACE_ARGI(c, i) 91#define XFS_BTREE_TRACE_ARGI(c, i)
109#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) 92#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
110#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) 93#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 92af4098c7e..a30f7e9eb2b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -29,6 +29,7 @@
29#include "xfs_buf_item.h" 29#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 30#include "xfs_trans_priv.h"
31#include "xfs_error.h" 31#include "xfs_error.h"
32#include "xfs_trace.h"
32 33
33 34
34kmem_zone_t *xfs_buf_item_zone; 35kmem_zone_t *xfs_buf_item_zone;
@@ -164,7 +165,7 @@ xfs_buf_item_size(
164 * is the buf log format structure with the 165 * is the buf log format structure with the
165 * cancel flag in it. 166 * cancel flag in it.
166 */ 167 */
167 xfs_buf_item_trace("SIZE STALE", bip); 168 trace_xfs_buf_item_size_stale(bip);
168 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 169 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
169 return 1; 170 return 1;
170 } 171 }
@@ -206,7 +207,7 @@ xfs_buf_item_size(
206 } 207 }
207 } 208 }
208 209
209 xfs_buf_item_trace("SIZE NORM", bip); 210 trace_xfs_buf_item_size(bip);
210 return nvecs; 211 return nvecs;
211} 212}
212 213
@@ -259,7 +260,7 @@ xfs_buf_item_format(
259 * is the buf log format structure with the 260 * is the buf log format structure with the
260 * cancel flag in it. 261 * cancel flag in it.
261 */ 262 */
262 xfs_buf_item_trace("FORMAT STALE", bip); 263 trace_xfs_buf_item_format_stale(bip);
263 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 264 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
264 bip->bli_format.blf_size = nvecs; 265 bip->bli_format.blf_size = nvecs;
265 return; 266 return;
@@ -335,7 +336,7 @@ xfs_buf_item_format(
335 /* 336 /*
336 * Check to make sure everything is consistent. 337 * Check to make sure everything is consistent.
337 */ 338 */
338 xfs_buf_item_trace("FORMAT NORM", bip); 339 trace_xfs_buf_item_format(bip);
339 xfs_buf_item_log_check(bip); 340 xfs_buf_item_log_check(bip);
340} 341}
341 342
@@ -355,8 +356,7 @@ xfs_buf_item_pin(
355 ASSERT(atomic_read(&bip->bli_refcount) > 0); 356 ASSERT(atomic_read(&bip->bli_refcount) > 0);
356 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 357 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
357 (bip->bli_flags & XFS_BLI_STALE)); 358 (bip->bli_flags & XFS_BLI_STALE));
358 xfs_buf_item_trace("PIN", bip); 359 trace_xfs_buf_item_pin(bip);
359 xfs_buftrace("XFS_PIN", bp);
360 xfs_bpin(bp); 360 xfs_bpin(bp);
361} 361}
362 362
@@ -383,8 +383,7 @@ xfs_buf_item_unpin(
383 ASSERT(bp != NULL); 383 ASSERT(bp != NULL);
384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
385 ASSERT(atomic_read(&bip->bli_refcount) > 0); 385 ASSERT(atomic_read(&bip->bli_refcount) > 0);
386 xfs_buf_item_trace("UNPIN", bip); 386 trace_xfs_buf_item_unpin(bip);
387 xfs_buftrace("XFS_UNPIN", bp);
388 387
389 freed = atomic_dec_and_test(&bip->bli_refcount); 388 freed = atomic_dec_and_test(&bip->bli_refcount);
390 ailp = bip->bli_item.li_ailp; 389 ailp = bip->bli_item.li_ailp;
@@ -395,8 +394,8 @@ xfs_buf_item_unpin(
395 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 394 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
396 ASSERT(XFS_BUF_ISSTALE(bp)); 395 ASSERT(XFS_BUF_ISSTALE(bp));
397 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 396 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
398 xfs_buf_item_trace("UNPIN STALE", bip); 397 trace_xfs_buf_item_unpin_stale(bip);
399 xfs_buftrace("XFS_UNPIN STALE", bp); 398
400 /* 399 /*
401 * If we get called here because of an IO error, we may 400 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_ail_delete() 401 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -440,8 +439,8 @@ xfs_buf_item_unpin_remove(
440 if ((atomic_read(&bip->bli_refcount) == 1) && 439 if ((atomic_read(&bip->bli_refcount) == 1) &&
441 (bip->bli_flags & XFS_BLI_STALE)) { 440 (bip->bli_flags & XFS_BLI_STALE)) {
442 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); 441 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
443 xfs_buf_item_trace("UNPIN REMOVE", bip); 442 trace_xfs_buf_item_unpin_stale(bip);
444 xfs_buftrace("XFS_UNPIN_REMOVE", bp); 443
445 /* 444 /*
446 * yes -- clear the xaction descriptor in-use flag 445 * yes -- clear the xaction descriptor in-use flag
447 * and free the chunk if required. We can safely 446 * and free the chunk if required. We can safely
@@ -495,7 +494,7 @@ xfs_buf_item_trylock(
495 XFS_BUF_HOLD(bp); 494 XFS_BUF_HOLD(bp);
496 495
497 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 496 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
498 xfs_buf_item_trace("TRYLOCK SUCCESS", bip); 497 trace_xfs_buf_item_trylock(bip);
499 return XFS_ITEM_SUCCESS; 498 return XFS_ITEM_SUCCESS;
500} 499}
501 500
@@ -524,7 +523,6 @@ xfs_buf_item_unlock(
524 uint hold; 523 uint hold;
525 524
526 bp = bip->bli_buf; 525 bp = bip->bli_buf;
527 xfs_buftrace("XFS_UNLOCK", bp);
528 526
529 /* 527 /*
530 * Clear the buffer's association with this transaction. 528 * Clear the buffer's association with this transaction.
@@ -547,7 +545,7 @@ xfs_buf_item_unlock(
547 */ 545 */
548 if (bip->bli_flags & XFS_BLI_STALE) { 546 if (bip->bli_flags & XFS_BLI_STALE) {
549 bip->bli_flags &= ~XFS_BLI_LOGGED; 547 bip->bli_flags &= ~XFS_BLI_LOGGED;
550 xfs_buf_item_trace("UNLOCK STALE", bip); 548 trace_xfs_buf_item_unlock_stale(bip);
551 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 549 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
552 if (!aborted) 550 if (!aborted)
553 return; 551 return;
@@ -574,7 +572,7 @@ xfs_buf_item_unlock(
574 * release the buffer at the end of this routine. 572 * release the buffer at the end of this routine.
575 */ 573 */
576 hold = bip->bli_flags & XFS_BLI_HOLD; 574 hold = bip->bli_flags & XFS_BLI_HOLD;
577 xfs_buf_item_trace("UNLOCK", bip); 575 trace_xfs_buf_item_unlock(bip);
578 576
579 /* 577 /*
580 * If the buf item isn't tracking any data, free it. 578 * If the buf item isn't tracking any data, free it.
@@ -618,7 +616,8 @@ xfs_buf_item_committed(
618 xfs_buf_log_item_t *bip, 616 xfs_buf_log_item_t *bip,
619 xfs_lsn_t lsn) 617 xfs_lsn_t lsn)
620{ 618{
621 xfs_buf_item_trace("COMMITTED", bip); 619 trace_xfs_buf_item_committed(bip);
620
622 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 621 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
623 (bip->bli_item.li_lsn != 0)) { 622 (bip->bli_item.li_lsn != 0)) {
624 return bip->bli_item.li_lsn; 623 return bip->bli_item.li_lsn;
@@ -640,7 +639,7 @@ xfs_buf_item_push(
640 xfs_buf_t *bp; 639 xfs_buf_t *bp;
641 640
642 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 641 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
643 xfs_buf_item_trace("PUSH", bip); 642 trace_xfs_buf_item_push(bip);
644 643
645 bp = bip->bli_buf; 644 bp = bip->bli_buf;
646 645
@@ -738,9 +737,6 @@ xfs_buf_item_init(
738 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 737 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
739 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 738 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
740 bip->bli_format.blf_map_size = map_size; 739 bip->bli_format.blf_map_size = map_size;
741#ifdef XFS_BLI_TRACE
742 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
743#endif
744 740
745#ifdef XFS_TRANS_DEBUG 741#ifdef XFS_TRANS_DEBUG
746 /* 742 /*
@@ -878,9 +874,6 @@ xfs_buf_item_free(
878 kmem_free(bip->bli_logged); 874 kmem_free(bip->bli_logged);
879#endif /* XFS_TRANS_DEBUG */ 875#endif /* XFS_TRANS_DEBUG */
880 876
881#ifdef XFS_BLI_TRACE
882 ktrace_free(bip->bli_trace);
883#endif
884 kmem_zone_free(xfs_buf_item_zone, bip); 877 kmem_zone_free(xfs_buf_item_zone, bip);
885} 878}
886 879
@@ -897,7 +890,8 @@ xfs_buf_item_relse(
897{ 890{
898 xfs_buf_log_item_t *bip; 891 xfs_buf_log_item_t *bip;
899 892
900 xfs_buftrace("XFS_RELSE", bp); 893 trace_xfs_buf_item_relse(bp, _RET_IP_);
894
901 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 895 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
902 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 896 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
903 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 897 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
@@ -994,7 +988,7 @@ xfs_buf_iodone_callbacks(
994 if (XFS_FORCED_SHUTDOWN(mp)) { 988 if (XFS_FORCED_SHUTDOWN(mp)) {
995 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); 989 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
996 XFS_BUF_SUPER_STALE(bp); 990 XFS_BUF_SUPER_STALE(bp);
997 xfs_buftrace("BUF_IODONE_CB", bp); 991 trace_xfs_buf_item_iodone(bp, _RET_IP_);
998 xfs_buf_do_callbacks(bp, lip); 992 xfs_buf_do_callbacks(bp, lip);
999 XFS_BUF_SET_FSPRIVATE(bp, NULL); 993 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1000 XFS_BUF_CLR_IODONE_FUNC(bp); 994 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1030,7 +1024,7 @@ xfs_buf_iodone_callbacks(
1030 XFS_BUF_SET_START(bp); 1024 XFS_BUF_SET_START(bp);
1031 } 1025 }
1032 ASSERT(XFS_BUF_IODONE_FUNC(bp)); 1026 ASSERT(XFS_BUF_IODONE_FUNC(bp));
1033 xfs_buftrace("BUF_IODONE ASYNC", bp); 1027 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1034 xfs_buf_relse(bp); 1028 xfs_buf_relse(bp);
1035 } else { 1029 } else {
1036 /* 1030 /*
@@ -1053,9 +1047,7 @@ xfs_buf_iodone_callbacks(
1053 } 1047 }
1054 return; 1048 return;
1055 } 1049 }
1056#ifdef XFSERRORDEBUG 1050
1057 xfs_buftrace("XFS BUFCB NOERR", bp);
1058#endif
1059 xfs_buf_do_callbacks(bp, lip); 1051 xfs_buf_do_callbacks(bp, lip);
1060 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1052 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1061 XFS_BUF_CLR_IODONE_FUNC(bp); 1053 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1081,7 +1073,9 @@ xfs_buf_error_relse(
1081 XFS_BUF_DONE(bp); 1073 XFS_BUF_DONE(bp);
1082 XFS_BUF_UNDELAYWRITE(bp); 1074 XFS_BUF_UNDELAYWRITE(bp);
1083 XFS_BUF_ERROR(bp,0); 1075 XFS_BUF_ERROR(bp,0);
1084 xfs_buftrace("BUF_ERROR_RELSE", bp); 1076
1077 trace_xfs_buf_error_relse(bp, _RET_IP_);
1078
1085 if (! XFS_FORCED_SHUTDOWN(mp)) 1079 if (! XFS_FORCED_SHUTDOWN(mp))
1086 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1080 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1087 /* 1081 /*
@@ -1128,34 +1122,3 @@ xfs_buf_iodone(
1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1122 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1129 xfs_buf_item_free(bip); 1123 xfs_buf_item_free(bip);
1130} 1124}
1131
1132#if defined(XFS_BLI_TRACE)
1133void
1134xfs_buf_item_trace(
1135 char *id,
1136 xfs_buf_log_item_t *bip)
1137{
1138 xfs_buf_t *bp;
1139 ASSERT(bip->bli_trace != NULL);
1140
1141 bp = bip->bli_buf;
1142 ktrace_enter(bip->bli_trace,
1143 (void *)id,
1144 (void *)bip->bli_buf,
1145 (void *)((unsigned long)bip->bli_flags),
1146 (void *)((unsigned long)bip->bli_recur),
1147 (void *)((unsigned long)atomic_read(&bip->bli_refcount)),
1148 (void *)((unsigned long)
1149 (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)),
1150 (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))),
1151 (void *)((unsigned long)XFS_BUF_COUNT(bp)),
1152 (void *)((unsigned long)XFS_BUF_BFLAGS(bp)),
1153 XFS_BUF_FSPRIVATE(bp, void *),
1154 XFS_BUF_FSPRIVATE2(bp, void *),
1155 (void *)(unsigned long)XFS_BUF_ISPINNED(bp),
1156 (void *)XFS_BUF_IODONE_FUNC(bp),
1157 (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))),
1158 (void *)bip->bli_item.li_desc,
1159 (void *)((unsigned long)bip->bli_item.li_flags));
1160}
1161#endif /* XFS_BLI_TRACE */
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 5a41c348bb1..217f34af00c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -70,22 +70,21 @@ typedef struct xfs_buf_log_format_t {
70#define XFS_BLI_INODE_ALLOC_BUF 0x10 70#define XFS_BLI_INODE_ALLOC_BUF 0x10
71#define XFS_BLI_STALE_INODE 0x20 71#define XFS_BLI_STALE_INODE 0x20
72 72
73#define XFS_BLI_FLAGS \
74 { XFS_BLI_HOLD, "HOLD" }, \
75 { XFS_BLI_DIRTY, "DIRTY" }, \
76 { XFS_BLI_STALE, "STALE" }, \
77 { XFS_BLI_LOGGED, "LOGGED" }, \
78 { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
79 { XFS_BLI_STALE_INODE, "STALE_INODE" }
80
73 81
74#ifdef __KERNEL__ 82#ifdef __KERNEL__
75 83
76struct xfs_buf; 84struct xfs_buf;
77struct ktrace;
78struct xfs_mount; 85struct xfs_mount;
79struct xfs_buf_log_item; 86struct xfs_buf_log_item;
80 87
81#if defined(XFS_BLI_TRACE)
82#define XFS_BLI_TRACE_SIZE 32
83
84void xfs_buf_item_trace(char *, struct xfs_buf_log_item *);
85#else
86#define xfs_buf_item_trace(id, bip)
87#endif
88
89/* 88/*
90 * This is the in core log item structure used to track information 89 * This is the in core log item structure used to track information
91 * needed to log buffers. It tracks how many times the lock has been 90 * needed to log buffers. It tracks how many times the lock has been
@@ -97,9 +96,6 @@ typedef struct xfs_buf_log_item {
97 unsigned int bli_flags; /* misc flags */ 96 unsigned int bli_flags; /* misc flags */
98 unsigned int bli_recur; /* lock recursion count */ 97 unsigned int bli_recur; /* lock recursion count */
99 atomic_t bli_refcount; /* cnt of tp refs */ 98 atomic_t bli_refcount; /* cnt of tp refs */
100#ifdef XFS_BLI_TRACE
101 struct ktrace *bli_trace; /* event trace buf */
102#endif
103#ifdef XFS_TRANS_DEBUG 99#ifdef XFS_TRANS_DEBUG
104 char *bli_orig; /* original buffer copy */ 100 char *bli_orig; /* original buffer copy */
105 char *bli_logged; /* bytes logged (bitmap) */ 101 char *bli_logged; /* bytes logged (bitmap) */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2847bbc1c53..c0c8869115b 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -46,6 +46,7 @@
46#include "xfs_dir2_block.h" 46#include "xfs_dir2_block.h"
47#include "xfs_dir2_node.h" 47#include "xfs_dir2_node.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_trace.h"
49 50
50/* 51/*
51 * xfs_da_btree.c 52 * xfs_da_btree.c
@@ -2107,7 +2108,7 @@ xfs_da_do_buf(
2107 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2108 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
2108 mp, XFS_ERRTAG_DA_READ_BUF, 2109 mp, XFS_ERRTAG_DA_READ_BUF,
2109 XFS_RANDOM_DA_READ_BUF))) { 2110 XFS_RANDOM_DA_READ_BUF))) {
2110 xfs_buftrace("DA READ ERROR", rbp->bps[0]); 2111 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
2111 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", 2112 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2112 XFS_ERRLEVEL_LOW, mp, info); 2113 XFS_ERRLEVEL_LOW, mp, info);
2113 error = XFS_ERROR(EFSCORRUPTED); 2114 error = XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8c536167bf7..30cd08f56a3 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -125,6 +125,13 @@ typedef struct xfs_da_args {
125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ 125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ 126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
127 127
128#define XFS_DA_OP_FLAGS \
129 { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
130 { XFS_DA_OP_RENAME, "RENAME" }, \
131 { XFS_DA_OP_ADDNAME, "ADDNAME" }, \
132 { XFS_DA_OP_OKNOENT, "OKNOENT" }, \
133 { XFS_DA_OP_CILOOKUP, "CILOOKUP" }
134
128/* 135/*
129 * Structure to describe buffer(s) for a block. 136 * Structure to describe buffer(s) for a block.
130 * This is needed in the directory version 2 format case, when 137 * This is needed in the directory version 2 format case, when
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index ab89a7e94a0..84ca1cf16a1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -43,6 +43,7 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46#include "xfs_trace.h"
46 47
47/* 48/*
48 * Syssgi interface for swapext 49 * Syssgi interface for swapext
@@ -113,10 +114,82 @@ xfs_swapext(
113 return error; 114 return error;
114} 115}
115 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. basically we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has less extents that then temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are less extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
116int 189int
117xfs_swap_extents( 190xfs_swap_extents(
118 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
119 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
120 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
121{ 194{
122 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -160,15 +233,7 @@ xfs_swap_extents(
160 goto out_unlock; 233 goto out_unlock;
161 } 234 }
162 235
163 /* Should never get a local format */
164 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
165 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
166 error = XFS_ERROR(EINVAL);
167 goto out_unlock;
168 }
169
170 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
171 xfs_inval_cached_trace(tip, 0, -1, 0, -1);
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
174 if (error) 239 if (error)
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index bb1d58eb398..93634a7e90e 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -40,9 +40,9 @@
40#include "xfs_dir2_leaf.h" 40#include "xfs_dir2_leaf.h"
41#include "xfs_dir2_block.h" 41#include "xfs_dir2_block.h"
42#include "xfs_dir2_node.h" 42#include "xfs_dir2_node.h"
43#include "xfs_dir2_trace.h"
44#include "xfs_error.h" 43#include "xfs_error.h"
45#include "xfs_vnodeops.h" 44#include "xfs_vnodeops.h"
45#include "xfs_trace.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
@@ -525,7 +525,8 @@ xfs_dir2_grow_inode(
525 xfs_trans_t *tp; 525 xfs_trans_t *tp;
526 xfs_drfsbno_t nblks; 526 xfs_drfsbno_t nblks;
527 527
528 xfs_dir2_trace_args_s("grow_inode", args, space); 528 trace_xfs_dir2_grow_inode(args, space);
529
529 dp = args->dp; 530 dp = args->dp;
530 tp = args->trans; 531 tp = args->trans;
531 mp = dp->i_mount; 532 mp = dp->i_mount;
@@ -703,7 +704,8 @@ xfs_dir2_shrink_inode(
703 xfs_mount_t *mp; 704 xfs_mount_t *mp;
704 xfs_trans_t *tp; 705 xfs_trans_t *tp;
705 706
706 xfs_dir2_trace_args_db("shrink_inode", args, db, bp); 707 trace_xfs_dir2_shrink_inode(args, db);
708
707 dp = args->dp; 709 dp = args->dp;
708 mp = dp->i_mount; 710 mp = dp->i_mount;
709 tp = args->trans; 711 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index ab52e9e1c1e..ddc4ecc7807 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -36,8 +36,8 @@
36#include "xfs_dir2_data.h" 36#include "xfs_dir2_data.h"
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_trace.h"
40#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Local function prototypes. 43 * Local function prototypes.
@@ -94,7 +94,8 @@ xfs_dir2_block_addname(
94 __be16 *tagp; /* pointer to tag value */ 94 __be16 *tagp; /* pointer to tag value */
95 xfs_trans_t *tp; /* transaction structure */ 95 xfs_trans_t *tp; /* transaction structure */
96 96
97 xfs_dir2_trace_args("block_addname", args); 97 trace_xfs_dir2_block_addname(args);
98
98 dp = args->dp; 99 dp = args->dp;
99 tp = args->trans; 100 tp = args->trans;
100 mp = dp->i_mount; 101 mp = dp->i_mount;
@@ -590,7 +591,8 @@ xfs_dir2_block_lookup(
590 int error; /* error return value */ 591 int error; /* error return value */
591 xfs_mount_t *mp; /* filesystem mount point */ 592 xfs_mount_t *mp; /* filesystem mount point */
592 593
593 xfs_dir2_trace_args("block_lookup", args); 594 trace_xfs_dir2_block_lookup(args);
595
594 /* 596 /*
595 * Get the buffer, look up the entry. 597 * Get the buffer, look up the entry.
596 * If not found (ENOENT) then return, have no buffer. 598 * If not found (ENOENT) then return, have no buffer.
@@ -747,7 +749,8 @@ xfs_dir2_block_removename(
747 int size; /* shortform size */ 749 int size; /* shortform size */
748 xfs_trans_t *tp; /* transaction pointer */ 750 xfs_trans_t *tp; /* transaction pointer */
749 751
750 xfs_dir2_trace_args("block_removename", args); 752 trace_xfs_dir2_block_removename(args);
753
751 /* 754 /*
752 * Look up the entry in the block. Gets the buffer and entry index. 755 * Look up the entry in the block. Gets the buffer and entry index.
753 * It will always be there, the vnodeops level does a lookup first. 756 * It will always be there, the vnodeops level does a lookup first.
@@ -823,7 +826,8 @@ xfs_dir2_block_replace(
823 int error; /* error return value */ 826 int error; /* error return value */
824 xfs_mount_t *mp; /* filesystem mount point */ 827 xfs_mount_t *mp; /* filesystem mount point */
825 828
826 xfs_dir2_trace_args("block_replace", args); 829 trace_xfs_dir2_block_replace(args);
830
827 /* 831 /*
828 * Lookup the entry in the directory. Get buffer and entry index. 832 * Lookup the entry in the directory. Get buffer and entry index.
829 * This will always succeed since the caller has already done a lookup. 833 * This will always succeed since the caller has already done a lookup.
@@ -897,7 +901,8 @@ xfs_dir2_leaf_to_block(
897 int to; /* block/leaf to index */ 901 int to; /* block/leaf to index */
898 xfs_trans_t *tp; /* transaction pointer */ 902 xfs_trans_t *tp; /* transaction pointer */
899 903
900 xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp); 904 trace_xfs_dir2_leaf_to_block(args);
905
901 dp = args->dp; 906 dp = args->dp;
902 tp = args->trans; 907 tp = args->trans;
903 mp = dp->i_mount; 908 mp = dp->i_mount;
@@ -1044,7 +1049,8 @@ xfs_dir2_sf_to_block(
1044 xfs_trans_t *tp; /* transaction pointer */ 1049 xfs_trans_t *tp; /* transaction pointer */
1045 struct xfs_name name; 1050 struct xfs_name name;
1046 1051
1047 xfs_dir2_trace_args("sf_to_block", args); 1052 trace_xfs_dir2_sf_to_block(args);
1053
1048 dp = args->dp; 1054 dp = args->dp;
1049 tp = args->trans; 1055 tp = args->trans;
1050 mp = dp->i_mount; 1056 mp = dp->i_mount;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 41ad537c49e..29f484c11b3 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -38,8 +38,8 @@
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_node.h" 40#include "xfs_dir2_node.h"
41#include "xfs_dir2_trace.h"
42#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
43 43
44/* 44/*
45 * Local function declarations. 45 * Local function declarations.
@@ -80,7 +80,8 @@ xfs_dir2_block_to_leaf(
80 int needscan; /* need to rescan bestfree */ 80 int needscan; /* need to rescan bestfree */
81 xfs_trans_t *tp; /* transaction pointer */ 81 xfs_trans_t *tp; /* transaction pointer */
82 82
83 xfs_dir2_trace_args_b("block_to_leaf", args, dbp); 83 trace_xfs_dir2_block_to_leaf(args);
84
84 dp = args->dp; 85 dp = args->dp;
85 mp = dp->i_mount; 86 mp = dp->i_mount;
86 tp = args->trans; 87 tp = args->trans;
@@ -188,7 +189,8 @@ xfs_dir2_leaf_addname(
188 xfs_trans_t *tp; /* transaction pointer */ 189 xfs_trans_t *tp; /* transaction pointer */
189 xfs_dir2_db_t use_block; /* data block number */ 190 xfs_dir2_db_t use_block; /* data block number */
190 191
191 xfs_dir2_trace_args("leaf_addname", args); 192 trace_xfs_dir2_leaf_addname(args);
193
192 dp = args->dp; 194 dp = args->dp;
193 tp = args->trans; 195 tp = args->trans;
194 mp = dp->i_mount; 196 mp = dp->i_mount;
@@ -1266,7 +1268,8 @@ xfs_dir2_leaf_lookup(
1266 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1268 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1267 xfs_trans_t *tp; /* transaction pointer */ 1269 xfs_trans_t *tp; /* transaction pointer */
1268 1270
1269 xfs_dir2_trace_args("leaf_lookup", args); 1271 trace_xfs_dir2_leaf_lookup(args);
1272
1270 /* 1273 /*
1271 * Look up name in the leaf block, returning both buffers and index. 1274 * Look up name in the leaf block, returning both buffers and index.
1272 */ 1275 */
@@ -1454,7 +1457,8 @@ xfs_dir2_leaf_removename(
1454 xfs_dir2_data_off_t oldbest; /* old value of best free */ 1457 xfs_dir2_data_off_t oldbest; /* old value of best free */
1455 xfs_trans_t *tp; /* transaction pointer */ 1458 xfs_trans_t *tp; /* transaction pointer */
1456 1459
1457 xfs_dir2_trace_args("leaf_removename", args); 1460 trace_xfs_dir2_leaf_removename(args);
1461
1458 /* 1462 /*
1459 * Lookup the leaf entry, get the leaf and data blocks read in. 1463 * Lookup the leaf entry, get the leaf and data blocks read in.
1460 */ 1464 */
@@ -1586,7 +1590,8 @@ xfs_dir2_leaf_replace(
1586 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1590 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1587 xfs_trans_t *tp; /* transaction pointer */ 1591 xfs_trans_t *tp; /* transaction pointer */
1588 1592
1589 xfs_dir2_trace_args("leaf_replace", args); 1593 trace_xfs_dir2_leaf_replace(args);
1594
1590 /* 1595 /*
1591 * Look up the entry. 1596 * Look up the entry.
1592 */ 1597 */
@@ -1766,7 +1771,9 @@ xfs_dir2_node_to_leaf(
1766 if (state->path.active > 1) 1771 if (state->path.active > 1)
1767 return 0; 1772 return 0;
1768 args = state->args; 1773 args = state->args;
1769 xfs_dir2_trace_args("node_to_leaf", args); 1774
1775 trace_xfs_dir2_node_to_leaf(args);
1776
1770 mp = state->mp; 1777 mp = state->mp;
1771 dp = args->dp; 1778 dp = args->dp;
1772 tp = args->trans; 1779 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5a81ccd1045..ce6e355199b 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -37,8 +37,8 @@
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_node.h" 39#include "xfs_dir2_node.h"
40#include "xfs_dir2_trace.h"
41#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
42 42
43/* 43/*
44 * Function declarations. 44 * Function declarations.
@@ -123,7 +123,8 @@ xfs_dir2_leaf_to_node(
123 __be16 *to; /* pointer to freespace entry */ 123 __be16 *to; /* pointer to freespace entry */
124 xfs_trans_t *tp; /* transaction pointer */ 124 xfs_trans_t *tp; /* transaction pointer */
125 125
126 xfs_dir2_trace_args_b("leaf_to_node", args, lbp); 126 trace_xfs_dir2_leaf_to_node(args);
127
127 dp = args->dp; 128 dp = args->dp;
128 mp = dp->i_mount; 129 mp = dp->i_mount;
129 tp = args->trans; 130 tp = args->trans;
@@ -196,7 +197,8 @@ xfs_dir2_leafn_add(
196 xfs_mount_t *mp; /* filesystem mount point */ 197 xfs_mount_t *mp; /* filesystem mount point */
197 xfs_trans_t *tp; /* transaction pointer */ 198 xfs_trans_t *tp; /* transaction pointer */
198 199
199 xfs_dir2_trace_args_sb("leafn_add", args, index, bp); 200 trace_xfs_dir2_leafn_add(args, index);
201
200 dp = args->dp; 202 dp = args->dp;
201 mp = dp->i_mount; 203 mp = dp->i_mount;
202 tp = args->trans; 204 tp = args->trans;
@@ -711,8 +713,8 @@ xfs_dir2_leafn_moveents(
711 int stale; /* count stale leaves copied */ 713 int stale; /* count stale leaves copied */
712 xfs_trans_t *tp; /* transaction pointer */ 714 xfs_trans_t *tp; /* transaction pointer */
713 715
714 xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d, 716 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
715 start_d, count); 717
716 /* 718 /*
717 * Silently return if nothing to do. 719 * Silently return if nothing to do.
718 */ 720 */
@@ -933,7 +935,8 @@ xfs_dir2_leafn_remove(
933 int needscan; /* need to rescan data frees */ 935 int needscan; /* need to rescan data frees */
934 xfs_trans_t *tp; /* transaction pointer */ 936 xfs_trans_t *tp; /* transaction pointer */
935 937
936 xfs_dir2_trace_args_sb("leafn_remove", args, index, bp); 938 trace_xfs_dir2_leafn_remove(args, index);
939
937 dp = args->dp; 940 dp = args->dp;
938 tp = args->trans; 941 tp = args->trans;
939 mp = dp->i_mount; 942 mp = dp->i_mount;
@@ -1363,7 +1366,8 @@ xfs_dir2_node_addname(
1363 int rval; /* sub-return value */ 1366 int rval; /* sub-return value */
1364 xfs_da_state_t *state; /* btree cursor */ 1367 xfs_da_state_t *state; /* btree cursor */
1365 1368
1366 xfs_dir2_trace_args("node_addname", args); 1369 trace_xfs_dir2_node_addname(args);
1370
1367 /* 1371 /*
1368 * Allocate and initialize the state (btree cursor). 1372 * Allocate and initialize the state (btree cursor).
1369 */ 1373 */
@@ -1822,7 +1826,8 @@ xfs_dir2_node_lookup(
1822 int rval; /* operation return value */ 1826 int rval; /* operation return value */
1823 xfs_da_state_t *state; /* btree cursor */ 1827 xfs_da_state_t *state; /* btree cursor */
1824 1828
1825 xfs_dir2_trace_args("node_lookup", args); 1829 trace_xfs_dir2_node_lookup(args);
1830
1826 /* 1831 /*
1827 * Allocate and initialize the btree cursor. 1832 * Allocate and initialize the btree cursor.
1828 */ 1833 */
@@ -1875,7 +1880,8 @@ xfs_dir2_node_removename(
1875 int rval; /* operation return value */ 1880 int rval; /* operation return value */
1876 xfs_da_state_t *state; /* btree cursor */ 1881 xfs_da_state_t *state; /* btree cursor */
1877 1882
1878 xfs_dir2_trace_args("node_removename", args); 1883 trace_xfs_dir2_node_removename(args);
1884
1879 /* 1885 /*
1880 * Allocate and initialize the btree cursor. 1886 * Allocate and initialize the btree cursor.
1881 */ 1887 */
@@ -1944,7 +1950,8 @@ xfs_dir2_node_replace(
1944 int rval; /* internal return value */ 1950 int rval; /* internal return value */
1945 xfs_da_state_t *state; /* btree cursor */ 1951 xfs_da_state_t *state; /* btree cursor */
1946 1952
1947 xfs_dir2_trace_args("node_replace", args); 1953 trace_xfs_dir2_node_replace(args);
1954
1948 /* 1955 /*
1949 * Allocate and initialize the btree cursor. 1956 * Allocate and initialize the btree cursor.
1950 */ 1957 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index e89734e8464..9d4f17a6967 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -37,7 +37,7 @@
37#include "xfs_dir2_data.h" 37#include "xfs_dir2_data.h"
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_trace.h" 40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Prototypes for internal functions. 43 * Prototypes for internal functions.
@@ -169,7 +169,8 @@ xfs_dir2_block_to_sf(
169 xfs_dir2_sf_t *sfp; /* shortform structure */ 169 xfs_dir2_sf_t *sfp; /* shortform structure */
170 xfs_ino_t temp; 170 xfs_ino_t temp;
171 171
172 xfs_dir2_trace_args_sb("block_to_sf", args, size, bp); 172 trace_xfs_dir2_block_to_sf(args);
173
173 dp = args->dp; 174 dp = args->dp;
174 mp = dp->i_mount; 175 mp = dp->i_mount;
175 176
@@ -281,7 +282,8 @@ xfs_dir2_sf_addname(
281 xfs_dir2_sf_t *sfp; /* shortform structure */ 282 xfs_dir2_sf_t *sfp; /* shortform structure */
282 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 283 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
283 284
284 xfs_dir2_trace_args("sf_addname", args); 285 trace_xfs_dir2_sf_addname(args);
286
285 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); 287 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
286 dp = args->dp; 288 dp = args->dp;
287 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 289 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -654,7 +656,8 @@ xfs_dir2_sf_create(
654 xfs_dir2_sf_t *sfp; /* shortform structure */ 656 xfs_dir2_sf_t *sfp; /* shortform structure */
655 int size; /* directory size */ 657 int size; /* directory size */
656 658
657 xfs_dir2_trace_args_i("sf_create", args, pino); 659 trace_xfs_dir2_sf_create(args);
660
658 dp = args->dp; 661 dp = args->dp;
659 662
660 ASSERT(dp != NULL); 663 ASSERT(dp != NULL);
@@ -808,7 +811,8 @@ xfs_dir2_sf_lookup(
808 enum xfs_dacmp cmp; /* comparison result */ 811 enum xfs_dacmp cmp; /* comparison result */
809 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ 812 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
810 813
811 xfs_dir2_trace_args("sf_lookup", args); 814 trace_xfs_dir2_sf_lookup(args);
815
812 xfs_dir2_sf_check(args); 816 xfs_dir2_sf_check(args);
813 dp = args->dp; 817 dp = args->dp;
814 818
@@ -891,7 +895,8 @@ xfs_dir2_sf_removename(
891 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 895 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
892 xfs_dir2_sf_t *sfp; /* shortform structure */ 896 xfs_dir2_sf_t *sfp; /* shortform structure */
893 897
894 xfs_dir2_trace_args("sf_removename", args); 898 trace_xfs_dir2_sf_removename(args);
899
895 dp = args->dp; 900 dp = args->dp;
896 901
897 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 902 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -982,7 +987,8 @@ xfs_dir2_sf_replace(
982 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 987 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
983 xfs_dir2_sf_t *sfp; /* shortform structure */ 988 xfs_dir2_sf_t *sfp; /* shortform structure */
984 989
985 xfs_dir2_trace_args("sf_replace", args); 990 trace_xfs_dir2_sf_replace(args);
991
986 dp = args->dp; 992 dp = args->dp;
987 993
988 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 994 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -1125,7 +1131,8 @@ xfs_dir2_sf_toino4(
1125 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1131 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1126 xfs_dir2_sf_t *sfp; /* new sf directory */ 1132 xfs_dir2_sf_t *sfp; /* new sf directory */
1127 1133
1128 xfs_dir2_trace_args("sf_toino4", args); 1134 trace_xfs_dir2_sf_toino4(args);
1135
1129 dp = args->dp; 1136 dp = args->dp;
1130 1137
1131 /* 1138 /*
@@ -1202,7 +1209,8 @@ xfs_dir2_sf_toino8(
1202 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1209 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1203 xfs_dir2_sf_t *sfp; /* new sf directory */ 1210 xfs_dir2_sf_t *sfp; /* new sf directory */
1204 1211
1205 xfs_dir2_trace_args("sf_toino8", args); 1212 trace_xfs_dir2_sf_toino8(args);
1213
1206 dp = args->dp; 1214 dp = args->dp;
1207 1215
1208 /* 1216 /*
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
deleted file mode 100644
index 6cc7c0c681a..00000000000
--- a/fs/xfs/xfs_dir2_trace.c
+++ /dev/null
@@ -1,216 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_inum.h"
22#include "xfs_dir2.h"
23#include "xfs_da_btree.h"
24#include "xfs_bmap_btree.h"
25#include "xfs_dir2_sf.h"
26#include "xfs_attr_sf.h"
27#include "xfs_dinode.h"
28#include "xfs_inode.h"
29#include "xfs_dir2_trace.h"
30
31#ifdef XFS_DIR2_TRACE
32ktrace_t *xfs_dir2_trace_buf;
33
34/*
35 * Enter something in the trace buffers.
36 */
37static void
38xfs_dir2_trace_enter(
39 xfs_inode_t *dp,
40 int type,
41 char *where,
42 char *name,
43 int namelen,
44 void *a0,
45 void *a1,
46 void *a2,
47 void *a3,
48 void *a4,
49 void *a5,
50 void *a6,
51 void *a7)
52{
53 void *n[5];
54
55 ASSERT(xfs_dir2_trace_buf);
56 ASSERT(dp->i_dir_trace);
57 if (name)
58 memcpy(n, name, min((int)sizeof(n), namelen));
59 else
60 memset((char *)n, 0, sizeof(n));
61 ktrace_enter(xfs_dir2_trace_buf,
62 (void *)(long)type, (void *)where,
63 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
64 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
65 (void *)(long)namelen,
66 (void *)n[0], (void *)n[1], (void *)n[2],
67 (void *)n[3], (void *)n[4]);
68 ktrace_enter(dp->i_dir_trace,
69 (void *)(long)type, (void *)where,
70 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
71 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
72 (void *)(long)namelen,
73 (void *)n[0], (void *)n[1], (void *)n[2],
74 (void *)n[3], (void *)n[4]);
75}
76
77void
78xfs_dir2_trace_args(
79 char *where,
80 xfs_da_args_t *args)
81{
82 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS, where,
83 (char *)args->name, (int)args->namelen,
84 (void *)(unsigned long)args->hashval,
85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
90}
91
92void
93xfs_dir2_trace_args_b(
94 char *where,
95 xfs_da_args_t *args,
96 xfs_dabuf_t *bp)
97{
98 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_B, where,
99 (char *)args->name, (int)args->namelen,
100 (void *)(unsigned long)args->hashval,
101 (void *)((unsigned long)(args->inumber >> 32)),
102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
103 (void *)args->dp, (void *)args->trans,
104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
105 (void *)(bp ? bp->bps[0] : NULL), NULL);
106}
107
108void
109xfs_dir2_trace_args_bb(
110 char *where,
111 xfs_da_args_t *args,
112 xfs_dabuf_t *lbp,
113 xfs_dabuf_t *dbp)
114{
115 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BB, where,
116 (char *)args->name, (int)args->namelen,
117 (void *)(unsigned long)args->hashval,
118 (void *)((unsigned long)(args->inumber >> 32)),
119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
120 (void *)args->dp, (void *)args->trans,
121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
122 (void *)(lbp ? lbp->bps[0] : NULL),
123 (void *)(dbp ? dbp->bps[0] : NULL));
124}
125
126void
127xfs_dir2_trace_args_bibii(
128 char *where,
129 xfs_da_args_t *args,
130 xfs_dabuf_t *bs,
131 int ss,
132 xfs_dabuf_t *bd,
133 int sd,
134 int c)
135{
136 xfs_buf_t *bpbs = bs ? bs->bps[0] : NULL;
137 xfs_buf_t *bpbd = bd ? bd->bps[0] : NULL;
138
139 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BIBII, where,
140 (char *)args->name, (int)args->namelen,
141 (void *)args->dp, (void *)args->trans,
142 (void *)bpbs, (void *)(long)ss, (void *)bpbd, (void *)(long)sd,
143 (void *)(long)c, NULL);
144}
145
146void
147xfs_dir2_trace_args_db(
148 char *where,
149 xfs_da_args_t *args,
150 xfs_dir2_db_t db,
151 xfs_dabuf_t *bp)
152{
153 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
154
155 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_DB, where,
156 (char *)args->name, (int)args->namelen,
157 (void *)(unsigned long)args->hashval,
158 (void *)((unsigned long)(args->inumber >> 32)),
159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
160 (void *)args->dp, (void *)args->trans,
161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
162 (void *)(long)db, (void *)dbp);
163}
164
165void
166xfs_dir2_trace_args_i(
167 char *where,
168 xfs_da_args_t *args,
169 xfs_ino_t i)
170{
171 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_I, where,
172 (char *)args->name, (int)args->namelen,
173 (void *)(unsigned long)args->hashval,
174 (void *)((unsigned long)(args->inumber >> 32)),
175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
176 (void *)args->dp, (void *)args->trans,
177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
178 (void *)((unsigned long)(i >> 32)),
179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
180}
181
182void
183xfs_dir2_trace_args_s(
184 char *where,
185 xfs_da_args_t *args,
186 int s)
187{
188 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_S, where,
189 (char *)args->name, (int)args->namelen,
190 (void *)(unsigned long)args->hashval,
191 (void *)((unsigned long)(args->inumber >> 32)),
192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
193 (void *)args->dp, (void *)args->trans,
194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
196}
197
198void
199xfs_dir2_trace_args_sb(
200 char *where,
201 xfs_da_args_t *args,
202 int s,
203 xfs_dabuf_t *bp)
204{
205 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
206
207 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_SB, where,
208 (char *)args->name, (int)args->namelen,
209 (void *)(unsigned long)args->hashval,
210 (void *)((unsigned long)(args->inumber >> 32)),
211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
212 (void *)args->dp, (void *)args->trans,
213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
214 (void *)(long)s, (void *)dbp);
215}
216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dir2_trace.h b/fs/xfs/xfs_dir2_trace.h
deleted file mode 100644
index ca3c754f482..00000000000
--- a/fs/xfs/xfs_dir2_trace.h
+++ /dev/null
@@ -1,72 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_TRACE_H__
19#define __XFS_DIR2_TRACE_H__
20
21/*
22 * Tracing for xfs v2 directories.
23 */
24
25#if defined(XFS_DIR2_TRACE)
26
27struct ktrace;
28struct xfs_dabuf;
29struct xfs_da_args;
30
31#define XFS_DIR2_GTRACE_SIZE 4096 /* global buffer */
32#define XFS_DIR2_KTRACE_SIZE 32 /* per-inode buffer */
33extern struct ktrace *xfs_dir2_trace_buf;
34
35#define XFS_DIR2_KTRACE_ARGS 1 /* args only */
36#define XFS_DIR2_KTRACE_ARGS_B 2 /* args + buffer */
37#define XFS_DIR2_KTRACE_ARGS_BB 3 /* args + 2 buffers */
38#define XFS_DIR2_KTRACE_ARGS_DB 4 /* args, db, buffer */
39#define XFS_DIR2_KTRACE_ARGS_I 5 /* args, inum */
40#define XFS_DIR2_KTRACE_ARGS_S 6 /* args, int */
41#define XFS_DIR2_KTRACE_ARGS_SB 7 /* args, int, buffer */
42#define XFS_DIR2_KTRACE_ARGS_BIBII 8 /* args, buf/int/buf/int/int */
43
44void xfs_dir2_trace_args(char *where, struct xfs_da_args *args);
45void xfs_dir2_trace_args_b(char *where, struct xfs_da_args *args,
46 struct xfs_dabuf *bp);
47void xfs_dir2_trace_args_bb(char *where, struct xfs_da_args *args,
48 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
49void xfs_dir2_trace_args_bibii(char *where, struct xfs_da_args *args,
50 struct xfs_dabuf *bs, int ss,
51 struct xfs_dabuf *bd, int sd, int c);
52void xfs_dir2_trace_args_db(char *where, struct xfs_da_args *args,
53 xfs_dir2_db_t db, struct xfs_dabuf *bp);
54void xfs_dir2_trace_args_i(char *where, struct xfs_da_args *args, xfs_ino_t i);
55void xfs_dir2_trace_args_s(char *where, struct xfs_da_args *args, int s);
56void xfs_dir2_trace_args_sb(char *where, struct xfs_da_args *args, int s,
57 struct xfs_dabuf *bp);
58
59#else /* XFS_DIR2_TRACE */
60
61#define xfs_dir2_trace_args(where, args)
62#define xfs_dir2_trace_args_b(where, args, bp)
63#define xfs_dir2_trace_args_bb(where, args, lbp, dbp)
64#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c)
65#define xfs_dir2_trace_args_db(where, args, db, bp)
66#define xfs_dir2_trace_args_i(where, args, i)
67#define xfs_dir2_trace_args_s(where, args, s)
68#define xfs_dir2_trace_args_sb(where, args, s, bp)
69
70#endif /* XFS_DIR2_TRACE */
71
72#endif /* __XFS_DIR2_TRACE_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index edf8bdf4141..a631e1451ab 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -34,6 +34,7 @@
34#include "xfs_utils.h" 34#include "xfs_utils.h"
35#include "xfs_mru_cache.h" 35#include "xfs_mru_cache.h"
36#include "xfs_filestream.h" 36#include "xfs_filestream.h"
37#include "xfs_trace.h"
37 38
38#ifdef XFS_FILESTREAMS_TRACE 39#ifdef XFS_FILESTREAMS_TRACE
39 40
@@ -394,9 +395,7 @@ xfs_filestream_init(void)
394 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 395 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
395 if (!item_zone) 396 if (!item_zone)
396 return -ENOMEM; 397 return -ENOMEM;
397#ifdef XFS_FILESTREAMS_TRACE 398
398 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
399#endif
400 return 0; 399 return 0;
401} 400}
402 401
@@ -407,9 +406,6 @@ xfs_filestream_init(void)
407void 406void
408xfs_filestream_uninit(void) 407xfs_filestream_uninit(void)
409{ 408{
410#ifdef XFS_FILESTREAMS_TRACE
411 ktrace_free(xfs_filestreams_trace_buf);
412#endif
413 kmem_zone_destroy(item_zone); 409 kmem_zone_destroy(item_zone);
414} 410}
415 411
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index f655f7dc334..4aba67c5f64 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,7 +79,7 @@ extern ktrace_t *xfs_filestreams_trace_buf;
79 * the cache that reference per-ag array elements that have since been 79 * the cache that reference per-ag array elements that have since been
80 * reallocated. 80 * reallocated.
81 */ 81 */
82STATIC_INLINE int 82static inline int
83xfs_filestream_peek_ag( 83xfs_filestream_peek_ag(
84 xfs_mount_t *mp, 84 xfs_mount_t *mp,
85 xfs_agnumber_t agno) 85 xfs_agnumber_t agno)
@@ -87,7 +87,7 @@ xfs_filestream_peek_ag(
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms); 87 return atomic_read(&mp->m_perag[agno].pagf_fstrms);
88} 88}
89 89
90STATIC_INLINE int 90static inline int
91xfs_filestream_get_ag( 91xfs_filestream_get_ag(
92 xfs_mount_t *mp, 92 xfs_mount_t *mp,
93 xfs_agnumber_t agno) 93 xfs_agnumber_t agno)
@@ -95,7 +95,7 @@ xfs_filestream_get_ag(
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); 95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
96} 96}
97 97
98STATIC_INLINE int 98static inline int
99xfs_filestream_put_ag( 99xfs_filestream_put_ag(
100 xfs_mount_t *mp, 100 xfs_mount_t *mp,
101 xfs_agnumber_t agno) 101 xfs_agnumber_t agno)
@@ -122,7 +122,7 @@ int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122 122
123 123
124/* filestreams for the inode? */ 124/* filestreams for the inode? */
125STATIC_INLINE int 125static inline int
126xfs_inode_is_filestream( 126xfs_inode_is_filestream(
127 struct xfs_inode *ip) 127 struct xfs_inode *ip)
128{ 128{
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2d0b3e1da9e..a13919a6a36 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -45,6 +45,7 @@
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_filestream.h" 47#include "xfs_filestream.h"
48#include "xfs_trace.h"
48 49
49/* 50/*
50 * File system operations 51 * File system operations
@@ -201,8 +202,8 @@ xfs_growfs_data_private(
201 * AG freelist header block 202 * AG freelist header block
202 */ 203 */
203 bp = xfs_buf_get(mp->m_ddev_targp, 204 bp = xfs_buf_get(mp->m_ddev_targp,
204 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 205 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
205 XFS_FSS_TO_BB(mp, 1), 0); 206 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
206 agf = XFS_BUF_TO_AGF(bp); 207 agf = XFS_BUF_TO_AGF(bp);
207 memset(agf, 0, mp->m_sb.sb_sectsize); 208 memset(agf, 0, mp->m_sb.sb_sectsize);
208 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); 209 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -233,8 +234,8 @@ xfs_growfs_data_private(
233 * AG inode header block 234 * AG inode header block
234 */ 235 */
235 bp = xfs_buf_get(mp->m_ddev_targp, 236 bp = xfs_buf_get(mp->m_ddev_targp,
236 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 237 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
237 XFS_FSS_TO_BB(mp, 1), 0); 238 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
238 agi = XFS_BUF_TO_AGI(bp); 239 agi = XFS_BUF_TO_AGI(bp);
239 memset(agi, 0, mp->m_sb.sb_sectsize); 240 memset(agi, 0, mp->m_sb.sb_sectsize);
240 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); 241 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -257,8 +258,9 @@ xfs_growfs_data_private(
257 * BNO btree root block 258 * BNO btree root block
258 */ 259 */
259 bp = xfs_buf_get(mp->m_ddev_targp, 260 bp = xfs_buf_get(mp->m_ddev_targp,
260 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 261 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
261 BTOBB(mp->m_sb.sb_blocksize), 0); 262 BTOBB(mp->m_sb.sb_blocksize),
263 XBF_LOCK | XBF_MAPPED);
262 block = XFS_BUF_TO_BLOCK(bp); 264 block = XFS_BUF_TO_BLOCK(bp);
263 memset(block, 0, mp->m_sb.sb_blocksize); 265 memset(block, 0, mp->m_sb.sb_blocksize);
264 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 266 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -278,8 +280,9 @@ xfs_growfs_data_private(
278 * CNT btree root block 280 * CNT btree root block
279 */ 281 */
280 bp = xfs_buf_get(mp->m_ddev_targp, 282 bp = xfs_buf_get(mp->m_ddev_targp,
281 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 283 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
282 BTOBB(mp->m_sb.sb_blocksize), 0); 284 BTOBB(mp->m_sb.sb_blocksize),
285 XBF_LOCK | XBF_MAPPED);
283 block = XFS_BUF_TO_BLOCK(bp); 286 block = XFS_BUF_TO_BLOCK(bp);
284 memset(block, 0, mp->m_sb.sb_blocksize); 287 memset(block, 0, mp->m_sb.sb_blocksize);
285 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 288 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -300,8 +303,9 @@ xfs_growfs_data_private(
300 * INO btree root block 303 * INO btree root block
301 */ 304 */
302 bp = xfs_buf_get(mp->m_ddev_targp, 305 bp = xfs_buf_get(mp->m_ddev_targp,
303 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 306 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
304 BTOBB(mp->m_sb.sb_blocksize), 0); 307 BTOBB(mp->m_sb.sb_blocksize),
308 XBF_LOCK | XBF_MAPPED);
305 block = XFS_BUF_TO_BLOCK(bp); 309 block = XFS_BUF_TO_BLOCK(bp);
306 memset(block, 0, mp->m_sb.sb_blocksize); 310 memset(block, 0, mp->m_sb.sb_blocksize);
307 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 311 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -344,6 +348,7 @@ xfs_growfs_data_private(
344 be32_add_cpu(&agf->agf_length, new); 348 be32_add_cpu(&agf->agf_length, new);
345 ASSERT(be32_to_cpu(agf->agf_length) == 349 ASSERT(be32_to_cpu(agf->agf_length) ==
346 be32_to_cpu(agi->agi_length)); 350 be32_to_cpu(agi->agi_length));
351
347 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 352 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
348 /* 353 /*
349 * Free the new space. 354 * Free the new space.
@@ -611,7 +616,7 @@ xfs_fs_log_dummy(
611 xfs_inode_t *ip; 616 xfs_inode_t *ip;
612 int error; 617 int error;
613 618
614 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); 619 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
615 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 620 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
616 if (error) { 621 if (error) {
617 xfs_trans_cancel(tp, 0); 622 xfs_trans_cancel(tp, 0);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0785797db82..cb907ba69c4 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -425,7 +425,7 @@ xfs_ialloc_ag_alloc(
425 return 0; 425 return 0;
426} 426}
427 427
428STATIC_INLINE xfs_agnumber_t 428STATIC xfs_agnumber_t
429xfs_ialloc_next_ag( 429xfs_ialloc_next_ag(
430 xfs_mount_t *mp) 430 xfs_mount_t *mp)
431{ 431{
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 80e526489be..155e798f30a 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,7 +43,7 @@
43#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_btree_trace.h" 45#include "xfs_btree_trace.h"
46#include "xfs_dir2_trace.h" 46#include "xfs_trace.h"
47 47
48 48
49/* 49/*
@@ -74,6 +74,8 @@ xfs_inode_alloc(
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 76
77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
78
77 /* initialise the xfs inode */ 79 /* initialise the xfs inode */
78 ip->i_ino = ino; 80 ip->i_ino = ino;
79 ip->i_mount = mp; 81 ip->i_mount = mp;
@@ -87,30 +89,8 @@ xfs_inode_alloc(
87 ip->i_size = 0; 89 ip->i_size = 0;
88 ip->i_new_size = 0; 90 ip->i_new_size = 0;
89 91
90 /*
91 * Initialize inode's trace buffers.
92 */
93#ifdef XFS_INODE_TRACE
94 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
95#endif
96#ifdef XFS_BMAP_TRACE
97 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
98#endif
99#ifdef XFS_BTREE_TRACE
100 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
101#endif
102#ifdef XFS_RW_TRACE
103 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
104#endif
105#ifdef XFS_ILOCK_TRACE
106 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
107#endif
108#ifdef XFS_DIR2_TRACE
109 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
110#endif
111
112 /* prevent anyone from using this yet */ 92 /* prevent anyone from using this yet */
113 VFS_I(ip)->i_state = I_NEW|I_LOCK; 93 VFS_I(ip)->i_state = I_NEW;
114 94
115 return ip; 95 return ip;
116} 96}
@@ -130,25 +110,6 @@ xfs_inode_free(
130 if (ip->i_afp) 110 if (ip->i_afp)
131 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 111 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
132 112
133#ifdef XFS_INODE_TRACE
134 ktrace_free(ip->i_trace);
135#endif
136#ifdef XFS_BMAP_TRACE
137 ktrace_free(ip->i_xtrace);
138#endif
139#ifdef XFS_BTREE_TRACE
140 ktrace_free(ip->i_btrace);
141#endif
142#ifdef XFS_RW_TRACE
143 ktrace_free(ip->i_rwtrace);
144#endif
145#ifdef XFS_ILOCK_TRACE
146 ktrace_free(ip->i_lock_trace);
147#endif
148#ifdef XFS_DIR2_TRACE
149 ktrace_free(ip->i_dir_trace);
150#endif
151
152 if (ip->i_itemp) { 113 if (ip->i_itemp) {
153 /* 114 /*
154 * Only if we are shutting down the fs will we see an 115 * Only if we are shutting down the fs will we see an
@@ -207,6 +168,7 @@ xfs_iget_cache_hit(
207 * instead of polling for it. 168 * instead of polling for it.
208 */ 169 */
209 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 170 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
171 trace_xfs_iget_skip(ip);
210 XFS_STATS_INC(xs_ig_frecycle); 172 XFS_STATS_INC(xs_ig_frecycle);
211 error = EAGAIN; 173 error = EAGAIN;
212 goto out_error; 174 goto out_error;
@@ -225,7 +187,7 @@ xfs_iget_cache_hit(
225 * Need to carefully get it back into useable state. 187 * Need to carefully get it back into useable state.
226 */ 188 */
227 if (ip->i_flags & XFS_IRECLAIMABLE) { 189 if (ip->i_flags & XFS_IRECLAIMABLE) {
228 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 190 trace_xfs_iget_reclaim(ip);
229 191
230 /* 192 /*
231 * We need to set XFS_INEW atomically with clearing the 193 * We need to set XFS_INEW atomically with clearing the
@@ -251,9 +213,10 @@ xfs_iget_cache_hit(
251 ip->i_flags &= ~XFS_INEW; 213 ip->i_flags &= ~XFS_INEW;
252 ip->i_flags |= XFS_IRECLAIMABLE; 214 ip->i_flags |= XFS_IRECLAIMABLE;
253 __xfs_inode_set_reclaim_tag(pag, ip); 215 __xfs_inode_set_reclaim_tag(pag, ip);
216 trace_xfs_iget_reclaim(ip);
254 goto out_error; 217 goto out_error;
255 } 218 }
256 inode->i_state = I_LOCK|I_NEW; 219 inode->i_state = I_NEW;
257 } else { 220 } else {
258 /* If the VFS inode is being torn down, pause and try again. */ 221 /* If the VFS inode is being torn down, pause and try again. */
259 if (!igrab(inode)) { 222 if (!igrab(inode)) {
@@ -270,8 +233,9 @@ xfs_iget_cache_hit(
270 xfs_ilock(ip, lock_flags); 233 xfs_ilock(ip, lock_flags);
271 234
272 xfs_iflags_clear(ip, XFS_ISTALE); 235 xfs_iflags_clear(ip, XFS_ISTALE);
273 xfs_itrace_exit_tag(ip, "xfs_iget.found");
274 XFS_STATS_INC(xs_ig_found); 236 XFS_STATS_INC(xs_ig_found);
237
238 trace_xfs_iget_found(ip);
275 return 0; 239 return 0;
276 240
277out_error: 241out_error:
@@ -290,7 +254,7 @@ xfs_iget_cache_miss(
290 struct xfs_inode **ipp, 254 struct xfs_inode **ipp,
291 xfs_daddr_t bno, 255 xfs_daddr_t bno,
292 int flags, 256 int flags,
293 int lock_flags) __releases(pag->pag_ici_lock) 257 int lock_flags)
294{ 258{
295 struct xfs_inode *ip; 259 struct xfs_inode *ip;
296 int error; 260 int error;
@@ -305,7 +269,7 @@ xfs_iget_cache_miss(
305 if (error) 269 if (error)
306 goto out_destroy; 270 goto out_destroy;
307 271
308 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 272 xfs_itrace_entry(ip);
309 273
310 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 274 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
311 error = ENOENT; 275 error = ENOENT;
@@ -350,6 +314,8 @@ xfs_iget_cache_miss(
350 314
351 write_unlock(&pag->pag_ici_lock); 315 write_unlock(&pag->pag_ici_lock);
352 radix_tree_preload_end(); 316 radix_tree_preload_end();
317
318 trace_xfs_iget_alloc(ip);
353 *ipp = ip; 319 *ipp = ip;
354 return 0; 320 return 0;
355 321
@@ -511,17 +477,21 @@ xfs_ireclaim(
511{ 477{
512 struct xfs_mount *mp = ip->i_mount; 478 struct xfs_mount *mp = ip->i_mount;
513 struct xfs_perag *pag; 479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
514 481
515 XFS_STATS_INC(xs_ig_reclaims); 482 XFS_STATS_INC(xs_ig_reclaims);
516 483
517 /* 484 /*
518 * Remove the inode from the per-AG radix tree. It doesn't matter 485 * Remove the inode from the per-AG radix tree.
519 * if it was never added to it because radix_tree_delete can deal 486 *
520 * with that case just fine. 487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
521 */ 490 */
522 pag = xfs_get_perag(mp, ip->i_ino); 491 pag = xfs_get_perag(mp, ip->i_ino);
523 write_lock(&pag->pag_ici_lock); 492 write_lock(&pag->pag_ici_lock);
524 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
525 write_unlock(&pag->pag_ici_lock); 495 write_unlock(&pag->pag_ici_lock);
526 xfs_put_perag(mp, pag); 496 xfs_put_perag(mp, pag);
527 497
@@ -636,7 +606,7 @@ xfs_ilock(
636 else if (lock_flags & XFS_ILOCK_SHARED) 606 else if (lock_flags & XFS_ILOCK_SHARED)
637 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 607 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
638 608
639 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 609 trace_xfs_ilock(ip, lock_flags, _RET_IP_);
640} 610}
641 611
642/* 612/*
@@ -681,7 +651,7 @@ xfs_ilock_nowait(
681 if (!mrtryaccess(&ip->i_lock)) 651 if (!mrtryaccess(&ip->i_lock))
682 goto out_undo_iolock; 652 goto out_undo_iolock;
683 } 653 }
684 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 654 trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
685 return 1; 655 return 1;
686 656
687 out_undo_iolock: 657 out_undo_iolock:
@@ -743,7 +713,7 @@ xfs_iunlock(
743 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, 713 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
744 (xfs_log_item_t*)(ip->i_itemp)); 714 (xfs_log_item_t*)(ip->i_itemp));
745 } 715 }
746 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 716 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
747} 717}
748 718
749/* 719/*
@@ -762,6 +732,8 @@ xfs_ilock_demote(
762 mrdemote(&ip->i_lock); 732 mrdemote(&ip->i_lock);
763 if (lock_flags & XFS_IOLOCK_EXCL) 733 if (lock_flags & XFS_IOLOCK_EXCL)
764 mrdemote(&ip->i_iolock); 734 mrdemote(&ip->i_iolock);
735
736 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
765} 737}
766 738
767#ifdef DEBUG 739#ifdef DEBUG
@@ -792,52 +764,3 @@ xfs_isilocked(
792 return 1; 764 return 1;
793} 765}
794#endif 766#endif
795
796#ifdef XFS_INODE_TRACE
797
798#define KTRACE_ENTER(ip, vk, s, line, ra) \
799 ktrace_enter((ip)->i_trace, \
800/* 0 */ (void *)(__psint_t)(vk), \
801/* 1 */ (void *)(s), \
802/* 2 */ (void *)(__psint_t) line, \
803/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
804/* 4 */ (void *)(ra), \
805/* 5 */ NULL, \
806/* 6 */ (void *)(__psint_t)current_cpu(), \
807/* 7 */ (void *)(__psint_t)current_pid(), \
808/* 8 */ (void *)__return_address, \
809/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
810
811/*
812 * Vnode tracing code.
813 */
814void
815_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
816{
817 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
818}
819
820void
821_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
822{
823 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
824}
825
826void
827xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
828{
829 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
830}
831
832void
833_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
834{
835 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
836}
837
838void
839xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
840{
841 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
842}
843#endif /* XFS_INODE_TRACE */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b92a4fa2a0a..ef77fd88c8e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,10 +47,10 @@
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_dir2_trace.h"
51#include "xfs_quota.h" 50#include "xfs_quota.h"
52#include "xfs_filestream.h" 51#include "xfs_filestream.h"
53#include "xfs_vnodeops.h" 52#include "xfs_vnodeops.h"
53#include "xfs_trace.h"
54 54
55kmem_zone_t *xfs_ifork_zone; 55kmem_zone_t *xfs_ifork_zone;
56kmem_zone_t *xfs_inode_zone; 56kmem_zone_t *xfs_inode_zone;
@@ -1291,42 +1291,6 @@ xfs_file_last_byte(
1291 return last_byte; 1291 return last_byte;
1292} 1292}
1293 1293
1294#if defined(XFS_RW_TRACE)
1295STATIC void
1296xfs_itrunc_trace(
1297 int tag,
1298 xfs_inode_t *ip,
1299 int flag,
1300 xfs_fsize_t new_size,
1301 xfs_off_t toss_start,
1302 xfs_off_t toss_finish)
1303{
1304 if (ip->i_rwtrace == NULL) {
1305 return;
1306 }
1307
1308 ktrace_enter(ip->i_rwtrace,
1309 (void*)((long)tag),
1310 (void*)ip,
1311 (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
1312 (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
1313 (void*)((long)flag),
1314 (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
1315 (void*)(unsigned long)(new_size & 0xffffffff),
1316 (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
1317 (void*)(unsigned long)(toss_start & 0xffffffff),
1318 (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
1319 (void*)(unsigned long)(toss_finish & 0xffffffff),
1320 (void*)(unsigned long)current_cpu(),
1321 (void*)(unsigned long)current_pid(),
1322 (void*)NULL,
1323 (void*)NULL,
1324 (void*)NULL);
1325}
1326#else
1327#define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
1328#endif
1329
1330/* 1294/*
1331 * Start the truncation of the file to new_size. The new size 1295 * Start the truncation of the file to new_size. The new size
1332 * must be smaller than the current size. This routine will 1296 * must be smaller than the current size. This routine will
@@ -1409,8 +1373,7 @@ xfs_itruncate_start(
1409 return 0; 1373 return 0;
1410 } 1374 }
1411 last_byte = xfs_file_last_byte(ip); 1375 last_byte = xfs_file_last_byte(ip);
1412 xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 1376 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
1413 last_byte);
1414 if (last_byte > toss_start) { 1377 if (last_byte > toss_start) {
1415 if (flags & XFS_ITRUNC_DEFINITE) { 1378 if (flags & XFS_ITRUNC_DEFINITE) {
1416 xfs_tosspages(ip, toss_start, 1379 xfs_tosspages(ip, toss_start,
@@ -1514,7 +1477,8 @@ xfs_itruncate_finish(
1514 new_size = 0LL; 1477 new_size = 0LL;
1515 } 1478 }
1516 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1479 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1517 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1480 trace_xfs_itruncate_finish_start(ip, new_size);
1481
1518 /* 1482 /*
1519 * The first thing we do is set the size to new_size permanently 1483 * The first thing we do is set the size to new_size permanently
1520 * on disk. This way we don't have to worry about anyone ever 1484 * on disk. This way we don't have to worry about anyone ever
@@ -1731,7 +1695,7 @@ xfs_itruncate_finish(
1731 ASSERT((new_size != 0) || 1695 ASSERT((new_size != 0) ||
1732 (fork == XFS_ATTR_FORK) || 1696 (fork == XFS_ATTR_FORK) ||
1733 (ip->i_d.di_nextents == 0)); 1697 (ip->i_d.di_nextents == 0));
1734 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1698 trace_xfs_itruncate_finish_end(ip, new_size);
1735 return 0; 1699 return 0;
1736} 1700}
1737 1701
@@ -2877,8 +2841,8 @@ xfs_iflush(
2877 mp = ip->i_mount; 2841 mp = ip->i_mount;
2878 2842
2879 /* 2843 /*
2880 * If the inode isn't dirty, then just release the inode 2844 * If the inode isn't dirty, then just release the inode flush lock and
2881 * flush lock and do nothing. 2845 * do nothing.
2882 */ 2846 */
2883 if (xfs_inode_clean(ip)) { 2847 if (xfs_inode_clean(ip)) {
2884 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
@@ -2904,6 +2868,19 @@ xfs_iflush(
2904 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2905 2869
2906 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2907 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2908 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2909 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
@@ -3252,23 +3229,6 @@ corrupt_out:
3252 return XFS_ERROR(EFSCORRUPTED); 3229 return XFS_ERROR(EFSCORRUPTED);
3253} 3230}
3254 3231
3255
3256
3257#ifdef XFS_ILOCK_TRACE
3258void
3259xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
3260{
3261 ktrace_enter(ip->i_lock_trace,
3262 (void *)ip,
3263 (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
3264 (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
3265 (void *)ra, /* caller of ilock */
3266 (void *)(unsigned long)current_cpu(),
3267 (void *)(unsigned long)current_pid(),
3268 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
3269}
3270#endif
3271
3272/* 3232/*
3273 * Return a pointer to the extent record at file index idx. 3233 * Return a pointer to the extent record at file index idx.
3274 */ 3234 */
@@ -3300,13 +3260,17 @@ xfs_iext_get_ext(
3300 */ 3260 */
3301void 3261void
3302xfs_iext_insert( 3262xfs_iext_insert(
3303 xfs_ifork_t *ifp, /* inode fork pointer */ 3263 xfs_inode_t *ip, /* incore inode pointer */
3304 xfs_extnum_t idx, /* starting index of new items */ 3264 xfs_extnum_t idx, /* starting index of new items */
3305 xfs_extnum_t count, /* number of inserted items */ 3265 xfs_extnum_t count, /* number of inserted items */
3306 xfs_bmbt_irec_t *new) /* items to insert */ 3266 xfs_bmbt_irec_t *new, /* items to insert */
3267 int state) /* type of extent conversion */
3307{ 3268{
3269 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3308 xfs_extnum_t i; /* extent record index */ 3270 xfs_extnum_t i; /* extent record index */
3309 3271
3272 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
3273
3310 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3274 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3311 xfs_iext_add(ifp, idx, count); 3275 xfs_iext_add(ifp, idx, count);
3312 for (i = idx; i < idx + count; i++, new++) 3276 for (i = idx; i < idx + count; i++, new++)
@@ -3549,13 +3513,17 @@ xfs_iext_add_indirect_multi(
3549 */ 3513 */
3550void 3514void
3551xfs_iext_remove( 3515xfs_iext_remove(
3552 xfs_ifork_t *ifp, /* inode fork pointer */ 3516 xfs_inode_t *ip, /* incore inode pointer */
3553 xfs_extnum_t idx, /* index to begin removing exts */ 3517 xfs_extnum_t idx, /* index to begin removing exts */
3554 int ext_diff) /* number of extents to remove */ 3518 int ext_diff, /* number of extents to remove */
3519 int state) /* type of extent conversion */
3555{ 3520{
3521 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3556 xfs_extnum_t nextents; /* number of extents in file */ 3522 xfs_extnum_t nextents; /* number of extents in file */
3557 int new_size; /* size of extents after removal */ 3523 int new_size; /* size of extents after removal */
3558 3524
3525 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
3526
3559 ASSERT(ext_diff > 0); 3527 ASSERT(ext_diff > 0);
3560 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3528 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3561 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3529 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 41555de1d1d..ec1f28c4fc4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -213,7 +213,6 @@ typedef struct xfs_icdinode {
213 213
214struct bhv_desc; 214struct bhv_desc;
215struct cred; 215struct cred;
216struct ktrace;
217struct xfs_buf; 216struct xfs_buf;
218struct xfs_bmap_free; 217struct xfs_bmap_free;
219struct xfs_bmbt_irec; 218struct xfs_bmbt_irec;
@@ -222,13 +221,6 @@ struct xfs_mount;
222struct xfs_trans; 221struct xfs_trans;
223struct xfs_dquot; 222struct xfs_dquot;
224 223
225#if defined(XFS_ILOCK_TRACE)
226#define XFS_ILOCK_KTRACE_SIZE 32
227extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
228#else
229#define xfs_ilock_trace(i,n,f,ra)
230#endif
231
232typedef struct dm_attrs_s { 224typedef struct dm_attrs_s {
233 __uint32_t da_dmevmask; /* DMIG event mask */ 225 __uint32_t da_dmevmask; /* DMIG event mask */
234 __uint16_t da_dmstate; /* DMIG state info */ 226 __uint16_t da_dmstate; /* DMIG state info */
@@ -271,26 +263,6 @@ typedef struct xfs_inode {
271 263
272 /* VFS inode */ 264 /* VFS inode */
273 struct inode i_vnode; /* embedded VFS inode */ 265 struct inode i_vnode; /* embedded VFS inode */
274
275 /* Trace buffers per inode. */
276#ifdef XFS_INODE_TRACE
277 struct ktrace *i_trace; /* general inode trace */
278#endif
279#ifdef XFS_BMAP_TRACE
280 struct ktrace *i_xtrace; /* inode extent list trace */
281#endif
282#ifdef XFS_BTREE_TRACE
283 struct ktrace *i_btrace; /* inode bmap btree trace */
284#endif
285#ifdef XFS_RW_TRACE
286 struct ktrace *i_rwtrace; /* inode read/write trace */
287#endif
288#ifdef XFS_ILOCK_TRACE
289 struct ktrace *i_lock_trace; /* inode lock/unlock trace */
290#endif
291#ifdef XFS_DIR2_TRACE
292 struct ktrace *i_dir_trace; /* inode directory trace */
293#endif
294} xfs_inode_t; 266} xfs_inode_t;
295 267
296#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 268#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
@@ -406,6 +378,14 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
406#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 378#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
407 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 379 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
408 380
381#define XFS_LOCK_FLAGS \
382 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
383 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
384 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
385 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
386 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
387
388
409/* 389/*
410 * Flags for lockdep annotations. 390 * Flags for lockdep annotations.
411 * 391 *
@@ -455,6 +435,10 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
455#define XFS_ITRUNC_DEFINITE 0x1 435#define XFS_ITRUNC_DEFINITE 0x1
456#define XFS_ITRUNC_MAYBE 0x2 436#define XFS_ITRUNC_MAYBE 0x2
457 437
438#define XFS_ITRUNC_FLAGS \
439 { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
440 { XFS_ITRUNC_MAYBE, "MAYBE" }
441
458/* 442/*
459 * For multiple groups support: if S_ISGID bit is set in the parent 443 * For multiple groups support: if S_ISGID bit is set in the parent
460 * directory, group of new file is set to that of the parent, and 444 * directory, group of new file is set to that of the parent, and
@@ -507,48 +491,16 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
507void xfs_synchronize_times(xfs_inode_t *); 491void xfs_synchronize_times(xfs_inode_t *);
508void xfs_mark_inode_dirty_sync(xfs_inode_t *); 492void xfs_mark_inode_dirty_sync(xfs_inode_t *);
509 493
510#if defined(XFS_INODE_TRACE)
511
512#define INODE_TRACE_SIZE 16 /* number of trace entries */
513#define INODE_KTRACE_ENTRY 1
514#define INODE_KTRACE_EXIT 2
515#define INODE_KTRACE_HOLD 3
516#define INODE_KTRACE_REF 4
517#define INODE_KTRACE_RELE 5
518
519extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
520extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
521extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
522extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
523extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
524#define xfs_itrace_entry(ip) \
525 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
526#define xfs_itrace_exit(ip) \
527 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
528#define xfs_itrace_exit_tag(ip, tag) \
529 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
530#define xfs_itrace_ref(ip) \
531 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
532
533#else
534#define xfs_itrace_entry(a)
535#define xfs_itrace_exit(a)
536#define xfs_itrace_exit_tag(a, b)
537#define xfs_itrace_hold(a, b, c, d)
538#define xfs_itrace_ref(a)
539#define xfs_itrace_rele(a, b, c, d)
540#endif
541
542#define IHOLD(ip) \ 494#define IHOLD(ip) \
543do { \ 495do { \
544 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 496 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
545 atomic_inc(&(VFS_I(ip)->i_count)); \ 497 atomic_inc(&(VFS_I(ip)->i_count)); \
546 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 498 trace_xfs_ihold(ip, _THIS_IP_); \
547} while (0) 499} while (0)
548 500
549#define IRELE(ip) \ 501#define IRELE(ip) \
550do { \ 502do { \
551 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 503 trace_xfs_irele(ip, _THIS_IP_); \
552 iput(VFS_I(ip)); \ 504 iput(VFS_I(ip)); \
553} while (0) 505} while (0)
554 506
@@ -577,11 +529,11 @@ int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
577int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); 529int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
578 530
579xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 531xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
580void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 532void xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
581 xfs_bmbt_irec_t *); 533 xfs_bmbt_irec_t *, int);
582void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 534void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
583void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); 535void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
584void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 536void xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int);
585void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 537void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
586void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 538void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
587void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 539void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9794b876d6f..f38855d21ea 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -41,6 +41,7 @@
41#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_trace.h"
44 45
45 46
46kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 47kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -800,7 +801,9 @@ xfs_inode_item_pushbuf(
800 !completion_done(&ip->i_flush)); 801 !completion_done(&ip->i_flush));
801 iip->ili_pushbuf_flag = 0; 802 iip->ili_pushbuf_flag = 0;
802 xfs_iunlock(ip, XFS_ILOCK_SHARED); 803 xfs_iunlock(ip, XFS_ILOCK_SHARED);
803 xfs_buftrace("INODE ITEM PUSH", bp); 804
805 trace_xfs_inode_item_push(bp, _RET_IP_);
806
804 if (XFS_BUF_ISPINNED(bp)) { 807 if (XFS_BUF_ISPINNED(bp)) {
805 xfs_log_force(mp, (xfs_lsn_t)0, 808 xfs_log_force(mp, (xfs_lsn_t)0,
806 XFS_LOG_FORCE); 809 XFS_LOG_FORCE);
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8b..cc8df1ac778 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
127#ifdef __KERNEL__ 127#ifdef __KERNEL__
128 128
129struct xfs_buf; 129struct xfs_buf;
130struct xfs_bmbt_rec_64; 130struct xfs_bmbt_rec;
131struct xfs_inode; 131struct xfs_inode;
132struct xfs_mount; 132struct xfs_mount;
133 133
@@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item {
140 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
141 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged 143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ 147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148 148
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67ae5555a30..0b65039951a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,72 +47,8 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_iomap.h" 49#include "xfs_iomap.h"
50#include "xfs_trace.h"
50 51
51#if defined(XFS_RW_TRACE)
52void
53xfs_iomap_enter_trace(
54 int tag,
55 xfs_inode_t *ip,
56 xfs_off_t offset,
57 ssize_t count)
58{
59 if (!ip->i_rwtrace)
60 return;
61
62 ktrace_enter(ip->i_rwtrace,
63 (void *)((unsigned long)tag),
64 (void *)ip,
65 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
66 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
67 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
68 (void *)((unsigned long)(offset & 0xffffffff)),
69 (void *)((unsigned long)count),
70 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
71 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
72 (void *)((unsigned long)current_pid()),
73 (void *)NULL,
74 (void *)NULL,
75 (void *)NULL,
76 (void *)NULL,
77 (void *)NULL,
78 (void *)NULL);
79}
80
81void
82xfs_iomap_map_trace(
83 int tag,
84 xfs_inode_t *ip,
85 xfs_off_t offset,
86 ssize_t count,
87 xfs_iomap_t *iomapp,
88 xfs_bmbt_irec_t *imapp,
89 int flags)
90{
91 if (!ip->i_rwtrace)
92 return;
93
94 ktrace_enter(ip->i_rwtrace,
95 (void *)((unsigned long)tag),
96 (void *)ip,
97 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
98 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
99 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
100 (void *)((unsigned long)(offset & 0xffffffff)),
101 (void *)((unsigned long)count),
102 (void *)((unsigned long)flags),
103 (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
105 (void *)((unsigned long)(iomapp->iomap_delta)),
106 (void *)((unsigned long)(iomapp->iomap_bsize)),
107 (void *)((unsigned long)(iomapp->iomap_bn)),
108 (void *)(__psint_t)(imapp->br_startoff),
109 (void *)((unsigned long)(imapp->br_blockcount)),
110 (void *)(__psint_t)(imapp->br_startblock));
111}
112#else
113#define xfs_iomap_enter_trace(tag, io, offset, count)
114#define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
115#endif
116 52
117#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 53#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
118 << mp->m_writeio_log) 54 << mp->m_writeio_log)
@@ -187,21 +123,20 @@ xfs_iomap(
187 if (XFS_FORCED_SHUTDOWN(mp)) 123 if (XFS_FORCED_SHUTDOWN(mp))
188 return XFS_ERROR(EIO); 124 return XFS_ERROR(EIO);
189 125
126 trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
127
190 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { 128 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
191 case BMAPI_READ: 129 case BMAPI_READ:
192 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count);
193 lockmode = xfs_ilock_map_shared(ip); 130 lockmode = xfs_ilock_map_shared(ip);
194 bmapi_flags = XFS_BMAPI_ENTIRE; 131 bmapi_flags = XFS_BMAPI_ENTIRE;
195 break; 132 break;
196 case BMAPI_WRITE: 133 case BMAPI_WRITE:
197 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
198 lockmode = XFS_ILOCK_EXCL; 134 lockmode = XFS_ILOCK_EXCL;
199 if (flags & BMAPI_IGNSTATE) 135 if (flags & BMAPI_IGNSTATE)
200 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 136 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
201 xfs_ilock(ip, lockmode); 137 xfs_ilock(ip, lockmode);
202 break; 138 break;
203 case BMAPI_ALLOCATE: 139 case BMAPI_ALLOCATE:
204 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
205 lockmode = XFS_ILOCK_SHARED; 140 lockmode = XFS_ILOCK_SHARED;
206 bmapi_flags = XFS_BMAPI_ENTIRE; 141 bmapi_flags = XFS_BMAPI_ENTIRE;
207 142
@@ -237,8 +172,7 @@ xfs_iomap(
237 if (nimaps && 172 if (nimaps &&
238 (imap.br_startblock != HOLESTARTBLOCK) && 173 (imap.br_startblock != HOLESTARTBLOCK) &&
239 (imap.br_startblock != DELAYSTARTBLOCK)) { 174 (imap.br_startblock != DELAYSTARTBLOCK)) {
240 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 175 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
241 offset, count, iomapp, &imap, flags);
242 break; 176 break;
243 } 177 }
244 178
@@ -250,8 +184,7 @@ xfs_iomap(
250 &imap, &nimaps); 184 &imap, &nimaps);
251 } 185 }
252 if (!error) { 186 if (!error) {
253 xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, 187 trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
254 offset, count, iomapp, &imap, flags);
255 } 188 }
256 iomap_flags = IOMAP_NEW; 189 iomap_flags = IOMAP_NEW;
257 break; 190 break;
@@ -261,8 +194,7 @@ xfs_iomap(
261 lockmode = 0; 194 lockmode = 0;
262 195
263 if (nimaps && !isnullstartblock(imap.br_startblock)) { 196 if (nimaps && !isnullstartblock(imap.br_startblock)) {
264 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 197 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
265 offset, count, iomapp, &imap, flags);
266 break; 198 break;
267 } 199 }
268 200
@@ -623,8 +555,7 @@ retry:
623 * delalloc blocks and retry without EOF preallocation. 555 * delalloc blocks and retry without EOF preallocation.
624 */ 556 */
625 if (nimaps == 0) { 557 if (nimaps == 0) {
626 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 558 trace_xfs_delalloc_enospc(ip, offset, count);
627 ip, offset, count);
628 if (flushed) 559 if (flushed)
629 return XFS_ERROR(ENOSPC); 560 return XFS_ERROR(ENOSPC);
630 561
@@ -837,7 +768,7 @@ xfs_iomap_write_unwritten(
837 int committed; 768 int committed;
838 int error; 769 int error;
839 770
840 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); 771 trace_xfs_unwritten_convert(ip, offset, count);
841 772
842 offset_fsb = XFS_B_TO_FSBT(mp, offset); 773 offset_fsb = XFS_B_TO_FSBT(mp, offset);
843 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 774 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
@@ -860,8 +791,15 @@ xfs_iomap_write_unwritten(
860 * set up a transaction to convert the range of extents 791 * set up a transaction to convert the range of extents
861 * from unwritten to real. Do allocations in a loop until 792 * from unwritten to real. Do allocations in a loop until
862 * we have covered the range passed in. 793 * we have covered the range passed in.
794 *
795 * Note that we open code the transaction allocation here
796 * to pass KM_NOFS--we can't risk to recursing back into
797 * the filesystem here as we might be asked to write out
798 * the same inode that we complete here and might deadlock
799 * on the iolock.
863 */ 800 */
864 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 801 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
802 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
865 tp->t_flags |= XFS_TRANS_RESERVE; 803 tp->t_flags |= XFS_TRANS_RESERVE;
866 error = xfs_trans_reserve(tp, resblks, 804 error = xfs_trans_reserve(tp, resblks,
867 XFS_WRITE_LOG_RES(mp), 0, 805 XFS_WRITE_LOG_RES(mp), 0,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index fdcf7b82747..174f2999099 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -43,6 +43,14 @@ typedef enum {
43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
44} bmapi_flags_t; 44} bmapi_flags_t;
45 45
46#define BMAPI_FLAGS \
47 { BMAPI_READ, "READ" }, \
48 { BMAPI_WRITE, "WRITE" }, \
49 { BMAPI_ALLOCATE, "ALLOCATE" }, \
50 { BMAPI_IGNSTATE, "IGNSTATE" }, \
51 { BMAPI_DIRECT, "DIRECT" }, \
52 { BMAPI_MMAP, "MMAP" }, \
53 { BMAPI_TRYLOCK, "TRYLOCK" }
46 54
47/* 55/*
48 * xfs_iomap_t: File system I/O map 56 * xfs_iomap_t: File system I/O map
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9dbdff3ea48..600b5b06aae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -40,6 +40,7 @@
40#include "xfs_dinode.h" 40#include "xfs_dinode.h"
41#include "xfs_inode.h" 41#include "xfs_inode.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_trace.h"
43 44
44kmem_zone_t *xfs_log_ticket_zone; 45kmem_zone_t *xfs_log_ticket_zone;
45 46
@@ -122,85 +123,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
122 123
123STATIC int xlog_iclogs_empty(xlog_t *log); 124STATIC int xlog_iclogs_empty(xlog_t *log);
124 125
125#if defined(XFS_LOG_TRACE)
126
127#define XLOG_TRACE_LOGGRANT_SIZE 2048
128#define XLOG_TRACE_ICLOG_SIZE 256
129
130void
131xlog_trace_loggrant_alloc(xlog_t *log)
132{
133 log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
134}
135
136void
137xlog_trace_loggrant_dealloc(xlog_t *log)
138{
139 ktrace_free(log->l_grant_trace);
140}
141
142void
143xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
144{
145 unsigned long cnts;
146
147 /* ticket counts are 1 byte each */
148 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
149
150 ktrace_enter(log->l_grant_trace,
151 (void *)tic,
152 (void *)log->l_reserve_headq,
153 (void *)log->l_write_headq,
154 (void *)((unsigned long)log->l_grant_reserve_cycle),
155 (void *)((unsigned long)log->l_grant_reserve_bytes),
156 (void *)((unsigned long)log->l_grant_write_cycle),
157 (void *)((unsigned long)log->l_grant_write_bytes),
158 (void *)((unsigned long)log->l_curr_cycle),
159 (void *)((unsigned long)log->l_curr_block),
160 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
161 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
162 (void *)string,
163 (void *)((unsigned long)tic->t_trans_type),
164 (void *)cnts,
165 (void *)((unsigned long)tic->t_curr_res),
166 (void *)((unsigned long)tic->t_unit_res));
167}
168
169void
170xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
171{
172 iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
173}
174
175void
176xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
177{
178 ktrace_free(iclog->ic_trace);
179}
180
181void
182xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
183{
184 ktrace_enter(iclog->ic_trace,
185 (void *)((unsigned long)state),
186 (void *)((unsigned long)current_pid()),
187 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
188 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
189 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
190 (void *)NULL, (void *)NULL);
191}
192#else
193
194#define xlog_trace_loggrant_alloc(log)
195#define xlog_trace_loggrant_dealloc(log)
196#define xlog_trace_loggrant(log,tic,string)
197
198#define xlog_trace_iclog_alloc(iclog)
199#define xlog_trace_iclog_dealloc(iclog)
200#define xlog_trace_iclog(iclog,state)
201
202#endif /* XFS_LOG_TRACE */
203
204 126
205static void 127static void
206xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) 128xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
@@ -353,15 +275,17 @@ xfs_log_done(xfs_mount_t *mp,
353 275
354 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || 276 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
355 (flags & XFS_LOG_REL_PERM_RESERV)) { 277 (flags & XFS_LOG_REL_PERM_RESERV)) {
278 trace_xfs_log_done_nonperm(log, ticket);
279
356 /* 280 /*
357 * Release ticket if not permanent reservation or a specific 281 * Release ticket if not permanent reservation or a specific
358 * request has been made to release a permanent reservation. 282 * request has been made to release a permanent reservation.
359 */ 283 */
360 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
361 xlog_ungrant_log_space(log, ticket); 284 xlog_ungrant_log_space(log, ticket);
362 xfs_log_ticket_put(ticket); 285 xfs_log_ticket_put(ticket);
363 } else { 286 } else {
364 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 287 trace_xfs_log_done_perm(log, ticket);
288
365 xlog_regrant_reserve_log_space(log, ticket); 289 xlog_regrant_reserve_log_space(log, ticket);
366 /* If this ticket was a permanent reservation and we aren't 290 /* If this ticket was a permanent reservation and we aren't
367 * trying to release it, reset the inited flags; so next time 291 * trying to release it, reset the inited flags; so next time
@@ -505,10 +429,13 @@ xfs_log_reserve(xfs_mount_t *mp,
505 429
506 XFS_STATS_INC(xs_try_logspace); 430 XFS_STATS_INC(xs_try_logspace);
507 431
432
508 if (*ticket != NULL) { 433 if (*ticket != NULL) {
509 ASSERT(flags & XFS_LOG_PERM_RESERV); 434 ASSERT(flags & XFS_LOG_PERM_RESERV);
510 internal_ticket = (xlog_ticket_t *)*ticket; 435 internal_ticket = (xlog_ticket_t *)*ticket;
511 xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); 436
437 trace_xfs_log_reserve(log, internal_ticket);
438
512 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 439 xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
513 retval = xlog_regrant_write_log_space(log, internal_ticket); 440 retval = xlog_regrant_write_log_space(log, internal_ticket);
514 } else { 441 } else {
@@ -519,10 +446,9 @@ xfs_log_reserve(xfs_mount_t *mp,
519 return XFS_ERROR(ENOMEM); 446 return XFS_ERROR(ENOMEM);
520 internal_ticket->t_trans_type = t_type; 447 internal_ticket->t_trans_type = t_type;
521 *ticket = internal_ticket; 448 *ticket = internal_ticket;
522 xlog_trace_loggrant(log, internal_ticket, 449
523 (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? 450 trace_xfs_log_reserve(log, internal_ticket);
524 "xfs_log_reserve: create new ticket (permanent trans)" : 451
525 "xfs_log_reserve: create new ticket");
526 xlog_grant_push_ail(mp, 452 xlog_grant_push_ail(mp,
527 (internal_ticket->t_unit_res * 453 (internal_ticket->t_unit_res *
528 internal_ticket->t_cnt)); 454 internal_ticket->t_cnt));
@@ -734,7 +660,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
734 spin_unlock(&log->l_icloglock); 660 spin_unlock(&log->l_icloglock);
735 } 661 }
736 if (tic) { 662 if (tic) {
737 xlog_trace_loggrant(log, tic, "unmount rec"); 663 trace_xfs_log_umount_write(log, tic);
738 xlog_ungrant_log_space(log, tic); 664 xlog_ungrant_log_space(log, tic);
739 xfs_log_ticket_put(tic); 665 xfs_log_ticket_put(tic);
740 } 666 }
@@ -1030,7 +956,6 @@ xlog_iodone(xfs_buf_t *bp)
1030 xfs_fs_cmn_err(CE_WARN, l->l_mp, 956 xfs_fs_cmn_err(CE_WARN, l->l_mp,
1031 "xlog_iodone: Barriers are no longer supported" 957 "xlog_iodone: Barriers are no longer supported"
1032 " by device. Disabling barriers\n"); 958 " by device. Disabling barriers\n");
1033 xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
1034 } 959 }
1035 960
1036 /* 961 /*
@@ -1085,13 +1010,10 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
1085 return 0; 1010 return 0;
1086 } 1011 }
1087 1012
1088 xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
1089 XFS_BUF_ERROR(bp, EIO); 1013 XFS_BUF_ERROR(bp, EIO);
1090 XFS_BUF_STALE(bp); 1014 XFS_BUF_STALE(bp);
1091 xfs_biodone(bp); 1015 xfs_biodone(bp);
1092 return XFS_ERROR(EIO); 1016 return XFS_ERROR(EIO);
1093
1094
1095} 1017}
1096 1018
1097/* 1019/*
@@ -1246,7 +1168,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1246 spin_lock_init(&log->l_grant_lock); 1168 spin_lock_init(&log->l_grant_lock);
1247 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1169 sv_init(&log->l_flush_wait, 0, "flush_wait");
1248 1170
1249 xlog_trace_loggrant_alloc(log);
1250 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1171 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1251 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1172 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
1252 1173
@@ -1305,8 +1226,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1305 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1226 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1306 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1227 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1307 1228
1308 xlog_trace_iclog_alloc(iclog);
1309
1310 iclogp = &iclog->ic_next; 1229 iclogp = &iclog->ic_next;
1311 } 1230 }
1312 *iclogp = log->l_iclog; /* complete ring */ 1231 *iclogp = log->l_iclog; /* complete ring */
@@ -1321,13 +1240,11 @@ out_free_iclog:
1321 sv_destroy(&iclog->ic_force_wait); 1240 sv_destroy(&iclog->ic_force_wait);
1322 sv_destroy(&iclog->ic_write_wait); 1241 sv_destroy(&iclog->ic_write_wait);
1323 xfs_buf_free(iclog->ic_bp); 1242 xfs_buf_free(iclog->ic_bp);
1324 xlog_trace_iclog_dealloc(iclog);
1325 } 1243 }
1326 kmem_free(iclog); 1244 kmem_free(iclog);
1327 } 1245 }
1328 spinlock_destroy(&log->l_icloglock); 1246 spinlock_destroy(&log->l_icloglock);
1329 spinlock_destroy(&log->l_grant_lock); 1247 spinlock_destroy(&log->l_grant_lock);
1330 xlog_trace_loggrant_dealloc(log);
1331 xfs_buf_free(log->l_xbuf); 1248 xfs_buf_free(log->l_xbuf);
1332out_free_log: 1249out_free_log:
1333 kmem_free(log); 1250 kmem_free(log);
@@ -1524,6 +1441,7 @@ xlog_sync(xlog_t *log,
1524 XFS_BUF_ZEROFLAGS(bp); 1441 XFS_BUF_ZEROFLAGS(bp);
1525 XFS_BUF_BUSY(bp); 1442 XFS_BUF_BUSY(bp);
1526 XFS_BUF_ASYNC(bp); 1443 XFS_BUF_ASYNC(bp);
1444 bp->b_flags |= XBF_LOG_BUFFER;
1527 /* 1445 /*
1528 * Do an ordered write for the log block. 1446 * Do an ordered write for the log block.
1529 * Its unnecessary to flush the first split block in the log wrap case. 1447 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1561,6 +1479,7 @@ xlog_sync(xlog_t *log,
1561 XFS_BUF_ZEROFLAGS(bp); 1479 XFS_BUF_ZEROFLAGS(bp);
1562 XFS_BUF_BUSY(bp); 1480 XFS_BUF_BUSY(bp);
1563 XFS_BUF_ASYNC(bp); 1481 XFS_BUF_ASYNC(bp);
1482 bp->b_flags |= XBF_LOG_BUFFER;
1564 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1483 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1565 XFS_BUF_ORDERED(bp); 1484 XFS_BUF_ORDERED(bp);
1566 dptr = XFS_BUF_PTR(bp); 1485 dptr = XFS_BUF_PTR(bp);
@@ -1607,7 +1526,6 @@ xlog_dealloc_log(xlog_t *log)
1607 sv_destroy(&iclog->ic_force_wait); 1526 sv_destroy(&iclog->ic_force_wait);
1608 sv_destroy(&iclog->ic_write_wait); 1527 sv_destroy(&iclog->ic_write_wait);
1609 xfs_buf_free(iclog->ic_bp); 1528 xfs_buf_free(iclog->ic_bp);
1610 xlog_trace_iclog_dealloc(iclog);
1611 next_iclog = iclog->ic_next; 1529 next_iclog = iclog->ic_next;
1612 kmem_free(iclog); 1530 kmem_free(iclog);
1613 iclog = next_iclog; 1531 iclog = next_iclog;
@@ -1616,7 +1534,6 @@ xlog_dealloc_log(xlog_t *log)
1616 spinlock_destroy(&log->l_grant_lock); 1534 spinlock_destroy(&log->l_grant_lock);
1617 1535
1618 xfs_buf_free(log->l_xbuf); 1536 xfs_buf_free(log->l_xbuf);
1619 xlog_trace_loggrant_dealloc(log);
1620 log->l_mp->m_log = NULL; 1537 log->l_mp->m_log = NULL;
1621 kmem_free(log); 1538 kmem_free(log);
1622} /* xlog_dealloc_log */ 1539} /* xlog_dealloc_log */
@@ -2414,7 +2331,6 @@ restart:
2414 2331
2415 iclog = log->l_iclog; 2332 iclog = log->l_iclog;
2416 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2333 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2417 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2418 XFS_STATS_INC(xs_log_noiclogs); 2334 XFS_STATS_INC(xs_log_noiclogs);
2419 2335
2420 /* Wait for log writes to have flushed */ 2336 /* Wait for log writes to have flushed */
@@ -2520,13 +2436,15 @@ xlog_grant_log_space(xlog_t *log,
2520 2436
2521 /* Is there space or do we need to sleep? */ 2437 /* Is there space or do we need to sleep? */
2522 spin_lock(&log->l_grant_lock); 2438 spin_lock(&log->l_grant_lock);
2523 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); 2439
2440 trace_xfs_log_grant_enter(log, tic);
2524 2441
2525 /* something is already sleeping; insert new transaction at end */ 2442 /* something is already sleeping; insert new transaction at end */
2526 if (log->l_reserve_headq) { 2443 if (log->l_reserve_headq) {
2527 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2444 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2528 xlog_trace_loggrant(log, tic, 2445
2529 "xlog_grant_log_space: sleep 1"); 2446 trace_xfs_log_grant_sleep1(log, tic);
2447
2530 /* 2448 /*
2531 * Gotta check this before going to sleep, while we're 2449 * Gotta check this before going to sleep, while we're
2532 * holding the grant lock. 2450 * holding the grant lock.
@@ -2540,8 +2458,7 @@ xlog_grant_log_space(xlog_t *log,
2540 * If we got an error, and the filesystem is shutting down, 2458 * If we got an error, and the filesystem is shutting down,
2541 * we'll catch it down below. So just continue... 2459 * we'll catch it down below. So just continue...
2542 */ 2460 */
2543 xlog_trace_loggrant(log, tic, 2461 trace_xfs_log_grant_wake1(log, tic);
2544 "xlog_grant_log_space: wake 1");
2545 spin_lock(&log->l_grant_lock); 2462 spin_lock(&log->l_grant_lock);
2546 } 2463 }
2547 if (tic->t_flags & XFS_LOG_PERM_RESERV) 2464 if (tic->t_flags & XFS_LOG_PERM_RESERV)
@@ -2558,8 +2475,9 @@ redo:
2558 if (free_bytes < need_bytes) { 2475 if (free_bytes < need_bytes) {
2559 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2476 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2560 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2477 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2561 xlog_trace_loggrant(log, tic, 2478
2562 "xlog_grant_log_space: sleep 2"); 2479 trace_xfs_log_grant_sleep2(log, tic);
2480
2563 spin_unlock(&log->l_grant_lock); 2481 spin_unlock(&log->l_grant_lock);
2564 xlog_grant_push_ail(log->l_mp, need_bytes); 2482 xlog_grant_push_ail(log->l_mp, need_bytes);
2565 spin_lock(&log->l_grant_lock); 2483 spin_lock(&log->l_grant_lock);
@@ -2571,8 +2489,8 @@ redo:
2571 if (XLOG_FORCED_SHUTDOWN(log)) 2489 if (XLOG_FORCED_SHUTDOWN(log))
2572 goto error_return; 2490 goto error_return;
2573 2491
2574 xlog_trace_loggrant(log, tic, 2492 trace_xfs_log_grant_wake2(log, tic);
2575 "xlog_grant_log_space: wake 2"); 2493
2576 goto redo; 2494 goto redo;
2577 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2495 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2578 xlog_del_ticketq(&log->l_reserve_headq, tic); 2496 xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2592,7 +2510,7 @@ redo:
2592 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); 2510 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2593 } 2511 }
2594#endif 2512#endif
2595 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); 2513 trace_xfs_log_grant_exit(log, tic);
2596 xlog_verify_grant_head(log, 1); 2514 xlog_verify_grant_head(log, 1);
2597 spin_unlock(&log->l_grant_lock); 2515 spin_unlock(&log->l_grant_lock);
2598 return 0; 2516 return 0;
@@ -2600,7 +2518,9 @@ redo:
2600 error_return: 2518 error_return:
2601 if (tic->t_flags & XLOG_TIC_IN_Q) 2519 if (tic->t_flags & XLOG_TIC_IN_Q)
2602 xlog_del_ticketq(&log->l_reserve_headq, tic); 2520 xlog_del_ticketq(&log->l_reserve_headq, tic);
2603 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); 2521
2522 trace_xfs_log_grant_error(log, tic);
2523
2604 /* 2524 /*
2605 * If we are failing, make sure the ticket doesn't have any 2525 * If we are failing, make sure the ticket doesn't have any
2606 * current reservations. We don't want to add this back when 2526 * current reservations. We don't want to add this back when
@@ -2640,7 +2560,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2640#endif 2560#endif
2641 2561
2642 spin_lock(&log->l_grant_lock); 2562 spin_lock(&log->l_grant_lock);
2643 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); 2563
2564 trace_xfs_log_regrant_write_enter(log, tic);
2644 2565
2645 if (XLOG_FORCED_SHUTDOWN(log)) 2566 if (XLOG_FORCED_SHUTDOWN(log))
2646 goto error_return; 2567 goto error_return;
@@ -2669,8 +2590,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2669 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2590 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2670 xlog_ins_ticketq(&log->l_write_headq, tic); 2591 xlog_ins_ticketq(&log->l_write_headq, tic);
2671 2592
2672 xlog_trace_loggrant(log, tic, 2593 trace_xfs_log_regrant_write_sleep1(log, tic);
2673 "xlog_regrant_write_log_space: sleep 1"); 2594
2674 spin_unlock(&log->l_grant_lock); 2595 spin_unlock(&log->l_grant_lock);
2675 xlog_grant_push_ail(log->l_mp, need_bytes); 2596 xlog_grant_push_ail(log->l_mp, need_bytes);
2676 spin_lock(&log->l_grant_lock); 2597 spin_lock(&log->l_grant_lock);
@@ -2685,8 +2606,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2685 if (XLOG_FORCED_SHUTDOWN(log)) 2606 if (XLOG_FORCED_SHUTDOWN(log))
2686 goto error_return; 2607 goto error_return;
2687 2608
2688 xlog_trace_loggrant(log, tic, 2609 trace_xfs_log_regrant_write_wake1(log, tic);
2689 "xlog_regrant_write_log_space: wake 1");
2690 } 2610 }
2691 } 2611 }
2692 2612
@@ -2704,6 +2624,8 @@ redo:
2704 spin_lock(&log->l_grant_lock); 2624 spin_lock(&log->l_grant_lock);
2705 2625
2706 XFS_STATS_INC(xs_sleep_logspace); 2626 XFS_STATS_INC(xs_sleep_logspace);
2627 trace_xfs_log_regrant_write_sleep2(log, tic);
2628
2707 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2629 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2708 2630
2709 /* If we're shutting down, this tic is already off the queue */ 2631 /* If we're shutting down, this tic is already off the queue */
@@ -2711,8 +2633,7 @@ redo:
2711 if (XLOG_FORCED_SHUTDOWN(log)) 2633 if (XLOG_FORCED_SHUTDOWN(log))
2712 goto error_return; 2634 goto error_return;
2713 2635
2714 xlog_trace_loggrant(log, tic, 2636 trace_xfs_log_regrant_write_wake2(log, tic);
2715 "xlog_regrant_write_log_space: wake 2");
2716 goto redo; 2637 goto redo;
2717 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2638 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2718 xlog_del_ticketq(&log->l_write_headq, tic); 2639 xlog_del_ticketq(&log->l_write_headq, tic);
@@ -2727,7 +2648,8 @@ redo:
2727 } 2648 }
2728#endif 2649#endif
2729 2650
2730 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); 2651 trace_xfs_log_regrant_write_exit(log, tic);
2652
2731 xlog_verify_grant_head(log, 1); 2653 xlog_verify_grant_head(log, 1);
2732 spin_unlock(&log->l_grant_lock); 2654 spin_unlock(&log->l_grant_lock);
2733 return 0; 2655 return 0;
@@ -2736,7 +2658,9 @@ redo:
2736 error_return: 2658 error_return:
2737 if (tic->t_flags & XLOG_TIC_IN_Q) 2659 if (tic->t_flags & XLOG_TIC_IN_Q)
2738 xlog_del_ticketq(&log->l_reserve_headq, tic); 2660 xlog_del_ticketq(&log->l_reserve_headq, tic);
2739 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); 2661
2662 trace_xfs_log_regrant_write_error(log, tic);
2663
2740 /* 2664 /*
2741 * If we are failing, make sure the ticket doesn't have any 2665 * If we are failing, make sure the ticket doesn't have any
2742 * current reservations. We don't want to add this back when 2666 * current reservations. We don't want to add this back when
@@ -2760,8 +2684,8 @@ STATIC void
2760xlog_regrant_reserve_log_space(xlog_t *log, 2684xlog_regrant_reserve_log_space(xlog_t *log,
2761 xlog_ticket_t *ticket) 2685 xlog_ticket_t *ticket)
2762{ 2686{
2763 xlog_trace_loggrant(log, ticket, 2687 trace_xfs_log_regrant_reserve_enter(log, ticket);
2764 "xlog_regrant_reserve_log_space: enter"); 2688
2765 if (ticket->t_cnt > 0) 2689 if (ticket->t_cnt > 0)
2766 ticket->t_cnt--; 2690 ticket->t_cnt--;
2767 2691
@@ -2769,8 +2693,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2769 xlog_grant_sub_space(log, ticket->t_curr_res); 2693 xlog_grant_sub_space(log, ticket->t_curr_res);
2770 ticket->t_curr_res = ticket->t_unit_res; 2694 ticket->t_curr_res = ticket->t_unit_res;
2771 xlog_tic_reset_res(ticket); 2695 xlog_tic_reset_res(ticket);
2772 xlog_trace_loggrant(log, ticket, 2696
2773 "xlog_regrant_reserve_log_space: sub current res"); 2697 trace_xfs_log_regrant_reserve_sub(log, ticket);
2698
2774 xlog_verify_grant_head(log, 1); 2699 xlog_verify_grant_head(log, 1);
2775 2700
2776 /* just return if we still have some of the pre-reserved space */ 2701 /* just return if we still have some of the pre-reserved space */
@@ -2780,8 +2705,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2780 } 2705 }
2781 2706
2782 xlog_grant_add_space_reserve(log, ticket->t_unit_res); 2707 xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2783 xlog_trace_loggrant(log, ticket, 2708
2784 "xlog_regrant_reserve_log_space: exit"); 2709 trace_xfs_log_regrant_reserve_exit(log, ticket);
2710
2785 xlog_verify_grant_head(log, 0); 2711 xlog_verify_grant_head(log, 0);
2786 spin_unlock(&log->l_grant_lock); 2712 spin_unlock(&log->l_grant_lock);
2787 ticket->t_curr_res = ticket->t_unit_res; 2713 ticket->t_curr_res = ticket->t_unit_res;
@@ -2811,11 +2737,11 @@ xlog_ungrant_log_space(xlog_t *log,
2811 ticket->t_cnt--; 2737 ticket->t_cnt--;
2812 2738
2813 spin_lock(&log->l_grant_lock); 2739 spin_lock(&log->l_grant_lock);
2814 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); 2740 trace_xfs_log_ungrant_enter(log, ticket);
2815 2741
2816 xlog_grant_sub_space(log, ticket->t_curr_res); 2742 xlog_grant_sub_space(log, ticket->t_curr_res);
2817 2743
2818 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); 2744 trace_xfs_log_ungrant_sub(log, ticket);
2819 2745
2820 /* If this is a permanent reservation ticket, we may be able to free 2746 /* If this is a permanent reservation ticket, we may be able to free
2821 * up more space based on the remaining count. 2747 * up more space based on the remaining count.
@@ -2825,7 +2751,8 @@ xlog_ungrant_log_space(xlog_t *log,
2825 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); 2751 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
2826 } 2752 }
2827 2753
2828 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); 2754 trace_xfs_log_ungrant_exit(log, ticket);
2755
2829 xlog_verify_grant_head(log, 1); 2756 xlog_verify_grant_head(log, 1);
2830 spin_unlock(&log->l_grant_lock); 2757 spin_unlock(&log->l_grant_lock);
2831 xfs_log_move_tail(log->l_mp, 1); 2758 xfs_log_move_tail(log->l_mp, 1);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 679c7c4926a..d55662db707 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,6 @@
19#define __XFS_LOG_PRIV_H__ 19#define __XFS_LOG_PRIV_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct ktrace;
23struct log; 22struct log;
24struct xlog_ticket; 23struct xlog_ticket;
25struct xfs_buf_cancel; 24struct xfs_buf_cancel;
@@ -135,6 +134,12 @@ static inline uint xlog_get_client_id(__be32 i)
135#define XLOG_TIC_INITED 0x1 /* has been initialized */ 134#define XLOG_TIC_INITED 0x1 /* has been initialized */
136#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ 135#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
137#define XLOG_TIC_IN_Q 0x4 136#define XLOG_TIC_IN_Q 0x4
137
138#define XLOG_TIC_FLAGS \
139 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
140 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
141 { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
142
138#endif /* __KERNEL__ */ 143#endif /* __KERNEL__ */
139 144
140#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ 145#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */
@@ -361,9 +366,6 @@ typedef struct xlog_in_core {
361 int ic_bwritecnt; 366 int ic_bwritecnt;
362 unsigned short ic_state; 367 unsigned short ic_state;
363 char *ic_datap; /* pointer to iclog data */ 368 char *ic_datap; /* pointer to iclog data */
364#ifdef XFS_LOG_TRACE
365 struct ktrace *ic_trace;
366#endif
367 369
368 /* Callback structures need their own cacheline */ 370 /* Callback structures need their own cacheline */
369 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 371 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
@@ -429,10 +431,6 @@ typedef struct log {
429 int l_grant_write_cycle; 431 int l_grant_write_cycle;
430 int l_grant_write_bytes; 432 int l_grant_write_bytes;
431 433
432#ifdef XFS_LOG_TRACE
433 struct ktrace *l_grant_trace;
434#endif
435
436 /* The following field are used for debugging; need to hold icloglock */ 434 /* The following field are used for debugging; need to hold icloglock */
437#ifdef DEBUG 435#ifdef DEBUG
438 char *l_iclog_bak[XLOG_MAX_ICLOGS]; 436 char *l_iclog_bak[XLOG_MAX_ICLOGS];
@@ -456,12 +454,6 @@ extern void xlog_put_bp(struct xfs_buf *);
456 454
457extern kmem_zone_t *xfs_log_ticket_zone; 455extern kmem_zone_t *xfs_log_ticket_zone;
458 456
459/* iclog tracing */
460#define XLOG_TRACE_GRAB_FLUSH 1
461#define XLOG_TRACE_REL_FLUSH 2
462#define XLOG_TRACE_SLEEP_FLUSH 3
463#define XLOG_TRACE_WAKE_FLUSH 4
464
465/* 457/*
466 * Unmount record type is used as a pseudo transaction type for the ticket. 458 * Unmount record type is used as a pseudo transaction type for the ticket.
467 * It's value must be outside the range of XFS_TRANS_* values. 459 * It's value must be outside the range of XFS_TRANS_* values.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fb17f8226b0..69ac2e5ef20 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -46,6 +46,7 @@
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_trace.h"
49 50
50STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
51STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
@@ -225,16 +226,10 @@ xlog_header_check_dump(
225 xfs_mount_t *mp, 226 xfs_mount_t *mp,
226 xlog_rec_header_t *head) 227 xlog_rec_header_t *head)
227{ 228{
228 int b; 229 cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n",
229 230 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
230 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 231 cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n",
231 for (b = 0; b < 16; b++) 232 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
232 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
233 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
234 cmn_err(CE_DEBUG, " log : uuid = ");
235 for (b = 0; b < 16; b++)
236 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
237 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
238} 233}
239#else 234#else
240#define xlog_header_check_dump(mp, head) 235#define xlog_header_check_dump(mp, head)
@@ -2206,6 +2201,7 @@ xlog_recover_do_buffer_trans(
2206 xfs_daddr_t blkno; 2201 xfs_daddr_t blkno;
2207 int len; 2202 int len;
2208 ushort flags; 2203 ushort flags;
2204 uint buf_flags;
2209 2205
2210 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; 2206 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2211 2207
@@ -2246,12 +2242,11 @@ xlog_recover_do_buffer_trans(
2246 } 2242 }
2247 2243
2248 mp = log->l_mp; 2244 mp = log->l_mp;
2249 if (flags & XFS_BLI_INODE_BUF) { 2245 buf_flags = XFS_BUF_LOCK;
2250 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, 2246 if (!(flags & XFS_BLI_INODE_BUF))
2251 XFS_BUF_LOCK); 2247 buf_flags |= XFS_BUF_MAPPED;
2252 } else { 2248
2253 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); 2249 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
2254 }
2255 if (XFS_BUF_ISERROR(bp)) { 2250 if (XFS_BUF_ISERROR(bp)) {
2256 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2251 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
2257 bp, blkno); 2252 bp, blkno);
@@ -2350,8 +2345,8 @@ xlog_recover_do_inode_trans(
2350 goto error; 2345 goto error;
2351 } 2346 }
2352 2347
2353 bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, 2348 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2354 in_f->ilf_len, XFS_BUF_LOCK); 2349 XFS_BUF_LOCK);
2355 if (XFS_BUF_ISERROR(bp)) { 2350 if (XFS_BUF_ISERROR(bp)) {
2356 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2351 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2357 bp, in_f->ilf_blkno); 2352 bp, in_f->ilf_blkno);
@@ -3517,7 +3512,7 @@ xlog_do_recovery_pass(
3517{ 3512{
3518 xlog_rec_header_t *rhead; 3513 xlog_rec_header_t *rhead;
3519 xfs_daddr_t blk_no; 3514 xfs_daddr_t blk_no;
3520 xfs_caddr_t bufaddr, offset; 3515 xfs_caddr_t offset;
3521 xfs_buf_t *hbp, *dbp; 3516 xfs_buf_t *hbp, *dbp;
3522 int error = 0, h_size; 3517 int error = 0, h_size;
3523 int bblks, split_bblks; 3518 int bblks, split_bblks;
@@ -3610,7 +3605,7 @@ xlog_do_recovery_pass(
3610 /* 3605 /*
3611 * Check for header wrapping around physical end-of-log 3606 * Check for header wrapping around physical end-of-log
3612 */ 3607 */
3613 offset = NULL; 3608 offset = XFS_BUF_PTR(hbp);
3614 split_hblks = 0; 3609 split_hblks = 0;
3615 wrapped_hblks = 0; 3610 wrapped_hblks = 0;
3616 if (blk_no + hblks <= log->l_logBBsize) { 3611 if (blk_no + hblks <= log->l_logBBsize) {
@@ -3646,9 +3641,8 @@ xlog_do_recovery_pass(
3646 * - order is important. 3641 * - order is important.
3647 */ 3642 */
3648 wrapped_hblks = hblks - split_hblks; 3643 wrapped_hblks = hblks - split_hblks;
3649 bufaddr = XFS_BUF_PTR(hbp);
3650 error = XFS_BUF_SET_PTR(hbp, 3644 error = XFS_BUF_SET_PTR(hbp,
3651 bufaddr + BBTOB(split_hblks), 3645 offset + BBTOB(split_hblks),
3652 BBTOB(hblks - split_hblks)); 3646 BBTOB(hblks - split_hblks));
3653 if (error) 3647 if (error)
3654 goto bread_err2; 3648 goto bread_err2;
@@ -3658,14 +3652,10 @@ xlog_do_recovery_pass(
3658 if (error) 3652 if (error)
3659 goto bread_err2; 3653 goto bread_err2;
3660 3654
3661 error = XFS_BUF_SET_PTR(hbp, bufaddr, 3655 error = XFS_BUF_SET_PTR(hbp, offset,
3662 BBTOB(hblks)); 3656 BBTOB(hblks));
3663 if (error) 3657 if (error)
3664 goto bread_err2; 3658 goto bread_err2;
3665
3666 if (!offset)
3667 offset = xlog_align(log, 0,
3668 wrapped_hblks, hbp);
3669 } 3659 }
3670 rhead = (xlog_rec_header_t *)offset; 3660 rhead = (xlog_rec_header_t *)offset;
3671 error = xlog_valid_rec_header(log, rhead, 3661 error = xlog_valid_rec_header(log, rhead,
@@ -3685,7 +3675,7 @@ xlog_do_recovery_pass(
3685 } else { 3675 } else {
3686 /* This log record is split across the 3676 /* This log record is split across the
3687 * physical end of log */ 3677 * physical end of log */
3688 offset = NULL; 3678 offset = XFS_BUF_PTR(dbp);
3689 split_bblks = 0; 3679 split_bblks = 0;
3690 if (blk_no != log->l_logBBsize) { 3680 if (blk_no != log->l_logBBsize) {
3691 /* some data is before the physical 3681 /* some data is before the physical
@@ -3714,9 +3704,8 @@ xlog_do_recovery_pass(
3714 * _first_, then the log start (LR header end) 3704 * _first_, then the log start (LR header end)
3715 * - order is important. 3705 * - order is important.
3716 */ 3706 */
3717 bufaddr = XFS_BUF_PTR(dbp);
3718 error = XFS_BUF_SET_PTR(dbp, 3707 error = XFS_BUF_SET_PTR(dbp,
3719 bufaddr + BBTOB(split_bblks), 3708 offset + BBTOB(split_bblks),
3720 BBTOB(bblks - split_bblks)); 3709 BBTOB(bblks - split_bblks));
3721 if (error) 3710 if (error)
3722 goto bread_err2; 3711 goto bread_err2;
@@ -3727,13 +3716,9 @@ xlog_do_recovery_pass(
3727 if (error) 3716 if (error)
3728 goto bread_err2; 3717 goto bread_err2;
3729 3718
3730 error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size); 3719 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3731 if (error) 3720 if (error)
3732 goto bread_err2; 3721 goto bread_err2;
3733
3734 if (!offset)
3735 offset = xlog_align(log, wrapped_hblks,
3736 bblks - split_bblks, dbp);
3737 } 3722 }
3738 xlog_unpack_data(rhead, offset, log); 3723 xlog_unpack_data(rhead, offset, log);
3739 if ((error = xlog_recover_process_data(log, rhash, 3724 if ((error = xlog_recover_process_data(log, rhash,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8b6c9e807ef..eb403b40e12 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,6 +44,8 @@
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_fsops.h" 45#include "xfs_fsops.h"
46#include "xfs_utils.h" 46#include "xfs_utils.h"
47#include "xfs_trace.h"
48
47 49
48STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
49 51
@@ -583,8 +585,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 585 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
584 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 586 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
585 587
586 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 588 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
587 BTOBB(sector_size), extra_flags); 589 extra_flags);
588 if (!bp || XFS_BUF_ISERROR(bp)) { 590 if (!bp || XFS_BUF_ISERROR(bp)) {
589 xfs_fs_mount_cmn_err(flags, "SB read failed"); 591 xfs_fs_mount_cmn_err(flags, "SB read failed");
590 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 592 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -624,8 +626,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
624 XFS_BUF_UNMANAGE(bp); 626 XFS_BUF_UNMANAGE(bp);
625 xfs_buf_relse(bp); 627 xfs_buf_relse(bp);
626 sector_size = mp->m_sb.sb_sectsize; 628 sector_size = mp->m_sb.sb_sectsize;
627 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 629 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
628 BTOBB(sector_size), extra_flags); 630 BTOBB(sector_size), extra_flags);
629 if (!bp || XFS_BUF_ISERROR(bp)) { 631 if (!bp || XFS_BUF_ISERROR(bp)) {
630 xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 632 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
631 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 633 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -1471,7 +1473,7 @@ xfs_log_sbcount(
1471 if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) 1473 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1472 return 0; 1474 return 0;
1473 1475
1474 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); 1476 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1475 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1477 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1476 XFS_DEFAULT_LOG_COUNT); 1478 XFS_DEFAULT_LOG_COUNT);
1477 if (error) { 1479 if (error) {
@@ -2123,7 +2125,7 @@ xfs_icsb_destroy_counters(
2123 mutex_destroy(&mp->m_icsb_mutex); 2125 mutex_destroy(&mp->m_icsb_mutex);
2124} 2126}
2125 2127
2126STATIC_INLINE void 2128STATIC void
2127xfs_icsb_lock_cntr( 2129xfs_icsb_lock_cntr(
2128 xfs_icsb_cnts_t *icsbp) 2130 xfs_icsb_cnts_t *icsbp)
2129{ 2131{
@@ -2132,7 +2134,7 @@ xfs_icsb_lock_cntr(
2132 } 2134 }
2133} 2135}
2134 2136
2135STATIC_INLINE void 2137STATIC void
2136xfs_icsb_unlock_cntr( 2138xfs_icsb_unlock_cntr(
2137 xfs_icsb_cnts_t *icsbp) 2139 xfs_icsb_cnts_t *icsbp)
2138{ 2140{
@@ -2140,7 +2142,7 @@ xfs_icsb_unlock_cntr(
2140} 2142}
2141 2143
2142 2144
2143STATIC_INLINE void 2145STATIC void
2144xfs_icsb_lock_all_counters( 2146xfs_icsb_lock_all_counters(
2145 xfs_mount_t *mp) 2147 xfs_mount_t *mp)
2146{ 2148{
@@ -2153,7 +2155,7 @@ xfs_icsb_lock_all_counters(
2153 } 2155 }
2154} 2156}
2155 2157
2156STATIC_INLINE void 2158STATIC void
2157xfs_icsb_unlock_all_counters( 2159xfs_icsb_unlock_all_counters(
2158 xfs_mount_t *mp) 2160 xfs_mount_t *mp)
2159{ 2161{
@@ -2389,12 +2391,12 @@ xfs_icsb_modify_counters(
2389{ 2391{
2390 xfs_icsb_cnts_t *icsbp; 2392 xfs_icsb_cnts_t *icsbp;
2391 long long lcounter; /* long counter for 64 bit fields */ 2393 long long lcounter; /* long counter for 64 bit fields */
2392 int cpu, ret = 0; 2394 int ret = 0;
2393 2395
2394 might_sleep(); 2396 might_sleep();
2395again: 2397again:
2396 cpu = get_cpu(); 2398 preempt_disable();
2397 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu); 2399 icsbp = this_cpu_ptr(mp->m_sb_cnts);
2398 2400
2399 /* 2401 /*
2400 * if the counter is disabled, go to slow path 2402 * if the counter is disabled, go to slow path
@@ -2438,11 +2440,11 @@ again:
2438 break; 2440 break;
2439 } 2441 }
2440 xfs_icsb_unlock_cntr(icsbp); 2442 xfs_icsb_unlock_cntr(icsbp);
2441 put_cpu(); 2443 preempt_enable();
2442 return 0; 2444 return 0;
2443 2445
2444slow_path: 2446slow_path:
2445 put_cpu(); 2447 preempt_enable();
2446 2448
2447 /* 2449 /*
2448 * serialise with a mutex so we don't burn lots of cpu on 2450 * serialise with a mutex so we don't burn lots of cpu on
@@ -2490,7 +2492,7 @@ slow_path:
2490 2492
2491balance_counter: 2493balance_counter:
2492 xfs_icsb_unlock_cntr(icsbp); 2494 xfs_icsb_unlock_cntr(icsbp);
2493 put_cpu(); 2495 preempt_enable();
2494 2496
2495 /* 2497 /*
2496 * We may have multiple threads here if multiple per-cpu 2498 * We may have multiple threads here if multiple per-cpu
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a6c023bc0fb..1df7e450296 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -93,6 +93,9 @@ typedef struct xfs_dmops {
93 xfs_send_unmount_t xfs_send_unmount; 93 xfs_send_unmount_t xfs_send_unmount;
94} xfs_dmops_t; 94} xfs_dmops_t;
95 95
96#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
97 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
98
96#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ 99#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
97 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) 100 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
98#define XFS_SEND_MMAP(mp, vma,fl) \ 101#define XFS_SEND_MMAP(mp, vma,fl) \
@@ -101,12 +104,24 @@ typedef struct xfs_dmops {
101 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) 104 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
102#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 105#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
103 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) 106 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
104#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
105 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
106#define XFS_SEND_MOUNT(mp,right,path,name) \ 107#define XFS_SEND_MOUNT(mp,right,path,name) \
107 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) 108 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
108#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ 109#define XFS_SEND_PREUNMOUNT(mp) \
109 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) 110do { \
111 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
112 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
113 (mp)->m_rootip, DM_RIGHT_NULL, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
116 } \
117} while (0)
118#define XFS_SEND_UNMOUNT(mp) \
119do { \
120 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
121 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
122 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
123 } \
124} while (0)
110 125
111 126
112#ifdef HAVE_PERCPU_SB 127#ifdef HAVE_PERCPU_SB
@@ -387,13 +402,13 @@ xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
387 * Per-cpu superblock locking functions 402 * Per-cpu superblock locking functions
388 */ 403 */
389#ifdef HAVE_PERCPU_SB 404#ifdef HAVE_PERCPU_SB
390STATIC_INLINE void 405static inline void
391xfs_icsb_lock(xfs_mount_t *mp) 406xfs_icsb_lock(xfs_mount_t *mp)
392{ 407{
393 mutex_lock(&mp->m_icsb_mutex); 408 mutex_lock(&mp->m_icsb_mutex);
394} 409}
395 410
396STATIC_INLINE void 411static inline void
397xfs_icsb_unlock(xfs_mount_t *mp) 412xfs_icsb_unlock(xfs_mount_t *mp)
398{ 413{
399 mutex_unlock(&mp->m_icsb_mutex); 414 mutex_unlock(&mp->m_icsb_mutex);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 3ec91ac74c2..91bfd60f4c7 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -92,6 +92,14 @@ typedef struct xfs_dqblk {
92 92
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 94
95#define XFS_DQ_FLAGS \
96 { XFS_DQ_USER, "USER" }, \
97 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102
95/* 103/*
96 * In the worst case, when both user and group quotas are on, 104 * In the worst case, when both user and group quotas are on,
97 * we can have a max of three dquots changing in a single transaction. 105 * we can have a max of three dquots changing in a single transaction.
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index b81deea0ce1..fc1cda23b81 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -39,6 +39,7 @@
39#include "xfs_utils.h" 39#include "xfs_utils.h"
40#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
41#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_trace.h"
42 43
43 44
44/* 45/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 385f6dceba5..6be05f756d5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -45,6 +45,7 @@
45#include "xfs_inode_item.h" 45#include "xfs_inode_item.h"
46#include "xfs_trans_space.h" 46#include "xfs_trans_space.h"
47#include "xfs_utils.h" 47#include "xfs_utils.h"
48#include "xfs_trace.h"
48 49
49 50
50/* 51/*
@@ -1516,6 +1517,8 @@ xfs_rtfree_range(
1516 */ 1517 */
1517 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1518 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1519 /* 1522 /*
1520 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1521 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3f816ad7ff1..5aa07caea5f 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -44,6 +44,7 @@
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_buf_item.h" 45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_trace.h"
47 48
48/* 49/*
49 * This is a subroutine for xfs_write() and other writers (xfs_ioctl) 50 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
@@ -171,7 +172,6 @@ xfs_bioerror(
171 * No need to wait until the buffer is unpinned. 172 * No need to wait until the buffer is unpinned.
172 * We aren't flushing it. 173 * We aren't flushing it.
173 */ 174 */
174 xfs_buftrace("XFS IOERROR", bp);
175 XFS_BUF_ERROR(bp, EIO); 175 XFS_BUF_ERROR(bp, EIO);
176 /* 176 /*
177 * We're calling biodone, so delete B_DONE flag. Either way 177 * We're calling biodone, so delete B_DONE flag. Either way
@@ -205,7 +205,6 @@ xfs_bioerror_relse(
205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); 205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); 206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
207 207
208 xfs_buftrace("XFS IOERRELSE", bp);
209 fl = XFS_BUF_BFLAGS(bp); 208 fl = XFS_BUF_BFLAGS(bp);
210 /* 209 /*
211 * No need to wait until the buffer is unpinned. 210 * No need to wait until the buffer is unpinned.
@@ -277,10 +276,10 @@ xfs_read_buf(
277 xfs_buf_t *bp; 276 xfs_buf_t *bp;
278 int error; 277 int error;
279 278
280 if (flags) 279 if (!flags)
281 bp = xfs_buf_read_flags(target, blkno, len, flags); 280 flags = XBF_LOCK | XBF_MAPPED;
282 else 281
283 bp = xfs_buf_read(target, blkno, len, flags); 282 bp = xfs_buf_read(target, blkno, len, flags);
284 if (!bp) 283 if (!bp)
285 return XFS_ERROR(EIO); 284 return XFS_ERROR(EIO);
286 error = XFS_BUF_GETERROR(bp); 285 error = XFS_BUF_GETERROR(bp);
@@ -336,3 +335,25 @@ xfs_bwrite(
336 } 335 }
337 return (error); 336 return (error);
338} 337}
338
339/*
340 * helper function to extract extent size hint from inode
341 */
342xfs_extlen_t
343xfs_get_extsz_hint(
344 struct xfs_inode *ip)
345{
346 xfs_extlen_t extsz;
347
348 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
349 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
350 ? ip->i_d.di_extsize
351 : ip->i_mount->m_sb.sb_rextsize;
352 ASSERT(extsz);
353 } else {
354 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
355 ? ip->i_d.di_extsize : 0;
356 }
357
358 return extsz;
359}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f5e4874c37d..571f2174435 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -37,34 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
37} 37}
38 38
39/* 39/*
40 * Flags for xfs_free_eofblocks
41 */
42#define XFS_FREE_EOF_LOCK (1<<0)
43#define XFS_FREE_EOF_NOLOCK (1<<1)
44
45
46/*
47 * helper function to extract extent size hint from inode
48 */
49STATIC_INLINE xfs_extlen_t
50xfs_get_extsz_hint(
51 xfs_inode_t *ip)
52{
53 xfs_extlen_t extsz;
54
55 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
56 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
57 ? ip->i_d.di_extsize
58 : ip->i_mount->m_sb.sb_rextsize;
59 ASSERT(extsz);
60 } else {
61 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
62 ? ip->i_d.di_extsize : 0;
63 }
64 return extsz;
65}
66
67/*
68 * Prototypes for functions in xfs_rw.c. 40 * Prototypes for functions in xfs_rw.c.
69 */ 41 */
70extern int xfs_write_clear_setuid(struct xfs_inode *ip); 42extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -76,5 +48,6 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
76 struct xfs_buf **bpp); 48 struct xfs_buf **bpp);
77extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, 49extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
78 xfs_buf_t *bp, xfs_daddr_t blkno); 50 xfs_buf_t *bp, xfs_daddr_t blkno);
51extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
79 52
80#endif /* __XFS_RW_H__ */ 53#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 66b849358e6..237badcbac3 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -236,19 +236,20 @@ xfs_trans_alloc(
236 uint type) 236 uint type)
237{ 237{
238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
239 return _xfs_trans_alloc(mp, type); 239 return _xfs_trans_alloc(mp, type, KM_SLEEP);
240} 240}
241 241
242xfs_trans_t * 242xfs_trans_t *
243_xfs_trans_alloc( 243_xfs_trans_alloc(
244 xfs_mount_t *mp, 244 xfs_mount_t *mp,
245 uint type) 245 uint type,
246 uint memflags)
246{ 247{
247 xfs_trans_t *tp; 248 xfs_trans_t *tp;
248 249
249 atomic_inc(&mp->m_active_trans); 250 atomic_inc(&mp->m_active_trans);
250 251
251 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 252 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
252 tp->t_magic = XFS_TRANS_MAGIC; 253 tp->t_magic = XFS_TRANS_MAGIC;
253 tp->t_type = type; 254 tp->t_type = type;
254 tp->t_mountp = mp; 255 tp->t_mountp = mp;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ed47fc77759..ca64f33c63a 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -100,6 +100,49 @@ typedef struct xfs_trans_header {
100#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
101/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
102 102
103#define XFS_TRANS_TYPES \
104 { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
105 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
106 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
107 { XFS_TRANS_CREATE, "CREATE" }, \
108 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
109 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
110 { XFS_TRANS_REMOVE, "REMOVE" }, \
111 { XFS_TRANS_LINK, "LINK" }, \
112 { XFS_TRANS_RENAME, "RENAME" }, \
113 { XFS_TRANS_MKDIR, "MKDIR" }, \
114 { XFS_TRANS_RMDIR, "RMDIR" }, \
115 { XFS_TRANS_SYMLINK, "SYMLINK" }, \
116 { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
117 { XFS_TRANS_GROWFS, "GROWFS" }, \
118 { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
119 { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
120 { XFS_TRANS_WRITEID, "WRITEID" }, \
121 { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
122 { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
123 { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
124 { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
125 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
126 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
127 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
128 { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
129 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
130 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
131 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
132 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
133 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
134 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
135 { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
136 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
137 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
138 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
139 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
140 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
141 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
142 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
143 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
144 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
145
103/* 146/*
104 * This structure is used to track log items associated with 147 * This structure is used to track log items associated with
105 * a transaction. It points to the log item and keeps some 148 * a transaction. It points to the log item and keeps some
@@ -782,6 +825,10 @@ typedef struct xfs_log_item {
782#define XFS_LI_IN_AIL 0x1 825#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2 826#define XFS_LI_ABORTED 0x2
784 827
828#define XFS_LI_FLAGS \
829 { XFS_LI_IN_AIL, "IN_AIL" }, \
830 { XFS_LI_ABORTED, "ABORTED" }
831
785typedef struct xfs_item_ops { 832typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *); 833 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 834 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
@@ -924,7 +971,7 @@ typedef struct xfs_trans {
924 * XFS transaction mechanism exported interfaces. 971 * XFS transaction mechanism exported interfaces.
925 */ 972 */
926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 973xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 974xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
928xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 975xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
929int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, 976int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
930 uint, uint); 977 uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 218829e6a15..49130628d5e 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -38,6 +38,7 @@
38#include "xfs_trans_priv.h" 38#include "xfs_trans_priv.h"
39#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_trace.h"
41 42
42 43
43STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 44STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
@@ -79,11 +80,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
79 /* 80 /*
80 * Default to a normal get_buf() call if the tp is NULL. 81 * Default to a normal get_buf() call if the tp is NULL.
81 */ 82 */
82 if (tp == NULL) { 83 if (tp == NULL)
83 bp = xfs_buf_get_flags(target_dev, blkno, len, 84 return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
84 flags | BUF_BUSY);
85 return(bp);
86 }
87 85
88 /* 86 /*
89 * If we find the buffer in the cache with this transaction 87 * If we find the buffer in the cache with this transaction
@@ -98,26 +96,23 @@ xfs_trans_get_buf(xfs_trans_t *tp,
98 } 96 }
99 if (bp != NULL) { 97 if (bp != NULL) {
100 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 98 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
101 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { 99 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
102 xfs_buftrace("TRANS GET RECUR SHUT", bp);
103 XFS_BUF_SUPER_STALE(bp); 100 XFS_BUF_SUPER_STALE(bp);
104 } 101
105 /* 102 /*
106 * If the buffer is stale then it was binval'ed 103 * If the buffer is stale then it was binval'ed
107 * since last read. This doesn't matter since the 104 * since last read. This doesn't matter since the
108 * caller isn't allowed to use the data anyway. 105 * caller isn't allowed to use the data anyway.
109 */ 106 */
110 else if (XFS_BUF_ISSTALE(bp)) { 107 else if (XFS_BUF_ISSTALE(bp))
111 xfs_buftrace("TRANS GET RECUR STALE", bp);
112 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 108 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
113 } 109
114 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 110 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
115 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 111 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
116 ASSERT(bip != NULL); 112 ASSERT(bip != NULL);
117 ASSERT(atomic_read(&bip->bli_refcount) > 0); 113 ASSERT(atomic_read(&bip->bli_refcount) > 0);
118 bip->bli_recur++; 114 bip->bli_recur++;
119 xfs_buftrace("TRANS GET RECUR", bp); 115 trace_xfs_trans_get_buf_recur(bip);
120 xfs_buf_item_trace("GET RECUR", bip);
121 return (bp); 116 return (bp);
122 } 117 }
123 118
@@ -129,7 +124,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
129 * easily deadlock with our current transaction as well as cause 124 * easily deadlock with our current transaction as well as cause
130 * us to run out of stack space. 125 * us to run out of stack space.
131 */ 126 */
132 bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); 127 bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
133 if (bp == NULL) { 128 if (bp == NULL) {
134 return NULL; 129 return NULL;
135 } 130 }
@@ -169,8 +164,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
169 */ 164 */
170 XFS_BUF_SET_FSPRIVATE2(bp, tp); 165 XFS_BUF_SET_FSPRIVATE2(bp, tp);
171 166
172 xfs_buftrace("TRANS GET", bp); 167 trace_xfs_trans_get_buf(bip);
173 xfs_buf_item_trace("GET", bip);
174 return (bp); 168 return (bp);
175} 169}
176 170
@@ -210,7 +204,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
210 ASSERT(bip != NULL); 204 ASSERT(bip != NULL);
211 ASSERT(atomic_read(&bip->bli_refcount) > 0); 205 ASSERT(atomic_read(&bip->bli_refcount) > 0);
212 bip->bli_recur++; 206 bip->bli_recur++;
213 xfs_buf_item_trace("GETSB RECUR", bip); 207 trace_xfs_trans_getsb_recur(bip);
214 return (bp); 208 return (bp);
215 } 209 }
216 210
@@ -252,7 +246,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
252 */ 246 */
253 XFS_BUF_SET_FSPRIVATE2(bp, tp); 247 XFS_BUF_SET_FSPRIVATE2(bp, tp);
254 248
255 xfs_buf_item_trace("GETSB", bip); 249 trace_xfs_trans_getsb(bip);
256 return (bp); 250 return (bp);
257} 251}
258 252
@@ -302,7 +296,7 @@ xfs_trans_read_buf(
302 * Default to a normal get_buf() call if the tp is NULL. 296 * Default to a normal get_buf() call if the tp is NULL.
303 */ 297 */
304 if (tp == NULL) { 298 if (tp == NULL) {
305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 299 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
306 if (!bp) 300 if (!bp)
307 return (flags & XFS_BUF_TRYLOCK) ? 301 return (flags & XFS_BUF_TRYLOCK) ?
308 EAGAIN : XFS_ERROR(ENOMEM); 302 EAGAIN : XFS_ERROR(ENOMEM);
@@ -350,7 +344,7 @@ xfs_trans_read_buf(
350 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 344 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
351 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 345 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
352 if (!(XFS_BUF_ISDONE(bp))) { 346 if (!(XFS_BUF_ISDONE(bp))) {
353 xfs_buftrace("READ_BUF_INCORE !DONE", bp); 347 trace_xfs_trans_read_buf_io(bp, _RET_IP_);
354 ASSERT(!XFS_BUF_ISASYNC(bp)); 348 ASSERT(!XFS_BUF_ISASYNC(bp));
355 XFS_BUF_READ(bp); 349 XFS_BUF_READ(bp);
356 xfsbdstrat(tp->t_mountp, bp); 350 xfsbdstrat(tp->t_mountp, bp);
@@ -375,7 +369,7 @@ xfs_trans_read_buf(
375 * brelse it either. Just get out. 369 * brelse it either. Just get out.
376 */ 370 */
377 if (XFS_FORCED_SHUTDOWN(mp)) { 371 if (XFS_FORCED_SHUTDOWN(mp)) {
378 xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp); 372 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
379 *bpp = NULL; 373 *bpp = NULL;
380 return XFS_ERROR(EIO); 374 return XFS_ERROR(EIO);
381 } 375 }
@@ -385,7 +379,7 @@ xfs_trans_read_buf(
385 bip->bli_recur++; 379 bip->bli_recur++;
386 380
387 ASSERT(atomic_read(&bip->bli_refcount) > 0); 381 ASSERT(atomic_read(&bip->bli_refcount) > 0);
388 xfs_buf_item_trace("READ RECUR", bip); 382 trace_xfs_trans_read_buf_recur(bip);
389 *bpp = bp; 383 *bpp = bp;
390 return 0; 384 return 0;
391 } 385 }
@@ -398,14 +392,13 @@ xfs_trans_read_buf(
398 * easily deadlock with our current transaction as well as cause 392 * easily deadlock with our current transaction as well as cause
399 * us to run out of stack space. 393 * us to run out of stack space.
400 */ 394 */
401 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 395 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
402 if (bp == NULL) { 396 if (bp == NULL) {
403 *bpp = NULL; 397 *bpp = NULL;
404 return 0; 398 return 0;
405 } 399 }
406 if (XFS_BUF_GETERROR(bp) != 0) { 400 if (XFS_BUF_GETERROR(bp) != 0) {
407 XFS_BUF_SUPER_STALE(bp); 401 XFS_BUF_SUPER_STALE(bp);
408 xfs_buftrace("READ ERROR", bp);
409 error = XFS_BUF_GETERROR(bp); 402 error = XFS_BUF_GETERROR(bp);
410 403
411 xfs_ioerror_alert("xfs_trans_read_buf", mp, 404 xfs_ioerror_alert("xfs_trans_read_buf", mp,
@@ -464,8 +457,7 @@ xfs_trans_read_buf(
464 */ 457 */
465 XFS_BUF_SET_FSPRIVATE2(bp, tp); 458 XFS_BUF_SET_FSPRIVATE2(bp, tp);
466 459
467 xfs_buftrace("TRANS READ", bp); 460 trace_xfs_trans_read_buf(bip);
468 xfs_buf_item_trace("READ", bip);
469 *bpp = bp; 461 *bpp = bp;
470 return 0; 462 return 0;
471 463
@@ -483,7 +475,7 @@ shutdown_abort:
483 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != 475 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
484 (XFS_B_STALE|XFS_B_DELWRI)); 476 (XFS_B_STALE|XFS_B_DELWRI));
485 477
486 xfs_buftrace("READ_BUF XFSSHUTDN", bp); 478 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
487 xfs_buf_relse(bp); 479 xfs_buf_relse(bp);
488 *bpp = NULL; 480 *bpp = NULL;
489 return XFS_ERROR(EIO); 481 return XFS_ERROR(EIO);
@@ -549,13 +541,14 @@ xfs_trans_brelse(xfs_trans_t *tp,
549 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 541 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
550 ASSERT(lidp != NULL); 542 ASSERT(lidp != NULL);
551 543
544 trace_xfs_trans_brelse(bip);
545
552 /* 546 /*
553 * If the release is just for a recursive lock, 547 * If the release is just for a recursive lock,
554 * then decrement the count and return. 548 * then decrement the count and return.
555 */ 549 */
556 if (bip->bli_recur > 0) { 550 if (bip->bli_recur > 0) {
557 bip->bli_recur--; 551 bip->bli_recur--;
558 xfs_buf_item_trace("RELSE RECUR", bip);
559 return; 552 return;
560 } 553 }
561 554
@@ -563,10 +556,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
563 * If the buffer is dirty within this transaction, we can't 556 * If the buffer is dirty within this transaction, we can't
564 * release it until we commit. 557 * release it until we commit.
565 */ 558 */
566 if (lidp->lid_flags & XFS_LID_DIRTY) { 559 if (lidp->lid_flags & XFS_LID_DIRTY)
567 xfs_buf_item_trace("RELSE DIRTY", bip);
568 return; 560 return;
569 }
570 561
571 /* 562 /*
572 * If the buffer has been invalidated, then we can't release 563 * If the buffer has been invalidated, then we can't release
@@ -574,13 +565,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
574 * as part of this transaction. This prevents us from pulling 565 * as part of this transaction. This prevents us from pulling
575 * the item from the AIL before we should. 566 * the item from the AIL before we should.
576 */ 567 */
577 if (bip->bli_flags & XFS_BLI_STALE) { 568 if (bip->bli_flags & XFS_BLI_STALE)
578 xfs_buf_item_trace("RELSE STALE", bip);
579 return; 569 return;
580 }
581 570
582 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 571 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
583 xfs_buf_item_trace("RELSE", bip);
584 572
585 /* 573 /*
586 * Free up the log item descriptor tracking the released item. 574 * Free up the log item descriptor tracking the released item.
@@ -677,7 +665,7 @@ xfs_trans_bjoin(xfs_trans_t *tp,
677 */ 665 */
678 XFS_BUF_SET_FSPRIVATE2(bp, tp); 666 XFS_BUF_SET_FSPRIVATE2(bp, tp);
679 667
680 xfs_buf_item_trace("BJOIN", bip); 668 trace_xfs_trans_bjoin(bip);
681} 669}
682 670
683/* 671/*
@@ -701,7 +689,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
701 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 689 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
702 ASSERT(atomic_read(&bip->bli_refcount) > 0); 690 ASSERT(atomic_read(&bip->bli_refcount) > 0);
703 bip->bli_flags |= XFS_BLI_HOLD; 691 bip->bli_flags |= XFS_BLI_HOLD;
704 xfs_buf_item_trace("BHOLD", bip); 692 trace_xfs_trans_bhold(bip);
705} 693}
706 694
707/* 695/*
@@ -724,7 +712,8 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
724 ASSERT(atomic_read(&bip->bli_refcount) > 0); 712 ASSERT(atomic_read(&bip->bli_refcount) > 0);
725 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 713 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
726 bip->bli_flags &= ~XFS_BLI_HOLD; 714 bip->bli_flags &= ~XFS_BLI_HOLD;
727 xfs_buf_item_trace("BHOLD RELEASE", bip); 715
716 trace_xfs_trans_bhold_release(bip);
728} 717}
729 718
730/* 719/*
@@ -770,6 +759,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
770 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 759 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
771 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 760 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;
772 761
762 trace_xfs_trans_log_buf(bip);
763
773 /* 764 /*
774 * If we invalidated the buffer within this transaction, then 765 * If we invalidated the buffer within this transaction, then
775 * cancel the invalidation now that we're dirtying the buffer 766 * cancel the invalidation now that we're dirtying the buffer
@@ -777,7 +768,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
777 * because we have a reference to the buffer this entire time. 768 * because we have a reference to the buffer this entire time.
778 */ 769 */
779 if (bip->bli_flags & XFS_BLI_STALE) { 770 if (bip->bli_flags & XFS_BLI_STALE) {
780 xfs_buf_item_trace("BLOG UNSTALE", bip);
781 bip->bli_flags &= ~XFS_BLI_STALE; 771 bip->bli_flags &= ~XFS_BLI_STALE;
782 ASSERT(XFS_BUF_ISSTALE(bp)); 772 ASSERT(XFS_BUF_ISSTALE(bp));
783 XFS_BUF_UNSTALE(bp); 773 XFS_BUF_UNSTALE(bp);
@@ -792,7 +782,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
792 lidp->lid_flags &= ~XFS_LID_BUF_STALE; 782 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
793 bip->bli_flags |= XFS_BLI_LOGGED; 783 bip->bli_flags |= XFS_BLI_LOGGED;
794 xfs_buf_item_log(bip, first, last); 784 xfs_buf_item_log(bip, first, last);
795 xfs_buf_item_trace("BLOG", bip);
796} 785}
797 786
798 787
@@ -831,6 +820,8 @@ xfs_trans_binval(
831 ASSERT(lidp != NULL); 820 ASSERT(lidp != NULL);
832 ASSERT(atomic_read(&bip->bli_refcount) > 0); 821 ASSERT(atomic_read(&bip->bli_refcount) > 0);
833 822
823 trace_xfs_trans_binval(bip);
824
834 if (bip->bli_flags & XFS_BLI_STALE) { 825 if (bip->bli_flags & XFS_BLI_STALE) {
835 /* 826 /*
836 * If the buffer is already invalidated, then 827 * If the buffer is already invalidated, then
@@ -843,8 +834,6 @@ xfs_trans_binval(
843 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 834 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
844 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 835 ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
845 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 836 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
846 xfs_buftrace("XFS_BINVAL RECUR", bp);
847 xfs_buf_item_trace("BINVAL RECUR", bip);
848 return; 837 return;
849 } 838 }
850 839
@@ -878,8 +867,6 @@ xfs_trans_binval(
878 (bip->bli_format.blf_map_size * sizeof(uint))); 867 (bip->bli_format.blf_map_size * sizeof(uint)));
879 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 868 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
880 tp->t_flags |= XFS_TRANS_DIRTY; 869 tp->t_flags |= XFS_TRANS_DIRTY;
881 xfs_buftrace("XFS_BINVAL", bp);
882 xfs_buf_item_trace("BINVAL", bip);
883} 870}
884 871
885/* 872/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b572f7e840e..6f268756bf3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -53,6 +53,7 @@
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_filestream.h" 54#include "xfs_filestream.h"
55#include "xfs_vnodeops.h" 55#include "xfs_vnodeops.h"
56#include "xfs_trace.h"
56 57
57int 58int
58xfs_setattr( 59xfs_setattr(
@@ -69,7 +70,6 @@ xfs_setattr(
69 uint commit_flags=0; 70 uint commit_flags=0;
70 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
71 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
72 int timeflags = 0;
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 74 int need_iolock = 1;
75 75
@@ -134,16 +134,13 @@ xfs_setattr(
134 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
135 need_iolock = 0; 135 need_iolock = 0;
136 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
137 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
138 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
139 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
140 commit_flags = 0; 140 0, 0, 0);
141 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
142 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
143 0, 0))) { 143 goto error_return;
144 lock_flags = 0;
145 goto error_return;
146 }
147 } 144 }
148 } else { 145 } else {
149 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -294,15 +291,23 @@ xfs_setattr(
294 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
295 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
296 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS set these flags explicitly if it wants a timestamp
299 * update.
297 */ 300 */
298 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
299 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
300 307
301 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
302 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
303 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
304 if (!(flags & XFS_ATTR_DMI))
305 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
306 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
307 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
308 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -373,9 +378,6 @@ xfs_setattr(
373 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
374 inode->i_gid = gid; 379 inode->i_gid = gid;
375 } 380 }
376
377 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
378 timeflags |= XFS_ICHGTIME_CHG;
379 } 381 }
380 382
381 /* 383 /*
@@ -392,51 +394,37 @@ xfs_setattr(
392 394
393 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
394 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
395
396 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
397 timeflags |= XFS_ICHGTIME_CHG;
398 } 397 }
399 398
400 /* 399 /*
401 * Change file access or modified times. 400 * Change file access or modified times.
402 */ 401 */
403 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
404 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
405 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
406 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
407 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
408 ip->i_update_core = 1;
409 }
410 if (mask & ATTR_MTIME) {
411 inode->i_mtime = iattr->ia_mtime;
412 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
413 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
414 timeflags &= ~XFS_ICHGTIME_MOD;
415 timeflags |= XFS_ICHGTIME_CHG;
416 }
417 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
418 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
419 } 407 }
420 408 if (mask & ATTR_CTIME) {
421 /*
422 * Change file inode change time only if ATTR_CTIME set
423 * AND we have been called by a DMI function.
424 */
425
426 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
427 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
428 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
429 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
430 ip->i_update_core = 1; 412 ip->i_update_core = 1;
431 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
432 } 419 }
433 420
434 /* 421 /*
435 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
436 * current time. Not done when called by a DMI function. 423 * has been changed.
437 */ 424 */
438 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
439 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
440 428
441 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
442 430
@@ -451,12 +439,10 @@ xfs_setattr(
451 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
452 */ 440 */
453 code = 0; 441 code = 0;
454 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
455 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
456 xfs_trans_set_sync(tp);
457 444
458 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
459 }
460 446
461 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
462 448
@@ -538,9 +524,8 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 524 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 525 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 526
541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), 527 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED | 528 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
543 XBF_DONT_BLOCK);
544 error = XFS_BUF_GETERROR(bp); 529 error = XFS_BUF_GETERROR(bp);
545 if (error) { 530 if (error) {
546 xfs_ioerror_alert("xfs_readlink", 531 xfs_ioerror_alert("xfs_readlink",
@@ -709,6 +694,11 @@ xfs_fsync(
709} 694}
710 695
711/* 696/*
697 * Flags for xfs_free_eofblocks
698 */
699#define XFS_FREE_EOF_TRYLOCK (1<<0)
700
701/*
712 * This is called by xfs_inactive to free any blocks beyond eof 702 * This is called by xfs_inactive to free any blocks beyond eof
713 * when the link count isn't zero and by xfs_dm_punch_hole() when 703 * when the link count isn't zero and by xfs_dm_punch_hole() when
714 * punching a hole to EOF. 704 * punching a hole to EOF.
@@ -726,7 +716,6 @@ xfs_free_eofblocks(
726 xfs_filblks_t map_len; 716 xfs_filblks_t map_len;
727 int nimaps; 717 int nimaps;
728 xfs_bmbt_irec_t imap; 718 xfs_bmbt_irec_t imap;
729 int use_iolock = (flags & XFS_FREE_EOF_LOCK);
730 719
731 /* 720 /*
732 * Figure out if there are any blocks beyond the end 721 * Figure out if there are any blocks beyond the end
@@ -768,14 +757,19 @@ xfs_free_eofblocks(
768 * cache and we can't 757 * cache and we can't
769 * do that within a transaction. 758 * do that within a transaction.
770 */ 759 */
771 if (use_iolock) 760 if (flags & XFS_FREE_EOF_TRYLOCK) {
761 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
762 xfs_trans_cancel(tp, 0);
763 return 0;
764 }
765 } else {
772 xfs_ilock(ip, XFS_IOLOCK_EXCL); 766 xfs_ilock(ip, XFS_IOLOCK_EXCL);
767 }
773 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 768 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
774 ip->i_size); 769 ip->i_size);
775 if (error) { 770 if (error) {
776 xfs_trans_cancel(tp, 0); 771 xfs_trans_cancel(tp, 0);
777 if (use_iolock) 772 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
778 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
779 return error; 773 return error;
780 } 774 }
781 775
@@ -812,8 +806,7 @@ xfs_free_eofblocks(
812 error = xfs_trans_commit(tp, 806 error = xfs_trans_commit(tp,
813 XFS_TRANS_RELEASE_LOG_RES); 807 XFS_TRANS_RELEASE_LOG_RES);
814 } 808 }
815 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) 809 xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
816 : XFS_ILOCK_EXCL));
817 } 810 }
818 return error; 811 return error;
819} 812}
@@ -1113,7 +1106,17 @@ xfs_release(
1113 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1106 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1114 (!(ip->i_d.di_flags & 1107 (!(ip->i_d.di_flags &
1115 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1108 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1116 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1109
1110 /*
1111 * If we can't get the iolock just skip truncating
1112 * the blocks past EOF because we could deadlock
1113 * with the mmap_sem otherwise. We'll get another
1114 * chance to drop them once the last reference to
1115 * the inode is dropped, so we'll never leak blocks
1116 * permanently.
1117 */
1118 error = xfs_free_eofblocks(mp, ip,
1119 XFS_FREE_EOF_TRYLOCK);
1117 if (error) 1120 if (error)
1118 return error; 1121 return error;
1119 } 1122 }
@@ -1184,7 +1187,7 @@ xfs_inactive(
1184 (!(ip->i_d.di_flags & 1187 (!(ip->i_d.di_flags &
1185 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1188 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1186 (ip->i_delayed_blks != 0)))) { 1189 (ip->i_delayed_blks != 0)))) {
1187 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1190 error = xfs_free_eofblocks(mp, ip, 0);
1188 if (error) 1191 if (error)
1189 return VN_INACTIVE_CACHE; 1192 return VN_INACTIVE_CACHE;
1190 } 1193 }
@@ -1380,7 +1383,6 @@ xfs_lookup(
1380 if (error) 1383 if (error)
1381 goto out_free_name; 1384 goto out_free_name;
1382 1385
1383 xfs_itrace_ref(*ipp);
1384 return 0; 1386 return 0;
1385 1387
1386out_free_name: 1388out_free_name:
@@ -1526,7 +1528,6 @@ xfs_create(
1526 * At this point, we've gotten a newly allocated inode. 1528 * At this point, we've gotten a newly allocated inode.
1527 * It is locked (and joined to the transaction). 1529 * It is locked (and joined to the transaction).
1528 */ 1530 */
1529 xfs_itrace_ref(ip);
1530 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1531 1532
1532 /* 1533 /*
@@ -1986,9 +1987,6 @@ xfs_remove(
1986 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1987 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1987 xfs_filestream_deassociate(ip); 1988 xfs_filestream_deassociate(ip);
1988 1989
1989 xfs_itrace_exit(ip);
1990 xfs_itrace_exit(dp);
1991
1992 std_return: 1990 std_return:
1993 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 1991 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1994 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, 1992 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
@@ -2285,7 +2283,6 @@ xfs_symlink(
2285 goto error_return; 2283 goto error_return;
2286 goto error1; 2284 goto error1;
2287 } 2285 }
2288 xfs_itrace_ref(ip);
2289 2286
2290 /* 2287 /*
2291 * An error after we've joined dp to the transaction will result in the 2288 * An error after we've joined dp to the transaction will result in the
@@ -2456,46 +2453,6 @@ xfs_set_dmattrs(
2456 return error; 2453 return error;
2457} 2454}
2458 2455
2459int
2460xfs_reclaim(
2461 xfs_inode_t *ip)
2462{
2463
2464 xfs_itrace_entry(ip);
2465
2466 ASSERT(!VN_MAPPED(VFS_I(ip)));
2467
2468 /* bad inode, get out here ASAP */
2469 if (is_bad_inode(VFS_I(ip))) {
2470 xfs_ireclaim(ip);
2471 return 0;
2472 }
2473
2474 xfs_ioend_wait(ip);
2475
2476 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
2477
2478 /*
2479 * If we have nothing to flush with this inode then complete the
2480 * teardown now, otherwise break the link between the xfs inode and the
2481 * linux inode and clean up the xfs inode later. This avoids flushing
2482 * the inode to disk during the delete operation itself.
2483 *
2484 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
2485 * first to ensure that xfs_iunpin() will never see an xfs inode
2486 * that has a linux inode being reclaimed. Synchronisation is provided
2487 * by the i_flags_lock.
2488 */
2489 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2490 xfs_ilock(ip, XFS_ILOCK_EXCL);
2491 xfs_iflock(ip);
2492 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2493 return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2494 }
2495 xfs_inode_set_reclaim_tag(ip);
2496 return 0;
2497}
2498
2499/* 2456/*
2500 * xfs_alloc_file_space() 2457 * xfs_alloc_file_space()
2501 * This routine allocates disk space for the given file. 2458 * This routine allocates disk space for the given file.
@@ -2868,7 +2825,6 @@ xfs_free_file_space(
2868 ioffset = offset & ~(rounding - 1); 2825 ioffset = offset & ~(rounding - 1);
2869 2826
2870 if (VN_CACHED(VFS_I(ip)) != 0) { 2827 if (VN_CACHED(VFS_I(ip)) != 0) {
2871 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
2872 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 2828 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
2873 if (error) 2829 if (error)
2874 goto out_unlock_iolock; 2830 goto out_unlock_iolock;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index a9e102de71a..167a467403a 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -38,7 +38,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
38 const char *target_path, mode_t mode, struct xfs_inode **ipp, 38 const char *target_path, mode_t mode, struct xfs_inode **ipp,
39 cred_t *credp); 39 cred_t *credp);
40int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 40int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
41int xfs_reclaim(struct xfs_inode *ip);
42int xfs_change_file_space(struct xfs_inode *ip, int cmd, 41int xfs_change_file_space(struct xfs_inode *ip, int cmd,
43 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 42 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
44int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 43int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,