Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c | 33
-rw-r--r--  fs/9p/v9fs_vfs.h | 6
-rw-r--r--  fs/9p/vfs_dir.c | 14
-rw-r--r--  fs/9p/vfs_file.c | 36
-rw-r--r--  fs/9p/vfs_inode.c | 141
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 86
-rw-r--r--  fs/9p/vfs_super.c | 2
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/attr.c | 5
-rw-r--r--  fs/autofs4/autofs_i.h | 26
-rw-r--r--  fs/autofs4/waitq.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 23
-rw-r--r--  fs/block_dev.c | 7
-rw-r--r--  fs/btrfs/btrfs_inode.h | 6
-rw-r--r--  fs/btrfs/ctree.h | 10
-rw-r--r--  fs/btrfs/extent-tree.c | 77
-rw-r--r--  fs/btrfs/file-item.c | 4
-rw-r--r--  fs/btrfs/file.c | 75
-rw-r--r--  fs/btrfs/free-space-cache.c | 20
-rw-r--r--  fs/btrfs/inode.c | 52
-rw-r--r--  fs/btrfs/ioctl.c | 47
-rw-r--r--  fs/btrfs/transaction.c | 4
-rw-r--r--  fs/btrfs/tree-log.c | 28
-rw-r--r--  fs/btrfs/volumes.c | 51
-rw-r--r--  fs/btrfs/volumes.h | 2
-rw-r--r--  fs/btrfs/xattr.c | 59
-rw-r--r--  fs/buffer.c | 4
-rw-r--r--  fs/ceph/mds_client.c | 2
-rw-r--r--  fs/ceph/super.c | 4
-rw-r--r--  fs/cifs/README | 14
-rw-r--r--  fs/cifs/cifs_debug.c | 11
-rw-r--r--  fs/cifs/cifs_fs_sb.h | 4
-rw-r--r--  fs/cifs/cifsacl.c | 373
-rw-r--r--  fs/cifs/cifsencrypt.c | 159
-rw-r--r--  fs/cifs/cifsfs.c | 36
-rw-r--r--  fs/cifs/cifsfs.h | 6
-rw-r--r--  fs/cifs/cifsglob.h | 111
-rw-r--r--  fs/cifs/cifspdu.h | 48
-rw-r--r--  fs/cifs/cifsproto.h | 48
-rw-r--r--  fs/cifs/cifssmb.c | 457
-rw-r--r--  fs/cifs/connect.c | 706
-rw-r--r--  fs/cifs/dir.c | 26
-rw-r--r--  fs/cifs/export.c | 4
-rw-r--r--  fs/cifs/file.c | 1126
-rw-r--r--  fs/cifs/inode.c | 54
-rw-r--r--  fs/cifs/link.c | 17
-rw-r--r--  fs/cifs/misc.c | 66
-rw-r--r--  fs/cifs/sess.c | 4
-rw-r--r--  fs/cifs/smbencrypt.c | 121
-rw-r--r--  fs/cifs/transport.c | 70
-rw-r--r--  fs/cifs/xattr.c | 42
-rw-r--r--  fs/coda/coda_linux.h | 5
-rw-r--r--  fs/compat.c | 13
-rw-r--r--  fs/compat_ioctl.c | 1
-rw-r--r--  fs/configfs/inode.c | 3
-rw-r--r--  fs/configfs/item.c | 2
-rw-r--r--  fs/debugfs/inode.c | 2
-rw-r--r--  fs/direct-io.c | 646
-rw-r--r--  fs/ecryptfs/Kconfig | 2
-rw-r--r--  fs/ecryptfs/keystore.c | 2
-rw-r--r--  fs/ecryptfs/main.c | 23
-rw-r--r--  fs/ecryptfs/read_write.c | 18
-rw-r--r--  fs/eventpoll.c | 2
-rw-r--r--  fs/exec.c | 17
-rw-r--r--  fs/exofs/Kbuild | 3
-rw-r--r--  fs/exofs/Kconfig | 9
-rw-r--r--  fs/exofs/exofs.h | 26
-rw-r--r--  fs/exofs/inode.c | 233
-rw-r--r--  fs/exofs/ore.c | 656
-rw-r--r--  fs/exofs/ore_raid.c | 660
-rw-r--r--  fs/exofs/ore_raid.h | 79
-rw-r--r--  fs/exofs/super.c | 205
-rw-r--r--  fs/ext2/xattr_security.c | 34
-rw-r--r--  fs/ext3/inode.c | 4
-rw-r--r--  fs/ext3/namei.c | 9
-rw-r--r--  fs/ext3/xattr_security.c | 36
-rw-r--r--  fs/ext4/ext4.h | 1
-rw-r--r--  fs/ext4/ext4_jbd2.h | 4
-rw-r--r--  fs/ext4/file.c | 47
-rw-r--r--  fs/ext4/indirect.c | 9
-rw-r--r--  fs/ext4/inode.c | 27
-rw-r--r--  fs/ext4/namei.c | 9
-rw-r--r--  fs/ext4/page-io.c | 24
-rw-r--r--  fs/ext4/super.c | 1
-rw-r--r--  fs/ext4/xattr_security.c | 36
-rw-r--r--  fs/fat/dir.c | 2
-rw-r--r--  fs/fat/inode.c | 7
-rw-r--r--  fs/fuse/dev.c | 16
-rw-r--r--  fs/fuse/file.c | 84
-rw-r--r--  fs/fuse/fuse_i.h | 8
-rw-r--r--  fs/fuse/inode.c | 13
-rw-r--r--  fs/gfs2/acl.c | 5
-rw-r--r--  fs/gfs2/aops.c | 8
-rw-r--r--  fs/gfs2/bmap.c | 199
-rw-r--r--  fs/gfs2/dir.c | 50
-rw-r--r--  fs/gfs2/file.c | 299
-rw-r--r--  fs/gfs2/glops.c | 89
-rw-r--r--  fs/gfs2/glops.h | 2
-rw-r--r--  fs/gfs2/incore.h | 23
-rw-r--r--  fs/gfs2/inode.c | 150
-rw-r--r--  fs/gfs2/inode.h | 2
-rw-r--r--  fs/gfs2/log.c | 4
-rw-r--r--  fs/gfs2/lops.c | 66
-rw-r--r--  fs/gfs2/meta_io.c | 6
-rw-r--r--  fs/gfs2/ops_fstype.c | 8
-rw-r--r--  fs/gfs2/quota.c | 30
-rw-r--r--  fs/gfs2/rgrp.c | 573
-rw-r--r--  fs/gfs2/rgrp.h | 31
-rw-r--r--  fs/gfs2/super.c | 134
-rw-r--r--  fs/gfs2/trans.c | 5
-rw-r--r--  fs/gfs2/trans.h | 22
-rw-r--r--  fs/gfs2/xattr.c | 28
-rw-r--r--  fs/hfsplus/super.c | 15
-rw-r--r--  fs/hfsplus/wrapper.c | 4
-rw-r--r--  fs/hugetlbfs/inode.c | 1
-rw-r--r--  fs/inode.c | 26
-rw-r--r--  fs/jffs2/security.c | 35
-rw-r--r--  fs/jfs/jfs_umount.c | 4
-rw-r--r--  fs/jfs/xattr.c | 57
-rw-r--r--  fs/lockd/host.c | 25
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/locks.c | 225
-rw-r--r--  fs/namei.c | 58
-rw-r--r--  fs/namespace.c | 3
-rw-r--r--  fs/nfs/Kconfig | 16
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 59
-rw-r--r--  fs/nfs/blocklayout/blocklayout.h | 4
-rw-r--r--  fs/nfs/blocklayout/blocklayoutdev.c | 35
-rw-r--r--  fs/nfs/callback.c | 4
-rw-r--r--  fs/nfs/callback.h | 2
-rw-r--r--  fs/nfs/callback_proc.c | 25
-rw-r--r--  fs/nfs/callback_xdr.c | 24
-rw-r--r--  fs/nfs/client.c | 11
-rw-r--r--  fs/nfs/delegation.c | 2
-rw-r--r--  fs/nfs/file.c | 10
-rw-r--r--  fs/nfs/fscache-index.c | 4
-rw-r--r--  fs/nfs/idmap.c | 25
-rw-r--r--  fs/nfs/inode.c | 16
-rw-r--r--  fs/nfs/internal.h | 10
-rw-r--r--  fs/nfs/nfs4_fs.h | 32
-rw-r--r--  fs/nfs/nfs4filelayout.c | 33
-rw-r--r--  fs/nfs/nfs4proc.c | 113
-rw-r--r--  fs/nfs/nfs4renewd.c | 12
-rw-r--r--  fs/nfs/nfs4state.c | 6
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 28
-rw-r--r--  fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3
-rw-r--r--  fs/nfs/pnfs.c | 52
-rw-r--r--  fs/nfs/pnfs.h | 5
-rw-r--r--  fs/nfs/read.c | 40
-rw-r--r--  fs/nfs/super.c | 42
-rw-r--r--  fs/nfs/unlink.c | 4
-rw-r--r--  fs/nfs/write.c | 75
-rw-r--r--  fs/nfsd/export.c | 16
-rw-r--r--  fs/nfsd/nfs4callback.c | 20
-rw-r--r--  fs/nfsd/nfs4proc.c | 374
-rw-r--r--  fs/nfsd/nfs4recover.c | 53
-rw-r--r--  fs/nfsd/nfs4state.c | 1794
-rw-r--r--  fs/nfsd/nfs4xdr.c | 380
-rw-r--r--  fs/nfsd/nfsctl.c | 1
-rw-r--r--  fs/nfsd/nfsd.h | 33
-rw-r--r--  fs/nfsd/nfsfh.c | 39
-rw-r--r--  fs/nfsd/state.h | 174
-rw-r--r--  fs/nfsd/vfs.c | 31
-rw-r--r--  fs/nfsd/vfs.h | 29
-rw-r--r--  fs/nfsd/xdr4.h | 28
-rw-r--r--  fs/ocfs2/xattr.c | 38
-rw-r--r--  fs/open.c | 4
-rw-r--r--  fs/posix_acl.c | 2
-rw-r--r--  fs/proc/stat.c | 41
-rw-r--r--  fs/proc/task_mmu.c | 80
-rw-r--r--  fs/quota/quota.c | 2
-rw-r--r--  fs/read_write.c | 74
-rw-r--r--  fs/reiserfs/journal.c | 9
-rw-r--r--  fs/reiserfs/resize.c | 4
-rw-r--r--  fs/reiserfs/xattr_security.c | 4
-rw-r--r--  fs/squashfs/Kconfig | 6
-rw-r--r--  fs/stat.c | 2
-rw-r--r--  fs/sysfs/dir.c | 182
-rw-r--r--  fs/sysfs/file.c | 56
-rw-r--r--  fs/sysfs/inode.c | 16
-rw-r--r--  fs/sysfs/sysfs.h | 17
-rw-r--r--  fs/ubifs/debug.h | 6
-rw-r--r--  fs/xattr.c | 63
-rw-r--r--  fs/xfs/Makefile | 119
-rw-r--r--  fs/xfs/kmem.c (renamed from fs/xfs/linux-2.6/kmem.c) | 0
-rw-r--r--  fs/xfs/kmem.h (renamed from fs/xfs/linux-2.6/kmem.h) | 7
-rw-r--r--  fs/xfs/mrlock.h (renamed from fs/xfs/linux-2.6/mrlock.h) | 0
-rw-r--r--  fs/xfs/time.h (renamed from fs/xfs/linux-2.6/time.h) | 0
-rw-r--r--  fs/xfs/uuid.c (renamed from fs/xfs/support/uuid.c) | 0
-rw-r--r--  fs/xfs/uuid.h (renamed from fs/xfs/support/uuid.h) | 0
-rw-r--r--  fs/xfs/xfs.h | 3
-rw-r--r--  fs/xfs/xfs_acl.c (renamed from fs/xfs/linux-2.6/xfs_acl.c) | 0
-rw-r--r--  fs/xfs/xfs_ag.h | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 11
-rw-r--r--  fs/xfs/xfs_aops.c (renamed from fs/xfs/linux-2.6/xfs_aops.c) | 118
-rw-r--r--  fs/xfs/xfs_aops.h (renamed from fs/xfs/linux-2.6/xfs_aops.h) | 4
-rw-r--r--  fs/xfs/xfs_attr.c | 90
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 7
-rw-r--r--  fs/xfs/xfs_bmap.c | 2534
-rw-r--r--  fs/xfs/xfs_bmap.h | 318
-rw-r--r--  fs/xfs/xfs_btree.c | 26
-rw-r--r--  fs/xfs/xfs_btree.h | 2
-rw-r--r--  fs/xfs/xfs_buf.c (renamed from fs/xfs/linux-2.6/xfs_buf.c) | 257
-rw-r--r--  fs/xfs/xfs_buf.h (renamed from fs/xfs/linux-2.6/xfs_buf.h) | 69
-rw-r--r--  fs/xfs/xfs_buf_item.c | 37
-rw-r--r--  fs/xfs/xfs_da_btree.c | 66
-rw-r--r--  fs/xfs/xfs_dfrag.c | 6
-rw-r--r--  fs/xfs/xfs_dinode.h | 2
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 6
-rw-r--r--  fs/xfs/xfs_discard.c (renamed from fs/xfs/linux-2.6/xfs_discard.c) | 20
-rw-r--r--  fs/xfs/xfs_discard.h (renamed from fs/xfs/linux-2.6/xfs_discard.h) | 0
-rw-r--r--  fs/xfs/xfs_dquot.c (renamed from fs/xfs/quota/xfs_dquot.c) | 46
-rw-r--r--  fs/xfs/xfs_dquot.h (renamed from fs/xfs/quota/xfs_dquot.h) | 0
-rw-r--r--  fs/xfs/xfs_dquot_item.c (renamed from fs/xfs/quota/xfs_dquot_item.c) | 10
-rw-r--r--  fs/xfs/xfs_dquot_item.h (renamed from fs/xfs/quota/xfs_dquot_item.h) | 0
-rw-r--r--  fs/xfs/xfs_export.c (renamed from fs/xfs/linux-2.6/xfs_export.c) | 12
-rw-r--r--  fs/xfs/xfs_export.h (renamed from fs/xfs/linux-2.6/xfs_export.h) | 0
-rw-r--r--  fs/xfs/xfs_file.c (renamed from fs/xfs/linux-2.6/xfs_file.c) | 168
-rw-r--r--  fs/xfs/xfs_filestream.c | 4
-rw-r--r--  fs/xfs/xfs_fs_subr.c (renamed from fs/xfs/linux-2.6/xfs_fs_subr.c) | 0
-rw-r--r--  fs/xfs/xfs_fsops.c | 60
-rw-r--r--  fs/xfs/xfs_globals.c (renamed from fs/xfs/linux-2.6/xfs_globals.c) | 0
-rw-r--r--  fs/xfs/xfs_ialloc.c | 18
-rw-r--r--  fs/xfs/xfs_iget.c | 2
-rw-r--r--  fs/xfs/xfs_inode.c | 47
-rw-r--r--  fs/xfs/xfs_inode.h | 1
-rw-r--r--  fs/xfs/xfs_inode_item.c | 14
-rw-r--r--  fs/xfs/xfs_ioctl.c (renamed from fs/xfs/linux-2.6/xfs_ioctl.c) | 2
-rw-r--r--  fs/xfs/xfs_ioctl.h (renamed from fs/xfs/linux-2.6/xfs_ioctl.h) | 0
-rw-r--r--  fs/xfs/xfs_ioctl32.c (renamed from fs/xfs/linux-2.6/xfs_ioctl32.c) | 0
-rw-r--r--  fs/xfs/xfs_ioctl32.h (renamed from fs/xfs/linux-2.6/xfs_ioctl32.h) | 0
-rw-r--r--  fs/xfs/xfs_iomap.c | 39
-rw-r--r--  fs/xfs/xfs_iops.c (renamed from fs/xfs/linux-2.6/xfs_iops.c) | 67
-rw-r--r--  fs/xfs/xfs_iops.h (renamed from fs/xfs/linux-2.6/xfs_iops.h) | 0
-rw-r--r--  fs/xfs/xfs_linux.h (renamed from fs/xfs/linux-2.6/xfs_linux.h) | 29
-rw-r--r--  fs/xfs/xfs_log.c | 34
-rw-r--r--  fs/xfs/xfs_log_recover.c | 81
-rw-r--r--  fs/xfs/xfs_message.c (renamed from fs/xfs/linux-2.6/xfs_message.c) | 0
-rw-r--r--  fs/xfs/xfs_message.h (renamed from fs/xfs/linux-2.6/xfs_message.h) | 0
-rw-r--r--  fs/xfs/xfs_mount.c | 40
-rw-r--r--  fs/xfs/xfs_qm.c (renamed from fs/xfs/quota/xfs_qm.c) | 14
-rw-r--r--  fs/xfs/xfs_qm.h (renamed from fs/xfs/quota/xfs_qm.h) | 0
-rw-r--r--  fs/xfs/xfs_qm_bhv.c (renamed from fs/xfs/quota/xfs_qm_bhv.c) | 0
-rw-r--r--  fs/xfs/xfs_qm_stats.c (renamed from fs/xfs/quota/xfs_qm_stats.c) | 0
-rw-r--r--  fs/xfs/xfs_qm_stats.h (renamed from fs/xfs/quota/xfs_qm_stats.h) | 0
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c (renamed from fs/xfs/quota/xfs_qm_syscalls.c) | 2
-rw-r--r--  fs/xfs/xfs_quota_priv.h (renamed from fs/xfs/quota/xfs_quota_priv.h) | 0
-rw-r--r--  fs/xfs/xfs_quotaops.c (renamed from fs/xfs/linux-2.6/xfs_quotaops.c) | 2
-rw-r--r--  fs/xfs/xfs_rename.c | 8
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 80
-rw-r--r--  fs/xfs/xfs_rtalloc.h | 2
-rw-r--r--  fs/xfs/xfs_rw.c | 27
-rw-r--r--  fs/xfs/xfs_rw.h | 2
-rw-r--r--  fs/xfs/xfs_sb.h | 2
-rw-r--r--  fs/xfs/xfs_stats.c (renamed from fs/xfs/linux-2.6/xfs_stats.c) | 0
-rw-r--r--  fs/xfs/xfs_stats.h (renamed from fs/xfs/linux-2.6/xfs_stats.h) | 0
-rw-r--r--  fs/xfs/xfs_super.c (renamed from fs/xfs/linux-2.6/xfs_super.c) | 60
-rw-r--r--  fs/xfs/xfs_super.h (renamed from fs/xfs/linux-2.6/xfs_super.h) | 0
-rw-r--r--  fs/xfs/xfs_sync.c (renamed from fs/xfs/linux-2.6/xfs_sync.c) | 18
-rw-r--r--  fs/xfs/xfs_sync.h (renamed from fs/xfs/linux-2.6/xfs_sync.h) | 0
-rw-r--r--  fs/xfs/xfs_sysctl.c (renamed from fs/xfs/linux-2.6/xfs_sysctl.c) | 0
-rw-r--r--  fs/xfs/xfs_sysctl.h (renamed from fs/xfs/linux-2.6/xfs_sysctl.h) | 0
-rw-r--r--  fs/xfs/xfs_trace.c (renamed from fs/xfs/linux-2.6/xfs_trace.c) | 4
-rw-r--r--  fs/xfs/xfs_trace.h (renamed from fs/xfs/linux-2.6/xfs_trace.h) | 39
-rw-r--r--  fs/xfs/xfs_trans.c | 13
-rw-r--r--  fs/xfs/xfs_trans.h | 10
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 191
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 50
-rw-r--r--  fs/xfs/xfs_trans_dquot.c (renamed from fs/xfs/quota/xfs_trans_dquot.c) | 0
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 25
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 9
-rw-r--r--  fs/xfs/xfs_vnode.h (renamed from fs/xfs/linux-2.6/xfs_vnode.h) | 0
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 119
-rw-r--r--  fs/xfs/xfs_xattr.c (renamed from fs/xfs/linux-2.6/xfs_xattr.c) | 0
274 files changed, 10699 insertions(+), 8128 deletions(-)
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index ef9661886112..2b78014a124a 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -132,21 +132,19 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
-		int token;
+		int token, r;
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token < Opt_uname) {
-			int r = match_int(&args[0], &option);
+		switch (token) {
+		case Opt_debug:
+			r = match_int(&args[0], &option);
 			if (r < 0) {
 				P9_DPRINTK(P9_DEBUG_ERROR,
 					   "integer field, but no integer?\n");
 				ret = r;
 				continue;
 			}
-		}
-		switch (token) {
-		case Opt_debug:
 			v9ses->debug = option;
 #ifdef CONFIG_NET_9P_DEBUG
 			p9_debug_level = option;
@@ -154,12 +152,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			break;
 
 		case Opt_dfltuid:
+			r = match_int(&args[0], &option);
+			if (r < 0) {
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					   "integer field, but no integer?\n");
+				ret = r;
+				continue;
+			}
 			v9ses->dfltuid = option;
 			break;
 		case Opt_dfltgid:
+			r = match_int(&args[0], &option);
+			if (r < 0) {
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					   "integer field, but no integer?\n");
+				ret = r;
+				continue;
+			}
 			v9ses->dfltgid = option;
 			break;
 		case Opt_afid:
+			r = match_int(&args[0], &option);
+			if (r < 0) {
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					   "integer field, but no integer?\n");
+				ret = r;
+				continue;
+			}
 			v9ses->afid = option;
 			break;
 		case Opt_uname:
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 46ce357ca1ab..410ffd6ceb5f 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
 
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode);
+		    struct inode *inode, int mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -83,4 +83,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 	v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
 	return;
 }
+
+int v9fs_open_to_dotl_flags(int flags);
 #endif
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 9c2bdda5cd9d..598fff1a54e5 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -165,9 +165,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		while (rdir->head < rdir->tail) {
 			p9stat_init(&st);
-			err = p9stat_read(rdir->buf + rdir->head,
-					  rdir->tail - rdir->head, &st,
-					  fid->clnt->proto_version);
+			err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
+					  rdir->tail - rdir->head, &st);
 			if (err) {
 				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
 				err = -EIO;
@@ -231,7 +230,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
 	while (err == 0) {
 		if (rdir->tail == rdir->head) {
 			err = p9_client_readdir(fid, rdir->buf, buflen,
-						filp->f_pos);
+					filp->f_pos);
 			if (err <= 0)
 				goto unlock_and_exit;
 
@@ -241,10 +240,9 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
 
 		while (rdir->head < rdir->tail) {
 
-			err = p9dirent_read(rdir->buf + rdir->head,
-					    rdir->tail - rdir->head,
-					    &curdirent,
-					    fid->clnt->proto_version);
+			err = p9dirent_read(fid->clnt, rdir->buf + rdir->head,
+					    rdir->tail - rdir->head,
+					    &curdirent);
 			if (err < 0) {
 				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
 				err = -EIO;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3c173fcc2c5a..62857a810a79 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	v9inode = V9FS_I(inode);
 	v9ses = v9fs_inode2v9ses(inode);
 	if (v9fs_proto_dotl(v9ses))
-		omode = file->f_flags;
+		omode = v9fs_open_to_dotl_flags(file->f_flags);
 	else
 		omode = v9fs_uflags2omode(file->f_flags,
 					v9fs_proto_dotu(v9ses));
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 	/* convert posix lock to p9 tlock args */
 	memset(&flock, 0, sizeof(flock));
-	flock.type = fl->fl_type;
+	/* map the lock type */
+	switch (fl->fl_type) {
+	case F_RDLCK:
+		flock.type = P9_LOCK_TYPE_RDLCK;
+		break;
+	case F_WRLCK:
+		flock.type = P9_LOCK_TYPE_WRLCK;
+		break;
+	case F_UNLCK:
+		flock.type = P9_LOCK_TYPE_UNLCK;
+		break;
+	}
 	flock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 	/* convert posix lock to p9 tgetlock args */
 	memset(&glock, 0, sizeof(glock));
-	glock.type = fl->fl_type;
+	glock.type = P9_LOCK_TYPE_UNLCK;
 	glock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 	res = p9_client_getlock_dotl(fid, &glock);
 	if (res < 0)
 		return res;
-	if (glock.type != F_UNLCK) {
-		fl->fl_type = glock.type;
+	/* map 9p lock type to os lock type */
+	switch (glock.type) {
+	case P9_LOCK_TYPE_RDLCK:
+		fl->fl_type = F_RDLCK;
+		break;
+	case P9_LOCK_TYPE_WRLCK:
+		fl->fl_type = F_WRLCK;
+		break;
+	case P9_LOCK_TYPE_UNLCK:
+		fl->fl_type = F_UNLCK;
+		break;
+	}
+	if (glock.type != P9_LOCK_TYPE_UNLCK) {
 		fl->fl_start = glock.start;
 		if (glock.length == 0)
 			fl->fl_end = OFFSET_MAX;
 		else
 			fl->fl_end = glock.start + glock.length - 1;
 		fl->fl_pid = glock.proc_id;
-	} else
-		fl->fl_type = F_UNLCK;
-
+	}
 	return res;
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8bb5507e822f..b5a1076aaa6c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 /**
  * p9mode2unixmode- convert plan9 mode bits to unix mode bits
  * @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
  *
  */
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+			   struct p9_wstat *stat, dev_t *rdev)
 {
 	int res;
+	int mode = stat->mode;
 
-	res = mode & 0777;
+	res = mode & S_IALLUGO;
+	*rdev = 0;
 
 	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
 	else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
-		 && (v9ses->nodev == 0))
-		res |= S_IFBLK;
-	else
+		 && (v9ses->nodev == 0)) {
+		char type = 0, ext[32];
+		int major = -1, minor = -1;
+
+		strncpy(ext, stat->extension, sizeof(ext));
+		sscanf(ext, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			res |= S_IFCHR;
+			break;
+		case 'b':
+			res |= S_IFBLK;
+			break;
+		default:
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "Unknown special type %c %s\n", type,
+				   stat->extension);
+		};
+		*rdev = MKDEV(major, minor);
+	} else
 		res |= S_IFREG;
 
 	if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
 			res |= S_ISVTX;
 	}
-
 	return res;
 }
 
@@ -242,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode)
+		    struct inode *inode, int mode, dev_t rdev)
 {
 	int err = 0;
 
 	inode_init_owner(inode, NULL, mode);
 	inode->i_blocks = 0;
-	inode->i_rdev = 0;
+	inode->i_rdev = rdev;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
 
@@ -259,10 +278,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
 	case S_IFSOCK:
 		if (v9fs_proto_dotl(v9ses)) {
 			inode->i_op = &v9fs_file_inode_operations_dotl;
-			inode->i_fop = &v9fs_file_operations_dotl;
 		} else if (v9fs_proto_dotu(v9ses)) {
 			inode->i_op = &v9fs_file_inode_operations;
-			inode->i_fop = &v9fs_file_operations;
 		} else {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				   "special files without extended mode\n");
@@ -335,7 +352,7 @@ error:
  *
  */
 
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
 {
 	int err;
 	struct inode *inode;
@@ -348,7 +365,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	err = v9fs_init_inode(v9ses, inode, mode);
+	err = v9fs_init_inode(v9ses, inode, mode, rdev);
 	if (err) {
 		iput(inode);
 		return ERR_PTR(err);
@@ -435,11 +452,12 @@ void v9fs_evict_inode(struct inode *inode)
 static int v9fs_test_inode(struct inode *inode, void *data)
 {
 	int umode;
+	dev_t rdev;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 	struct p9_wstat *st = (struct p9_wstat *)data;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 
-	umode = p9mode2unixmode(v9ses, st->mode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
 	/* don't match inode of different type */
 	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
 		return 0;
@@ -473,6 +491,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 				   struct p9_wstat *st,
 				   int new)
 {
+	dev_t rdev;
 	int retval, umode;
 	unsigned long i_ino;
 	struct inode *inode;
@@ -496,8 +515,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	umode = p9mode2unixmode(v9ses, st->mode);
-	retval = v9fs_init_inode(v9ses, inode, umode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	retval = v9fs_init_inode(v9ses, inode, umode, rdev);
 	if (retval)
 		goto error;
 
@@ -532,6 +551,19 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 }
 
 /**
+ * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
+ * plan 9 AT flag.
+ * @flags: flags to convert
+ */
+static int v9fs_at_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+	if (flags & AT_REMOVEDIR)
+		rflags |= P9_DOTL_AT_REMOVEDIR;
+	return rflags;
+}
+
+/**
  * v9fs_remove - helper function to remove files and directories
  * @dir: directory inode that is being deleted
  * @dentry:  dentry that is being deleted
@@ -558,7 +590,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 		return retval;
 	}
 	if (v9fs_proto_dotl(v9ses))
-		retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
+		retval = p9_client_unlinkat(dfid, dentry->d_name.name,
+					    v9fs_at_to_dotl_flags(flags));
 	if (retval == -EOPNOTSUPP) {
 		/* Try the one based on path */
 		v9fid = v9fs_fid_clone(dentry);
@@ -645,13 +678,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
-
+	d_instantiate(dentry, inode);
 	return ofid;
-
 error:
 	if (ofid)
 		p9_client_clunk(ofid);
@@ -792,6 +823,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 				      struct nameidata *nameidata)
 {
+	struct dentry *res;
 	struct super_block *sb;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *dfid, *fid;
@@ -823,22 +855,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 		return ERR_PTR(result);
 	}
-
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	/*
+	 * Make sure we don't use a wrong inode due to parallel
+	 * unlink. For cached mode create calls request for new
+	 * inode. But with cache disabled, lookup should do this.
+	 */
+	if (v9ses->cache)
+		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	else
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		result = PTR_ERR(inode);
 		inode = NULL;
 		goto error;
 	}
-
 	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
 		goto error_iput;
-
 inst_out:
-	d_add(dentry, inode);
-	return NULL;
-
+	/*
+	 * If we had a rename on the server and a parallel lookup
+	 * for the new name, then make sure we instantiate with
+	 * the new name. ie look up for a/b, while on server somebody
+	 * moved b under k and client parallely did a lookup for
+	 * k/b.
+	 */
+	res = d_materialise_unique(dentry, inode);
+	if (!IS_ERR(res))
+		return res;
+	result = PTR_ERR(res);
 error_iput:
 	iput(inode);
 error:
@@ -1002,7 +1047,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-		generic_fillattr(dentry->d_inode, stat);
+	generic_fillattr(dentry->d_inode, stat);
 
 	p9stat_free(st);
 	kfree(st);
@@ -1086,6 +1131,7 @@ void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
+	mode_t mode;
 	char ext[32];
 	char tag_name[14];
 	unsigned int i_nlink;
@@ -1121,31 +1167,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 			inode->i_nlink = i_nlink;
 		}
 	}
-	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
-		char type = 0;
-		int major = -1;
-		int minor = -1;
-
-		strncpy(ext, stat->extension, sizeof(ext));
-		sscanf(ext, "%c %u %u", &type, &major, &minor);
-		switch (type) {
-		case 'c':
-			inode->i_mode &= ~S_IFBLK;
-			inode->i_mode |= S_IFCHR;
-			break;
-		case 'b':
-			break;
-		default:
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "Unknown special type %c %s\n", type,
-				   stat->extension);
-		};
-		inode->i_rdev = MKDEV(major, minor);
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-	} else
-		inode->i_rdev = 0;
-
+	mode = stat->mode & S_IALLUGO;
+	mode |= inode->i_mode & ~S_IALLUGO;
+	inode->i_mode = mode;
 	i_size_write(inode, stat->length);
 
 	/* not real number of blocks, but 512 byte ones ... */
@@ -1411,6 +1435,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
+	int umode;
+	dev_t rdev;
 	loff_t i_size;
 	struct p9_wstat *st;
 	struct v9fs_session_info *v9ses;
@@ -1419,6 +1445,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -1430,6 +1462,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	p9stat_free(st);
 	kfree(st);
 	return 0;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index b6c8ed205192..aded79fcd5cf 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -153,7 +153,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+	retval = v9fs_init_inode(v9ses, inode,
+				 st->st_mode, new_decode_dev(st->st_rdev));
 	if (retval)
 		goto error;
 
@@ -190,6 +191,58 @@ v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	return inode;
 }
 
+struct dotl_openflag_map {
+	int open_flag;
+	int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+	int i;
+	int rflags = 0;
+	struct dotl_openflag_map dotl_oflag_map[] = {
+		{ O_CREAT,	P9_DOTL_CREATE },
+		{ O_EXCL,	P9_DOTL_EXCL },
+		{ O_NOCTTY,	P9_DOTL_NOCTTY },
+		{ O_TRUNC,	P9_DOTL_TRUNC },
+		{ O_APPEND,	P9_DOTL_APPEND },
+		{ O_NONBLOCK,	P9_DOTL_NONBLOCK },
+		{ O_DSYNC,	P9_DOTL_DSYNC },
+		{ FASYNC,	P9_DOTL_FASYNC },
+		{ O_DIRECT,	P9_DOTL_DIRECT },
+		{ O_LARGEFILE,	P9_DOTL_LARGEFILE },
+		{ O_DIRECTORY,	P9_DOTL_DIRECTORY },
+		{ O_NOFOLLOW,	P9_DOTL_NOFOLLOW },
+		{ O_NOATIME,	P9_DOTL_NOATIME },
+		{ O_CLOEXEC,	P9_DOTL_CLOEXEC },
+		{ O_SYNC,	P9_DOTL_SYNC},
+	};
+	for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+		if (flags & dotl_oflag_map[i].open_flag)
+			rflags |= dotl_oflag_map[i].dotl_flag;
+	}
+	return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+
+	/*
+	 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+	 * and P9_DOTL_NOACCESS
+	 */
+	rflags |= flags & O_ACCMODE;
+	rflags |= v9fs_mapped_dotl_flags(flags);
+
+	return rflags;
+}
+
 /**
  * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
  * @dir: directory inode that is being created
@@ -258,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			   "Failed to get acl values in creat %d\n", err);
 		goto error;
 	}
-	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+	err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+				    mode, gid, &qid);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_VFS,
 			   "p9_client_open_dotl failed in creat %d\n",
@@ -281,10 +335,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
+	d_instantiate(dentry, inode);
 
 	/* Now set the ACL based on the default value */
 	v9fs_set_create_acl(dentry, &dacl, &pacl);
@@ -403,10 +457,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			   err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
@@ -414,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 		 * inode with stat. We need to get an inode
 		 * so that we can set the acl with dentry
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -540,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 void
 v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 {
+	mode_t mode;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -552,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		inode->i_uid = stat->st_uid;
 		inode->i_gid = stat->st_gid;
 		inode->i_nlink = stat->st_nlink;
-		inode->i_mode = stat->st_mode;
-		inode->i_rdev = new_decode_dev(stat->st_rdev);
 
-		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		mode = stat->st_mode & S_IALLUGO;
+		mode |= inode->i_mode & ~S_IALLUGO;
+		inode->i_mode = mode;
 
 		i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
@@ -657,14 +711,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			   err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/* Not in cached mode. No need to populate inode with stat */
-		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+		inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -810,17 +864,17 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			   err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
		 * Not in cached mode. No need to populate inode with stat.
		 * socket syscall returns a fd, so we need instantiate
		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, rdev);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -886,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -897,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	kfree(st);
 	return 0;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index feef6cdc1fd2..c70251d47ed1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 	else
 		sb->s_d_op = &v9fs_dentry_operations;
 
-	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
 		goto release_sb;
diff --git a/fs/Makefile b/fs/Makefile
index afc109691a9b..d2c3353d5477 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -120,6 +120,6 @@ obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)		+= gfs2/
-obj-$(CONFIG_EXOFS_FS)		+= exofs/
+obj-y				+= exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
diff --git a/fs/attr.c b/fs/attr.c
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -13,6 +13,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fcntl.h>
 #include <linux/security.h>
+#include <linux/evm.h>
 
 /**
  * inode_change_ok - check if attribute changes to an inode are allowed
@@ -237,8 +238,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	else
 		error = simple_setattr(dentry, attr);
 
-	if (!error)
+	if (!error) {
 		fsnotify_change(dentry, ia_valid);
+		evm_inode_post_setattr(dentry, ia_valid);
+	}
 
 	return error;
 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 475f9c597cb7..326dc08d3e3f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -39,27 +39,17 @@
 
 /* #define DEBUG */
 
-#ifdef DEBUG
-#define DPRINTK(fmt, args...)				\
-do {							\
-	printk(KERN_DEBUG "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
-#else
-#define DPRINTK(fmt, args...) do {} while (0)
-#endif
-
-#define AUTOFS_WARN(fmt, args...)			\
-do {							\
+#define DPRINTK(fmt, ...)				\
+	pr_debug("pid %d: %s: " fmt "\n",		\
+		current->pid, __func__, ##__VA_ARGS__)
+
+#define AUTOFS_WARN(fmt, ...)				\
 	printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
-#define AUTOFS_ERROR(fmt, args...)			\
-do {							\
+#define AUTOFS_ERROR(fmt, ...)				\
 	printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 25435987d6ae..e1fbdeef85db 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-		wq->wait_queue_token, wq->name.len, wq->name.name, type);
+		(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
 
 	memset(&pkt,0,sizeof pkt); /* For security reasons */
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc8..720d885e8dca 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 		befs_data_stream *data = &befs_ino->i_data.ds;
 		befs_off_t len = data->size;
 
-		befs_debug(sb, "Follow long symlink");
-
-		link = kmalloc(len, GFP_NOFS);
-		if (!link) {
-			link = ERR_PTR(-ENOMEM);
-		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-			kfree(link);
-			befs_error(sb, "Failed to read entire long symlink");
+		if (len == 0) {
+			befs_error(sb, "Long symlink with illegal length");
 			link = ERR_PTR(-EIO);
 		} else {
-			link[len - 1] = '\0';
+			befs_debug(sb, "Follow long symlink");
+
+			link = kmalloc(len, GFP_NOFS);
+			if (!link) {
+				link = ERR_PTR(-ENOMEM);
+			} else if (befs_read_lsymlink(sb, data, link, len) != len) {
+				kfree(link);
+				befs_error(sb, "Failed to read entire long symlink");
+				link = ERR_PTR(-EIO);
+			} else {
+				link[len - 1] = '\0';
+			}
 		}
 	} else {
 		link = befs_ino->i_data.symlink;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ff77262e887c..95f786ec7f08 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1429,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
+		/* ->release can cause the old bdi to disappear,
+		 * so must switch it out first
+		 */
+		bdev_inode_switch_bdi(bdev->bd_inode,
+					&default_backing_dev_info);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
@@ -1442,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
-		bdev_inode_switch_bdi(bdev->bd_inode,
-					&default_backing_dev_info);
 		if (bdev != bdev->bd_contains)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 502b9e988679..d9f99a16edd6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -176,7 +176,11 @@ static inline u64 btrfs_ino(struct inode *inode) | |||
176 | { | 176 | { |
177 | u64 ino = BTRFS_I(inode)->location.objectid; | 177 | u64 ino = BTRFS_I(inode)->location.objectid; |
178 | 178 | ||
179 | if (ino <= BTRFS_FIRST_FREE_OBJECTID) | 179 | /* |
180 | * !ino: btree_inode | ||
181 | * type == BTRFS_ROOT_ITEM_KEY: subvol dir | ||
182 | */ | ||
183 | if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY) | ||
180 | ino = inode->i_ino; | 184 | ino = inode->i_ino; |
181 | return ino; | 185 | return ino; |
182 | } | 186 | } |
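The btrfs_inode.h change replaces a magnitude test with exactly the two cases the new comment names. A compilable sketch of the new predicate; the stub structs stand in for the kernel's, and 132 is the on-disk value of BTRFS_ROOT_ITEM_KEY:

#include <stdint.h>
#include <stdio.h>

#define BTRFS_ROOT_ITEM_KEY 132         /* on-disk key type for root items */

struct btrfs_key  { uint64_t objectid; uint8_t type; };
struct inode_stub { uint64_t i_ino; struct btrfs_key location; };

/* Sketch of the fixed helper: only the btree inode (objectid 0) and
 * subvolume dirs (ROOT_ITEM location keys) fall back to i_ino; any
 * other objectid, however small, is trusted as-is. */
static uint64_t btrfs_ino_sketch(const struct inode_stub *inode)
{
        uint64_t ino = inode->location.objectid;

        if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
                ino = inode->i_ino;
        return ino;
}

int main(void)
{
        struct inode_stub subvol = { 99, { 5, BTRFS_ROOT_ITEM_KEY } };

        printf("%llu\n", (unsigned long long)btrfs_ino_sketch(&subvol));
        return 0;
}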
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0469263e327e..03912c5c6f49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | |||
1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | 1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ |
1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | 1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ |
1417 | { \ | 1417 | { \ |
1418 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1418 | type *p = page_address(eb->first_page); \ |
1419 | u##bits res = le##bits##_to_cpu(p->member); \ | 1419 | u##bits res = le##bits##_to_cpu(p->member); \ |
1420 | kunmap_atomic(p, KM_USER0); \ | ||
1421 | return res; \ | 1420 | return res; \ |
1422 | } \ | 1421 | } \ |
1423 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ | 1422 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ |
1424 | u##bits val) \ | 1423 | u##bits val) \ |
1425 | { \ | 1424 | { \ |
1426 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1425 | type *p = page_address(eb->first_page); \ |
1427 | p->member = cpu_to_le##bits(val); \ | 1426 | p->member = cpu_to_le##bits(val); \ |
1428 | kunmap_atomic(p, KM_USER0); \ | ||
1429 | } | 1427 | } |
1430 | 1428 | ||
1431 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ | 1429 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ |
@@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2367 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2368 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2369 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2370 | int btrfs_drop_snapshot(struct btrfs_root *root, | 2368 | void btrfs_drop_snapshot(struct btrfs_root *root, |
2371 | struct btrfs_block_rsv *block_rsv, int update_ref); | 2369 | struct btrfs_block_rsv *block_rsv, int update_ref); |
2372 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2373 | struct btrfs_root *root, | 2371 | struct btrfs_root *root, |
2374 | struct extent_buffer *node, | 2372 | struct extent_buffer *node, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 66bac226944e..f5be06a2462f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1782 | 1782 | ||
1783 | 1783 | ||
1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1785 | if (!stripe->dev->can_discard) | ||
1786 | continue; | ||
1787 | |||
1785 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1788 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1786 | stripe->physical, | 1789 | stripe->physical, |
1787 | stripe->length); | 1790 | stripe->length); |
@@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1789 | discarded_bytes += stripe->length; | 1792 | discarded_bytes += stripe->length; |
1790 | else if (ret != -EOPNOTSUPP) | 1793 | else if (ret != -EOPNOTSUPP) |
1791 | break; | 1794 | break; |
1795 | |||
1796 | /* | ||
1797 | * Just in case we get back EOPNOTSUPP for some reason, | ||
1798 | * just ignore the return value so we don't screw up | ||
1799 | * people calling discard_extent. | ||
1800 | */ | ||
1801 | ret = 0; | ||
1792 | } | 1802 | } |
1793 | kfree(multi); | 1803 | kfree(multi); |
1794 | } | 1804 | } |
1795 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1796 | ret = 0; | ||
1797 | 1805 | ||
1798 | if (actual_bytes) | 1806 | if (actual_bytes) |
1799 | *actual_bytes = discarded_bytes; | 1807 | *actual_bytes = discarded_bytes; |
@@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6269 | * also make sure backrefs for the shared block and all lower level | 6277 | * also make sure backrefs for the shared block and all lower level |
6270 | * blocks are properly updated. | 6278 | * blocks are properly updated. |
6271 | */ | 6279 | */ |
6272 | int btrfs_drop_snapshot(struct btrfs_root *root, | 6280 | void btrfs_drop_snapshot(struct btrfs_root *root, |
6273 | struct btrfs_block_rsv *block_rsv, int update_ref) | 6281 | struct btrfs_block_rsv *block_rsv, int update_ref) |
6274 | { | 6282 | { |
6275 | struct btrfs_path *path; | 6283 | struct btrfs_path *path; |
6276 | struct btrfs_trans_handle *trans; | 6284 | struct btrfs_trans_handle *trans; |
@@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6283 | int level; | 6291 | int level; |
6284 | 6292 | ||
6285 | path = btrfs_alloc_path(); | 6293 | path = btrfs_alloc_path(); |
6286 | if (!path) | 6294 | if (!path) { |
6287 | return -ENOMEM; | 6295 | err = -ENOMEM; |
6296 | goto out; | ||
6297 | } | ||
6288 | 6298 | ||
6289 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6299 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6290 | if (!wc) { | 6300 | if (!wc) { |
6291 | btrfs_free_path(path); | 6301 | btrfs_free_path(path); |
6292 | return -ENOMEM; | 6302 | err = -ENOMEM; |
6303 | goto out; | ||
6293 | } | 6304 | } |
6294 | 6305 | ||
6295 | trans = btrfs_start_transaction(tree_root, 0); | 6306 | trans = btrfs_start_transaction(tree_root, 0); |
@@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6318 | path->lowest_level = 0; | 6329 | path->lowest_level = 0; |
6319 | if (ret < 0) { | 6330 | if (ret < 0) { |
6320 | err = ret; | 6331 | err = ret; |
6321 | goto out; | 6332 | goto out_free; |
6322 | } | 6333 | } |
6323 | WARN_ON(ret > 0); | 6334 | WARN_ON(ret > 0); |
6324 | 6335 | ||
@@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6425 | free_extent_buffer(root->commit_root); | 6436 | free_extent_buffer(root->commit_root); |
6426 | kfree(root); | 6437 | kfree(root); |
6427 | } | 6438 | } |
6428 | out: | 6439 | out_free: |
6429 | btrfs_end_transaction_throttle(trans, tree_root); | 6440 | btrfs_end_transaction_throttle(trans, tree_root); |
6430 | kfree(wc); | 6441 | kfree(wc); |
6431 | btrfs_free_path(path); | 6442 | btrfs_free_path(path); |
6432 | return err; | 6443 | out: |
6444 | if (err) | ||
6445 | btrfs_std_error(root->fs_info, err); | ||
6446 | return; | ||
6433 | } | 6447 | } |
6434 | 6448 | ||
6435 | /* | 6449 | /* |
@@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6720 | struct btrfs_space_info *space_info; | 6734 | struct btrfs_space_info *space_info; |
6721 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 6735 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6722 | struct btrfs_device *device; | 6736 | struct btrfs_device *device; |
6737 | u64 min_free; | ||
6738 | u64 dev_min = 1; | ||
6739 | u64 dev_nr = 0; | ||
6740 | int index; | ||
6723 | int full = 0; | 6741 | int full = 0; |
6724 | int ret = 0; | 6742 | int ret = 0; |
6725 | 6743 | ||
@@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6729 | if (!block_group) | 6747 | if (!block_group) |
6730 | return -1; | 6748 | return -1; |
6731 | 6749 | ||
6750 | min_free = btrfs_block_group_used(&block_group->item); | ||
6751 | |||
6732 | /* no bytes used, we're good */ | 6752 | /* no bytes used, we're good */ |
6733 | if (!btrfs_block_group_used(&block_group->item)) | 6753 | if (!min_free) |
6734 | goto out; | 6754 | goto out; |
6735 | 6755 | ||
6736 | space_info = block_group->space_info; | 6756 | space_info = block_group->space_info; |
@@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6746 | * all of the extents from this block group. If we can, we're good | 6766 | * all of the extents from this block group. If we can, we're good |
6747 | */ | 6767 | */ |
6748 | if ((space_info->total_bytes != block_group->key.offset) && | 6768 | if ((space_info->total_bytes != block_group->key.offset) && |
6749 | (space_info->bytes_used + space_info->bytes_reserved + | 6769 | (space_info->bytes_used + space_info->bytes_reserved + |
6750 | space_info->bytes_pinned + space_info->bytes_readonly + | 6770 | space_info->bytes_pinned + space_info->bytes_readonly + |
6751 | btrfs_block_group_used(&block_group->item) < | 6771 | min_free < space_info->total_bytes)) { |
6752 | space_info->total_bytes)) { | ||
6753 | spin_unlock(&space_info->lock); | 6772 | spin_unlock(&space_info->lock); |
6754 | goto out; | 6773 | goto out; |
6755 | } | 6774 | } |
@@ -6766,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6766 | if (full) | 6785 | if (full) |
6767 | goto out; | 6786 | goto out; |
6768 | 6787 | ||
6788 | /* | ||
6789 | * index: | ||
6790 | * 0: raid10 | ||
6791 | * 1: raid1 | ||
6792 | * 2: dup | ||
6793 | * 3: raid0 | ||
6794 | * 4: single | ||
6795 | */ | ||
6796 | index = get_block_group_index(block_group); | ||
6797 | if (index == 0) { | ||
6798 | dev_min = 4; | ||
6799 | /* Divide by 2 */ | ||
6800 | min_free >>= 1; | ||
6801 | } else if (index == 1) { | ||
6802 | dev_min = 2; | ||
6803 | } else if (index == 2) { | ||
6804 | /* Multiply by 2 */ | ||
6805 | min_free <<= 1; | ||
6806 | } else if (index == 3) { | ||
6807 | dev_min = fs_devices->rw_devices; | ||
6808 | do_div(min_free, dev_min); | ||
6809 | } | ||
6810 | |||
6769 | mutex_lock(&root->fs_info->chunk_mutex); | 6811 | mutex_lock(&root->fs_info->chunk_mutex); |
6770 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6812 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
6771 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
6772 | u64 dev_offset; | 6813 | u64 dev_offset; |
6773 | 6814 | ||
6774 | /* | 6815 | /* |
@@ -6779,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6779 | ret = find_free_dev_extent(NULL, device, min_free, | 6820 | ret = find_free_dev_extent(NULL, device, min_free, |
6780 | &dev_offset, NULL); | 6821 | &dev_offset, NULL); |
6781 | if (!ret) | 6822 | if (!ret) |
6823 | dev_nr++; | ||
6824 | |||
6825 | if (dev_nr >= dev_min) | ||
6782 | break; | 6826 | break; |
6827 | |||
6783 | ret = -1; | 6828 | ret = -1; |
6784 | } | 6829 | } |
6785 | } | 6830 | } |
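The index comment in the btrfs_can_relocate() hunk maps block-group profiles to numbers; the branches then scale both the free space each device must offer and the number of devices that must offer it. A sketch of that mapping under the same ordering (raid10 needs four devices holding half the used bytes each, raid1 two full copies, dup twice the bytes on one device, raid0 an even split across every rw device):

#include <stdint.h>
#include <stdio.h>

/* Profile ordering taken from the comment in btrfs_can_relocate():
 * 0 raid10, 1 raid1, 2 dup, 3 raid0, 4 single. */
enum raid_index { RAID10, RAID1, DUP, RAID0, SINGLE };

/* How many devices must each offer how much free space before a block
 * group holding 'used' bytes can be relocated. */
static void relocate_requirements(enum raid_index index, uint64_t used,
                                  uint64_t rw_devices,
                                  uint64_t *min_free, uint64_t *dev_min)
{
        *min_free = used;
        *dev_min = 1;

        switch (index) {
        case RAID10:                    /* 4 devices, half the bytes each */
                *dev_min = 4;
                *min_free >>= 1;
                break;
        case RAID1:                     /* two full copies */
                *dev_min = 2;
                break;
        case DUP:                       /* both copies on one device */
                *min_free <<= 1;
                break;
        case RAID0:                     /* spread over every rw device */
                *dev_min = rw_devices;
                *min_free /= rw_devices;
                break;
        case SINGLE:
                break;
        }
}

int main(void)
{
        uint64_t min_free, dev_min;

        /* e.g. a 1 GiB raid10 block group on a 4-device fs */
        relocate_requirements(RAID10, 1ULL << 30, 4, &min_free, &dev_min);
        printf("%llu devices with %llu bytes free each\n",
               (unsigned long long)dev_min, (unsigned long long)min_free);
        return 0;
}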
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b910694f61ed..a1cb7821becd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -183,8 +183,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
183 | * read from the commit root and sidestep a nasty deadlock | 183 | * read from the commit root and sidestep a nasty deadlock |
184 | * between reading the free space cache and updating the csum tree. | 184 | * between reading the free space cache and updating the csum tree. |
185 | */ | 185 | */ |
186 | if (btrfs_is_free_space_inode(root, inode)) | 186 | if (btrfs_is_free_space_inode(root, inode)) { |
187 | path->search_commit_root = 1; | 187 | path->search_commit_root = 1; |
188 | path->skip_locking = 1; | ||
189 | } | ||
188 | 190 | ||
189 | disk_bytenr = (u64)bio->bi_sector << 9; | 191 | disk_bytenr = (u64)bio->bi_sector << 9; |
190 | if (dio) | 192 | if (dio) |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 658d66959abe..1266f6e9cdb2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
150 | spin_lock(&root->fs_info->defrag_inodes_lock); | 150 | spin_lock(&root->fs_info->defrag_inodes_lock); |
151 | if (!BTRFS_I(inode)->in_defrag) | 151 | if (!BTRFS_I(inode)->in_defrag) |
152 | __btrfs_add_inode_defrag(inode, defrag); | 152 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | ||
154 | kfree(defrag); | ||
153 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 155 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
154 | return 0; | 156 | return 0; |
155 | } | 157 | } |
@@ -1034,11 +1036,13 @@ out: | |||
1034 | * on error we return an unlocked page and the error value | 1036 | * on error we return an unlocked page and the error value |
1035 | * on success we return a locked page and 0 | 1037 | * on success we return a locked page and 0 |
1036 | */ | 1038 | */ |
1037 | static int prepare_uptodate_page(struct page *page, u64 pos) | 1039 | static int prepare_uptodate_page(struct page *page, u64 pos, |
1040 | bool force_uptodate) | ||
1038 | { | 1041 | { |
1039 | int ret = 0; | 1042 | int ret = 0; |
1040 | 1043 | ||
1041 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | 1044 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && |
1045 | !PageUptodate(page)) { | ||
1042 | ret = btrfs_readpage(NULL, page); | 1046 | ret = btrfs_readpage(NULL, page); |
1043 | if (ret) | 1047 | if (ret) |
1044 | return ret; | 1048 | return ret; |
@@ -1059,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1059 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1063 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1060 | struct page **pages, size_t num_pages, | 1064 | struct page **pages, size_t num_pages, |
1061 | loff_t pos, unsigned long first_index, | 1065 | loff_t pos, unsigned long first_index, |
1062 | size_t write_bytes) | 1066 | size_t write_bytes, bool force_uptodate) |
1063 | { | 1067 | { |
1064 | struct extent_state *cached_state = NULL; | 1068 | struct extent_state *cached_state = NULL; |
1065 | int i; | 1069 | int i; |
@@ -1073,12 +1077,6 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1073 | start_pos = pos & ~((u64)root->sectorsize - 1); | 1077 | start_pos = pos & ~((u64)root->sectorsize - 1); |
1074 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 1078 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
1075 | 1079 | ||
1076 | if (start_pos > inode->i_size) { | ||
1077 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1078 | if (err) | ||
1079 | return err; | ||
1080 | } | ||
1081 | |||
1082 | again: | 1080 | again: |
1083 | for (i = 0; i < num_pages; i++) { | 1081 | for (i = 0; i < num_pages; i++) { |
1084 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1082 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
@@ -1090,10 +1088,11 @@ again: | |||
1090 | } | 1088 | } |
1091 | 1089 | ||
1092 | if (i == 0) | 1090 | if (i == 0) |
1093 | err = prepare_uptodate_page(pages[i], pos); | 1091 | err = prepare_uptodate_page(pages[i], pos, |
1092 | force_uptodate); | ||
1094 | if (i == num_pages - 1) | 1093 | if (i == num_pages - 1) |
1095 | err = prepare_uptodate_page(pages[i], | 1094 | err = prepare_uptodate_page(pages[i], |
1096 | pos + write_bytes); | 1095 | pos + write_bytes, false); |
1097 | if (err) { | 1096 | if (err) { |
1098 | page_cache_release(pages[i]); | 1097 | page_cache_release(pages[i]); |
1099 | faili = i - 1; | 1098 | faili = i - 1; |
@@ -1162,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1162 | size_t num_written = 0; | 1161 | size_t num_written = 0; |
1163 | int nrptrs; | 1162 | int nrptrs; |
1164 | int ret = 0; | 1163 | int ret = 0; |
1164 | bool force_page_uptodate = false; | ||
1165 | 1165 | ||
1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
@@ -1204,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1204 | * contents of pages from loop to loop | 1204 | * contents of pages from loop to loop |
1205 | */ | 1205 | */ |
1206 | ret = prepare_pages(root, file, pages, num_pages, | 1206 | ret = prepare_pages(root, file, pages, num_pages, |
1207 | pos, first_index, write_bytes); | 1207 | pos, first_index, write_bytes, |
1208 | force_page_uptodate); | ||
1208 | if (ret) { | 1209 | if (ret) { |
1209 | btrfs_delalloc_release_space(inode, | 1210 | btrfs_delalloc_release_space(inode, |
1210 | num_pages << PAGE_CACHE_SHIFT); | 1211 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1221,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1221 | if (copied < write_bytes) | 1222 | if (copied < write_bytes) |
1222 | nrptrs = 1; | 1223 | nrptrs = 1; |
1223 | 1224 | ||
1224 | if (copied == 0) | 1225 | if (copied == 0) { |
1226 | force_page_uptodate = true; | ||
1225 | dirty_pages = 0; | 1227 | dirty_pages = 0; |
1226 | else | 1228 | } else { |
1229 | force_page_uptodate = false; | ||
1227 | dirty_pages = (copied + offset + | 1230 | dirty_pages = (copied + offset + |
1228 | PAGE_CACHE_SIZE - 1) >> | 1231 | PAGE_CACHE_SIZE - 1) >> |
1229 | PAGE_CACHE_SHIFT; | 1232 | PAGE_CACHE_SHIFT; |
1233 | } | ||
1230 | 1234 | ||
1231 | /* | 1235 | /* |
1232 | * If we had a short copy we need to release the excess delalloc | 1236 | * If we had a short copy we need to release the excess delalloc |
@@ -1336,6 +1340,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1336 | struct inode *inode = fdentry(file)->d_inode; | 1340 | struct inode *inode = fdentry(file)->d_inode; |
1337 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1341 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1338 | loff_t *ppos = &iocb->ki_pos; | 1342 | loff_t *ppos = &iocb->ki_pos; |
1343 | u64 start_pos; | ||
1339 | ssize_t num_written = 0; | 1344 | ssize_t num_written = 0; |
1340 | ssize_t err = 0; | 1345 | ssize_t err = 0; |
1341 | size_t count, ocount; | 1346 | size_t count, ocount; |
@@ -1384,6 +1389,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1384 | file_update_time(file); | 1389 | file_update_time(file); |
1385 | BTRFS_I(inode)->sequence++; | 1390 | BTRFS_I(inode)->sequence++; |
1386 | 1391 | ||
1392 | start_pos = round_down(pos, root->sectorsize); | ||
1393 | if (start_pos > i_size_read(inode)) { | ||
1394 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1395 | if (err) { | ||
1396 | mutex_unlock(&inode->i_mutex); | ||
1397 | goto out; | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1387 | if (unlikely(file->f_flags & O_DIRECT)) { | 1401 | if (unlikely(file->f_flags & O_DIRECT)) { |
1388 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1402 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
1389 | pos, ppos, count, ocount); | 1403 | pos, ppos, count, ocount); |
@@ -1638,11 +1652,15 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1638 | 1652 | ||
1639 | cur_offset = alloc_start; | 1653 | cur_offset = alloc_start; |
1640 | while (1) { | 1654 | while (1) { |
1655 | u64 actual_end; | ||
1656 | |||
1641 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 1657 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
1642 | alloc_end - cur_offset, 0); | 1658 | alloc_end - cur_offset, 0); |
1643 | BUG_ON(IS_ERR_OR_NULL(em)); | 1659 | BUG_ON(IS_ERR_OR_NULL(em)); |
1644 | last_byte = min(extent_map_end(em), alloc_end); | 1660 | last_byte = min(extent_map_end(em), alloc_end); |
1661 | actual_end = min_t(u64, extent_map_end(em), offset + len); | ||
1645 | last_byte = (last_byte + mask) & ~mask; | 1662 | last_byte = (last_byte + mask) & ~mask; |
1663 | |||
1646 | if (em->block_start == EXTENT_MAP_HOLE || | 1664 | if (em->block_start == EXTENT_MAP_HOLE || |
1647 | (cur_offset >= inode->i_size && | 1665 | (cur_offset >= inode->i_size && |
1648 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1666 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
@@ -1655,6 +1673,16 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1655 | free_extent_map(em); | 1673 | free_extent_map(em); |
1656 | break; | 1674 | break; |
1657 | } | 1675 | } |
1676 | } else if (actual_end > inode->i_size && | ||
1677 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1678 | /* | ||
1679 | * We didn't need to allocate any more space, but we | ||
1680 | * still extended the size of the file so we need to | ||
1681 | * update i_size. | ||
1682 | */ | ||
1683 | inode->i_ctime = CURRENT_TIME; | ||
1684 | i_size_write(inode, actual_end); | ||
1685 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | ||
1658 | } | 1686 | } |
1659 | free_extent_map(em); | 1687 | free_extent_map(em); |
1660 | 1688 | ||
@@ -1793,10 +1821,15 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
1793 | switch (origin) { | 1821 | switch (origin) { |
1794 | case SEEK_END: | 1822 | case SEEK_END: |
1795 | case SEEK_CUR: | 1823 | case SEEK_CUR: |
1796 | offset = generic_file_llseek_unlocked(file, offset, origin); | 1824 | offset = generic_file_llseek(file, offset, origin); |
1797 | goto out; | 1825 | goto out; |
1798 | case SEEK_DATA: | 1826 | case SEEK_DATA: |
1799 | case SEEK_HOLE: | 1827 | case SEEK_HOLE: |
1828 | if (offset >= i_size_read(inode)) { | ||
1829 | mutex_unlock(&inode->i_mutex); | ||
1830 | return -ENXIO; | ||
1831 | } | ||
1832 | |||
1800 | ret = find_desired_extent(inode, &offset, origin); | 1833 | ret = find_desired_extent(inode, &offset, origin); |
1801 | if (ret) { | 1834 | if (ret) { |
1802 | mutex_unlock(&inode->i_mutex); | 1835 | mutex_unlock(&inode->i_mutex); |
@@ -1804,10 +1837,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
1804 | } | 1837 | } |
1805 | } | 1838 | } |
1806 | 1839 | ||
1807 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 1840 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { |
1808 | return -EINVAL; | 1841 | offset = -EINVAL; |
1809 | if (offset > inode->i_sb->s_maxbytes) | 1842 | goto out; |
1810 | return -EINVAL; | 1843 | } |
1844 | if (offset > inode->i_sb->s_maxbytes) { | ||
1845 | offset = -EINVAL; | ||
1846 | goto out; | ||
1847 | } | ||
1811 | 1848 | ||
1812 | /* Special lock needed here? */ | 1849 | /* Special lock needed here? */ |
1813 | if (offset != file->f_pos) { | 1850 | if (offset != file->f_pos) { |
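Among the file.c changes, prepare_uptodate_page() gains a force_uptodate flag: after a copy attempt moves zero bytes, the next pass forces the first page fully uptodate so the retry cannot fault on the same partially populated page again. A sketch of the resulting read-first decision, assuming a 4 KiB page size:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096ULL         /* assumed page size */

/* Sketch of the prepare_uptodate_page() test: read the page in first
 * when the write starts mid-page, or when the caller insists. */
static bool must_read_page_first(uint64_t pos, bool page_uptodate,
                                 bool force_uptodate)
{
        bool partial_start = (pos & (PAGE_CACHE_SIZE - 1)) != 0;

        return (partial_start || force_uptodate) && !page_uptodate;
}

int main(void)
{
        /* page-aligned write, page not uptodate: no read needed ... */
        printf("%d\n", must_read_page_first(8192, false, false));
        /* ... unless the previous copy attempt moved zero bytes */
        printf("%d\n", must_read_page_first(8192, false, true));
        return 0;
}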
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6377713f639c..41ac927401d0 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -190,9 +190,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
190 | struct btrfs_path *path, | 190 | struct btrfs_path *path, |
191 | struct inode *inode) | 191 | struct inode *inode) |
192 | { | 192 | { |
193 | struct btrfs_block_rsv *rsv; | ||
193 | loff_t oldsize; | 194 | loff_t oldsize; |
194 | int ret = 0; | 195 | int ret = 0; |
195 | 196 | ||
197 | rsv = trans->block_rsv; | ||
196 | trans->block_rsv = root->orphan_block_rsv; | 198 | trans->block_rsv = root->orphan_block_rsv; |
197 | ret = btrfs_block_rsv_check(trans, root, | 199 | ret = btrfs_block_rsv_check(trans, root, |
198 | root->orphan_block_rsv, | 200 | root->orphan_block_rsv, |
@@ -210,6 +212,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
210 | */ | 212 | */ |
211 | ret = btrfs_truncate_inode_items(trans, root, inode, | 213 | ret = btrfs_truncate_inode_items(trans, root, inode, |
212 | 0, BTRFS_EXTENT_DATA_KEY); | 214 | 0, BTRFS_EXTENT_DATA_KEY); |
215 | |||
216 | trans->block_rsv = rsv; | ||
213 | if (ret) { | 217 | if (ret) { |
214 | WARN_ON(1); | 218 | WARN_ON(1); |
215 | return ret; | 219 | return ret; |
@@ -1168,9 +1172,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1168 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1172 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
1169 | } | 1173 | } |
1170 | 1174 | ||
1171 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | 1175 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
1172 | struct btrfs_free_space *info, u64 offset, | 1176 | struct btrfs_free_space *info, |
1173 | u64 bytes) | 1177 | u64 offset, u64 bytes) |
1174 | { | 1178 | { |
1175 | unsigned long start, count; | 1179 | unsigned long start, count; |
1176 | 1180 | ||
@@ -1181,6 +1185,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | |||
1181 | bitmap_clear(info->bitmap, start, count); | 1185 | bitmap_clear(info->bitmap, start, count); |
1182 | 1186 | ||
1183 | info->bytes -= bytes; | 1187 | info->bytes -= bytes; |
1188 | } | ||
1189 | |||
1190 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | ||
1191 | struct btrfs_free_space *info, u64 offset, | ||
1192 | u64 bytes) | ||
1193 | { | ||
1194 | __bitmap_clear_bits(ctl, info, offset, bytes); | ||
1184 | ctl->free_space -= bytes; | 1195 | ctl->free_space -= bytes; |
1185 | } | 1196 | } |
1186 | 1197 | ||
@@ -1984,7 +1995,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
1984 | return 0; | 1995 | return 0; |
1985 | 1996 | ||
1986 | ret = search_start; | 1997 | ret = search_start; |
1987 | bitmap_clear_bits(ctl, entry, ret, bytes); | 1998 | __bitmap_clear_bits(ctl, entry, ret, bytes); |
1988 | 1999 | ||
1989 | return ret; | 2000 | return ret; |
1990 | } | 2001 | } |
@@ -2039,7 +2050,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2039 | continue; | 2050 | continue; |
2040 | } | 2051 | } |
2041 | } else { | 2052 | } else { |
2042 | |||
2043 | ret = entry->offset; | 2053 | ret = entry->offset; |
2044 | 2054 | ||
2045 | entry->offset += bytes; | 2055 | entry->offset += bytes; |
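The free-space-cache hunks split bitmap clearing in two: __bitmap_clear_bits() touches only the one entry, while bitmap_clear_bits() also charges ctl->free_space. btrfs_alloc_from_bitmap() switches to the inner variant because cluster setup already deducted those bytes from the total. A sketch of the split with stub structs for the kernel's:

#include <stdint.h>
#include <stdio.h>

struct free_space_ctl  { uint64_t free_space; };  /* whole-ctl total */
struct free_space_info { uint64_t bytes; };       /* one bitmap entry */

/* Inner helper, as __bitmap_clear_bits(): adjust only the entry. */
static void clear_bits_entry_only(struct free_space_info *info,
                                  uint64_t bytes)
{
        info->bytes -= bytes;
}

/* Outer wrapper, as bitmap_clear_bits(): entry plus global total. */
static void clear_bits(struct free_space_ctl *ctl,
                       struct free_space_info *info, uint64_t bytes)
{
        clear_bits_entry_only(info, bytes);
        ctl->free_space -= bytes;
}

int main(void)
{
        struct free_space_ctl ctl = { 8192 };
        struct free_space_info info = { 4096 };

        clear_bits(&ctl, &info, 1024);
        printf("entry=%llu total=%llu\n",
               (unsigned long long)info.bytes,
               (unsigned long long)ctl.free_space);
        return 0;
}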
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 15fceefbca0a..b2d004ad66a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1786,7 +1786,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1786 | &ordered_extent->list); | 1786 | &ordered_extent->list); |
1787 | 1787 | ||
1788 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1788 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1789 | if (!ret) { | 1789 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1790 | ret = btrfs_update_inode(trans, root, inode); | 1790 | ret = btrfs_update_inode(trans, root, inode); |
1791 | BUG_ON(ret); | 1791 | BUG_ON(ret); |
1792 | } | 1792 | } |
@@ -3510,15 +3510,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3510 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3510 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3511 | cur_offset + hole_size, | 3511 | cur_offset + hole_size, |
3512 | &hint_byte, 1); | 3512 | &hint_byte, 1); |
3513 | if (err) | 3513 | if (err) { |
3514 | btrfs_end_transaction(trans, root); | ||
3514 | break; | 3515 | break; |
3516 | } | ||
3515 | 3517 | ||
3516 | err = btrfs_insert_file_extent(trans, root, | 3518 | err = btrfs_insert_file_extent(trans, root, |
3517 | btrfs_ino(inode), cur_offset, 0, | 3519 | btrfs_ino(inode), cur_offset, 0, |
3518 | 0, hole_size, 0, hole_size, | 3520 | 0, hole_size, 0, hole_size, |
3519 | 0, 0, 0); | 3521 | 0, 0, 0); |
3520 | if (err) | 3522 | if (err) { |
3523 | btrfs_end_transaction(trans, root); | ||
3521 | break; | 3524 | break; |
3525 | } | ||
3522 | 3526 | ||
3523 | btrfs_drop_extent_cache(inode, hole_start, | 3527 | btrfs_drop_extent_cache(inode, hole_start, |
3524 | last_byte - 1, 0); | 3528 | last_byte - 1, 0); |
@@ -3952,7 +3956,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3952 | struct btrfs_root *root, int *new) | 3956 | struct btrfs_root *root, int *new) |
3953 | { | 3957 | { |
3954 | struct inode *inode; | 3958 | struct inode *inode; |
3955 | int bad_inode = 0; | ||
3956 | 3959 | ||
3957 | inode = btrfs_iget_locked(s, location->objectid, root); | 3960 | inode = btrfs_iget_locked(s, location->objectid, root); |
3958 | if (!inode) | 3961 | if (!inode) |
@@ -3968,15 +3971,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3968 | if (new) | 3971 | if (new) |
3969 | *new = 1; | 3972 | *new = 1; |
3970 | } else { | 3973 | } else { |
3971 | bad_inode = 1; | 3974 | unlock_new_inode(inode); |
3975 | iput(inode); | ||
3976 | inode = ERR_PTR(-ESTALE); | ||
3972 | } | 3977 | } |
3973 | } | 3978 | } |
3974 | 3979 | ||
3975 | if (bad_inode) { | ||
3976 | iput(inode); | ||
3977 | inode = ERR_PTR(-ESTALE); | ||
3978 | } | ||
3979 | |||
3980 | return inode; | 3980 | return inode; |
3981 | } | 3981 | } |
3982 | 3982 | ||
@@ -4018,7 +4018,8 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4018 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); | 4018 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); |
4019 | kfree(dentry->d_fsdata); | 4019 | kfree(dentry->d_fsdata); |
4020 | dentry->d_fsdata = NULL; | 4020 | dentry->d_fsdata = NULL; |
4021 | d_clear_need_lookup(dentry); | 4021 | /* This thing is hashed, drop it for now */ |
4022 | d_drop(dentry); | ||
4022 | } else { | 4023 | } else { |
4023 | ret = btrfs_inode_by_name(dir, dentry, &location); | 4024 | ret = btrfs_inode_by_name(dir, dentry, &location); |
4024 | } | 4025 | } |
@@ -4085,7 +4086,15 @@ static void btrfs_dentry_release(struct dentry *dentry) | |||
4085 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 4086 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
4086 | struct nameidata *nd) | 4087 | struct nameidata *nd) |
4087 | { | 4088 | { |
4088 | return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); | 4089 | struct dentry *ret; |
4090 | |||
4091 | ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); | ||
4092 | if (unlikely(d_need_lookup(dentry))) { | ||
4093 | spin_lock(&dentry->d_lock); | ||
4094 | dentry->d_flags &= ~DCACHE_NEED_LOOKUP; | ||
4095 | spin_unlock(&dentry->d_lock); | ||
4096 | } | ||
4097 | return ret; | ||
4089 | } | 4098 | } |
4090 | 4099 | ||
4091 | unsigned char btrfs_filetype_table[] = { | 4100 | unsigned char btrfs_filetype_table[] = { |
@@ -4125,7 +4134,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4125 | 4134 | ||
4126 | /* special case for "." */ | 4135 | /* special case for "." */ |
4127 | if (filp->f_pos == 0) { | 4136 | if (filp->f_pos == 0) { |
4128 | over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); | 4137 | over = filldir(dirent, ".", 1, |
4138 | filp->f_pos, btrfs_ino(inode), DT_DIR); | ||
4129 | if (over) | 4139 | if (over) |
4130 | return 0; | 4140 | return 0; |
4131 | filp->f_pos = 1; | 4141 | filp->f_pos = 1; |
@@ -4134,7 +4144,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4134 | if (filp->f_pos == 1) { | 4144 | if (filp->f_pos == 1) { |
4135 | u64 pino = parent_ino(filp->f_path.dentry); | 4145 | u64 pino = parent_ino(filp->f_path.dentry); |
4136 | over = filldir(dirent, "..", 2, | 4146 | over = filldir(dirent, "..", 2, |
4137 | 2, pino, DT_DIR); | 4147 | filp->f_pos, pino, DT_DIR); |
4138 | if (over) | 4148 | if (over) |
4139 | return 0; | 4149 | return 0; |
4140 | filp->f_pos = 2; | 4150 | filp->f_pos = 2; |
@@ -5823,7 +5833,7 @@ again: | |||
5823 | 5833 | ||
5824 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5834 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5825 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5835 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5826 | if (!ret) | 5836 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) |
5827 | btrfs_update_inode(trans, root, inode); | 5837 | btrfs_update_inode(trans, root, inode); |
5828 | ret = 0; | 5838 | ret = 0; |
5829 | out_unlock: | 5839 | out_unlock: |
@@ -7354,11 +7364,15 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7354 | static int btrfs_permission(struct inode *inode, int mask) | 7364 | static int btrfs_permission(struct inode *inode, int mask) |
7355 | { | 7365 | { |
7356 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7366 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7367 | umode_t mode = inode->i_mode; | ||
7357 | 7368 | ||
7358 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | 7369 | if (mask & MAY_WRITE && |
7359 | return -EROFS; | 7370 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { |
7360 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7371 | if (btrfs_root_readonly(root)) |
7361 | return -EACCES; | 7372 | return -EROFS; |
7373 | if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) | ||
7374 | return -EACCES; | ||
7375 | } | ||
7362 | return generic_permission(inode, mask); | 7376 | return generic_permission(inode, mask); |
7363 | } | 7377 | } |
7364 | 7378 | ||
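The btrfs_permission() rewrite applies the read-only checks only to write access on regular files, directories and symlinks, so for example a device node inside a read-only subvolume can still be written through its driver. A compilable sketch of the narrowed test; 0x2 is the kernel's MAY_WRITE bit:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

#define MAY_WRITE 0x2

/* Only writes to reg/dir/symlink inodes hit the read-only tests;
 * 0 means fall through to generic_permission(). */
static int permission_sketch(mode_t mode, int mask,
                             bool root_readonly, bool inode_readonly)
{
        if ((mask & MAY_WRITE) &&
            (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
                if (root_readonly)
                        return -EROFS;
                if (inode_readonly)
                        return -EACCES;
        }
        return 0;
}

int main(void)
{
        /* writing a device node on a read-only root is still allowed */
        printf("%d\n", permission_sketch(S_IFCHR | 0600, MAY_WRITE,
                                         true, false));
        return 0;
}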
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7cf013349941..dae5dfe41ba5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -1047,7 +1047,16 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1047 | if (!max_to_defrag) | 1047 | if (!max_to_defrag) |
1048 | max_to_defrag = last_index - 1; | 1048 | max_to_defrag = last_index - 1; |
1049 | 1049 | ||
1050 | while (i <= last_index && defrag_count < max_to_defrag) { | 1050 | /* |
1051 | * make writeback start from i, so the defrag range can be | ||
1052 | * written sequentially. | ||
1053 | */ | ||
1054 | if (i < inode->i_mapping->writeback_index) | ||
1055 | inode->i_mapping->writeback_index = i; | ||
1056 | |||
1057 | while (i <= last_index && defrag_count < max_to_defrag && | ||
1058 | (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
1059 | PAGE_CACHE_SHIFT)) { | ||
1051 | /* | 1060 | /* |
1052 | * make sure we stop running if someone unmounts | 1061 | * make sure we stop running if someone unmounts |
1053 | * the FS | 1062 | * the FS |
@@ -2177,6 +2186,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2177 | if (!(src_file->f_mode & FMODE_READ)) | 2186 | if (!(src_file->f_mode & FMODE_READ)) |
2178 | goto out_fput; | 2187 | goto out_fput; |
2179 | 2188 | ||
2189 | /* don't make the dst file partly checksummed */ | ||
2190 | if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != | ||
2191 | (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) | ||
2192 | goto out_fput; | ||
2193 | |||
2180 | ret = -EISDIR; | 2194 | ret = -EISDIR; |
2181 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 2195 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
2182 | goto out_fput; | 2196 | goto out_fput; |
@@ -2220,6 +2234,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2220 | !IS_ALIGNED(destoff, bs)) | 2234 | !IS_ALIGNED(destoff, bs)) |
2221 | goto out_unlock; | 2235 | goto out_unlock; |
2222 | 2236 | ||
2237 | if (destoff > inode->i_size) { | ||
2238 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | ||
2239 | if (ret) | ||
2240 | goto out_unlock; | ||
2241 | } | ||
2242 | |||
2243 | /* truncate page cache pages from target inode range */ | ||
2244 | truncate_inode_pages_range(&inode->i_data, destoff, | ||
2245 | PAGE_CACHE_ALIGN(destoff + len) - 1); | ||
2246 | |||
2223 | /* do any pending delalloc/csum calc on src, one way or | 2247 | /* do any pending delalloc/csum calc on src, one way or |
2224 | another, and lock file content */ | 2248 | another, and lock file content */ |
2225 | while (1) { | 2249 | while (1) { |
@@ -2313,7 +2337,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2313 | else | 2337 | else |
2314 | new_key.offset = destoff; | 2338 | new_key.offset = destoff; |
2315 | 2339 | ||
2316 | trans = btrfs_start_transaction(root, 1); | 2340 | /* |
2341 | * 1 - adjusting old extent (we may have to split it) | ||
2342 | * 1 - add new extent | ||
2343 | * 1 - inode update | ||
2344 | */ | ||
2345 | trans = btrfs_start_transaction(root, 3); | ||
2317 | if (IS_ERR(trans)) { | 2346 | if (IS_ERR(trans)) { |
2318 | ret = PTR_ERR(trans); | 2347 | ret = PTR_ERR(trans); |
2319 | goto out; | 2348 | goto out; |
@@ -2321,14 +2350,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2321 | 2350 | ||
2322 | if (type == BTRFS_FILE_EXTENT_REG || | 2351 | if (type == BTRFS_FILE_EXTENT_REG || |
2323 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 2352 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
2353 | /* | ||
2354 | * a | --- range to clone ---| b | ||
2355 | * | ------------- extent ------------- | | ||
2356 | */ | ||
2357 | |||
2358 | /* subtract range b */ | ||
2359 | if (key.offset + datal > off + len) | ||
2360 | datal = off + len - key.offset; | ||
2361 | |||
2362 | /* subtract range a */ | ||
2324 | if (off > key.offset) { | 2363 | if (off > key.offset) { |
2325 | datao += off - key.offset; | 2364 | datao += off - key.offset; |
2326 | datal -= off - key.offset; | 2365 | datal -= off - key.offset; |
2327 | } | 2366 | } |
2328 | 2367 | ||
2329 | if (key.offset + datal > off + len) | ||
2330 | datal = off + len - key.offset; | ||
2331 | |||
2332 | ret = btrfs_drop_extents(trans, inode, | 2368 | ret = btrfs_drop_extents(trans, inode, |
2333 | new_key.offset, | 2369 | new_key.offset, |
2334 | new_key.offset + datal, | 2370 | new_key.offset + datal, |
@@ -2425,7 +2461,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2425 | if (endoff > inode->i_size) | 2461 | if (endoff > inode->i_size) |
2426 | btrfs_i_size_write(inode, endoff); | 2462 | btrfs_i_size_write(inode, endoff); |
2427 | 2463 | ||
2428 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
2429 | ret = btrfs_update_inode(trans, root, inode); | 2464 | ret = btrfs_update_inode(trans, root, inode); |
2430 | BUG_ON(ret); | 2465 | BUG_ON(ret); |
2431 | btrfs_end_transaction(trans, root); | 2466 | btrfs_end_transaction(trans, root); |
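The "range a / range b" reorder in the clone ioctl matters because the tail clamp is computed from key.offset and is only valid while datal still describes the whole extent. A worked example: for an extent spanning [0, 100) and a clone request [10, 60), clipping the tail first gives datal = 60 and the head subtraction then gives the correct 50; the old order produced 60.

#include <stdint.h>
#include <stdio.h>

/* The extent spans [key_offset, key_offset + datal); the request
 * spans [off, off + len).  Returns the length actually cloned. */
static uint64_t cloned_length(uint64_t key_offset, uint64_t datal,
                              uint64_t off, uint64_t len)
{
        /* subtract range b: the tail beyond the request */
        if (key_offset + datal > off + len)
                datal = off + len - key_offset;

        /* subtract range a: the head before the request */
        if (off > key_offset)
                datal -= off - key_offset;

        return datal;
}

int main(void)
{
        /* extent [0,100), request [10,60): exactly 50 bytes clone over */
        printf("%llu\n",
               (unsigned long long)cloned_length(0, 100, 10, 50));
        return 0;
}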
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7dc36fab4afc..e24b7964a155 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -884,6 +884,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
884 | struct btrfs_root *tree_root = fs_info->tree_root; | 884 | struct btrfs_root *tree_root = fs_info->tree_root; |
885 | struct btrfs_root *root = pending->root; | 885 | struct btrfs_root *root = pending->root; |
886 | struct btrfs_root *parent_root; | 886 | struct btrfs_root *parent_root; |
887 | struct btrfs_block_rsv *rsv; | ||
887 | struct inode *parent_inode; | 888 | struct inode *parent_inode; |
888 | struct dentry *parent; | 889 | struct dentry *parent; |
889 | struct dentry *dentry; | 890 | struct dentry *dentry; |
@@ -895,6 +896,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
895 | u64 objectid; | 896 | u64 objectid; |
896 | u64 root_flags; | 897 | u64 root_flags; |
897 | 898 | ||
899 | rsv = trans->block_rsv; | ||
900 | |||
898 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 901 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
899 | if (!new_root_item) { | 902 | if (!new_root_item) { |
900 | pending->error = -ENOMEM; | 903 | pending->error = -ENOMEM; |
@@ -1002,6 +1005,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1002 | btrfs_orphan_post_snapshot(trans, pending); | 1005 | btrfs_orphan_post_snapshot(trans, pending); |
1003 | fail: | 1006 | fail: |
1004 | kfree(new_root_item); | 1007 | kfree(new_root_item); |
1008 | trans->block_rsv = rsv; | ||
1005 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1009 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
1006 | return 0; | 1010 | return 0; |
1007 | } | 1011 | } |
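This snapshot hunk and the free-space-cache truncate hunk apply the same discipline: stash trans->block_rsv, point it at the reserve that should pay for the operation, and restore it before the handle is reused. A sketch of the pattern with stand-in types:

static int noop(struct trans_handle *trans);

struct block_rsv    { int id; };
struct trans_handle { struct block_rsv *block_rsv; };

static int do_op_with_rsv(struct trans_handle *trans,
                          struct block_rsv *special,
                          int (*op)(struct trans_handle *))
{
        struct block_rsv *saved = trans->block_rsv; /* stash caller's */
        int ret;

        trans->block_rsv = special;     /* charge this op to 'special' */
        ret = op(trans);
        trans->block_rsv = saved;       /* restore before reuse */
        return ret;
}

static int noop(struct trans_handle *trans) { (void)trans; return 0; }

int main(void)
{
        struct block_rsv caller = { 1 }, orphan = { 2 };
        struct trans_handle trans = { &caller };

        return do_op_with_rsv(&trans, &orphan, noop);
}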
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65f8eda..786639fca067 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct extent_buffer *eb, int slot, | 799 | struct extent_buffer *eb, int slot, |
800 | struct btrfs_key *key) | 800 | struct btrfs_key *key) |
801 | { | 801 | { |
802 | struct inode *dir; | ||
803 | int ret; | ||
804 | struct btrfs_inode_ref *ref; | 802 | struct btrfs_inode_ref *ref; |
803 | struct btrfs_dir_item *di; | ||
804 | struct inode *dir; | ||
805 | struct inode *inode; | 805 | struct inode *inode; |
806 | char *name; | ||
807 | int namelen; | ||
808 | unsigned long ref_ptr; | 806 | unsigned long ref_ptr; |
809 | unsigned long ref_end; | 807 | unsigned long ref_end; |
808 | char *name; | ||
809 | int namelen; | ||
810 | int ret; | ||
810 | int search_done = 0; | 811 | int search_done = 0; |
811 | 812 | ||
812 | /* | 813 | /* |
@@ -909,6 +910,25 @@ again: | |||
909 | } | 910 | } |
910 | btrfs_release_path(path); | 911 | btrfs_release_path(path); |
911 | 912 | ||
913 | /* look for a conflicting sequence number */ | ||
914 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | ||
915 | btrfs_inode_ref_index(eb, ref), | ||
916 | name, namelen, 0); | ||
917 | if (di && !IS_ERR(di)) { | ||
918 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
919 | BUG_ON(ret); | ||
920 | } | ||
921 | btrfs_release_path(path); | ||
922 | |||
923 | /* look for a conflicting name */ | ||
924 | di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), | ||
925 | name, namelen, 0); | ||
926 | if (di && !IS_ERR(di)) { | ||
927 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
928 | BUG_ON(ret); | ||
929 | } | ||
930 | btrfs_release_path(path); | ||
931 | |||
912 | insert: | 932 | insert: |
913 | /* insert our name */ | 933 | /* insert our name */ |
914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 934 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53875ae73ad4..f2a4cc79da61 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
142 | unsigned long limit; | 142 | unsigned long limit; |
143 | unsigned long last_waited = 0; | 143 | unsigned long last_waited = 0; |
144 | int force_reg = 0; | 144 | int force_reg = 0; |
145 | int sync_pending = 0; | ||
145 | struct blk_plug plug; | 146 | struct blk_plug plug; |
146 | 147 | ||
147 | /* | 148 | /* |
@@ -229,6 +230,22 @@ loop_lock: | |||
229 | 230 | ||
230 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 231 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
231 | 232 | ||
233 | /* | ||
234 | * if we're doing the sync list, record that our | ||
235 | * plug has some sync requests on it | ||
236 | * | ||
237 | * If we're doing the regular list and there are | ||
238 | * sync requests sitting around, unplug before | ||
239 | * we add more | ||
240 | */ | ||
241 | if (pending_bios == &device->pending_sync_bios) { | ||
242 | sync_pending = 1; | ||
243 | } else if (sync_pending) { | ||
244 | blk_finish_plug(&plug); | ||
245 | blk_start_plug(&plug); | ||
246 | sync_pending = 0; | ||
247 | } | ||
248 | |||
232 | submit_bio(cur->bi_rw, cur); | 249 | submit_bio(cur->bi_rw, cur); |
233 | num_run++; | 250 | num_run++; |
234 | batch_run++; | 251 | batch_run++; |
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
500 | fs_devices->rw_devices--; | 517 | fs_devices->rw_devices--; |
501 | } | 518 | } |
502 | 519 | ||
520 | if (device->can_discard) | ||
521 | fs_devices->num_can_discard--; | ||
522 | |||
503 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); | 523 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); |
504 | BUG_ON(!new_device); | 524 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 525 | memcpy(new_device, device, sizeof(*new_device)); |
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
508 | new_device->bdev = NULL; | 528 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 529 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 530 | new_device->in_fs_metadata = 0; |
531 | new_device->can_discard = 0; | ||
511 | list_replace_rcu(&device->dev_list, &new_device->dev_list); | 532 | list_replace_rcu(&device->dev_list, &new_device->dev_list); |
512 | 533 | ||
513 | call_rcu(&device->rcu, free_device); | 534 | call_rcu(&device->rcu, free_device); |
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
547 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 568 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
548 | fmode_t flags, void *holder) | 569 | fmode_t flags, void *holder) |
549 | { | 570 | { |
571 | struct request_queue *q; | ||
550 | struct block_device *bdev; | 572 | struct block_device *bdev; |
551 | struct list_head *head = &fs_devices->devices; | 573 | struct list_head *head = &fs_devices->devices; |
552 | struct btrfs_device *device; | 574 | struct btrfs_device *device; |
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
603 | seeding = 0; | 625 | seeding = 0; |
604 | } | 626 | } |
605 | 627 | ||
628 | q = bdev_get_queue(bdev); | ||
629 | if (blk_queue_discard(q)) { | ||
630 | device->can_discard = 1; | ||
631 | fs_devices->num_can_discard++; | ||
632 | } | ||
633 | |||
606 | device->bdev = bdev; | 634 | device->bdev = bdev; |
607 | device->in_fs_metadata = 0; | 635 | device->in_fs_metadata = 0; |
608 | device->mode = flags; | 636 | device->mode = flags; |
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
835 | 863 | ||
836 | max_hole_start = search_start; | 864 | max_hole_start = search_start; |
837 | max_hole_size = 0; | 865 | max_hole_size = 0; |
866 | hole_size = 0; | ||
838 | 867 | ||
839 | if (search_start >= search_end) { | 868 | if (search_start >= search_end) { |
840 | ret = -ENOSPC; | 869 | ret = -ENOSPC; |
@@ -917,7 +946,14 @@ next: | |||
917 | cond_resched(); | 946 | cond_resched(); |
918 | } | 947 | } |
919 | 948 | ||
920 | hole_size = search_end- search_start; | 949 | /* |
950 | * At this point, search_start should be the end of | ||
951 | * allocated dev extents, and when shrinking the device, | ||
952 | * search_end may be smaller than search_start. | ||
953 | */ | ||
954 | if (search_end > search_start) | ||
955 | hole_size = search_end - search_start; | ||
956 | |||
921 | if (hole_size > max_hole_size) { | 957 | if (hole_size > max_hole_size) { |
922 | max_hole_start = search_start; | 958 | max_hole_start = search_start; |
923 | max_hole_size = hole_size; | 959 | max_hole_size = hole_size; |
@@ -1543,6 +1579,7 @@ error: | |||
1543 | 1579 | ||
1544 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | 1580 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) |
1545 | { | 1581 | { |
1582 | struct request_queue *q; | ||
1546 | struct btrfs_trans_handle *trans; | 1583 | struct btrfs_trans_handle *trans; |
1547 | struct btrfs_device *device; | 1584 | struct btrfs_device *device; |
1548 | struct block_device *bdev; | 1585 | struct block_device *bdev; |
@@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1612 | 1649 | ||
1613 | lock_chunks(root); | 1650 | lock_chunks(root); |
1614 | 1651 | ||
1652 | q = bdev_get_queue(bdev); | ||
1653 | if (blk_queue_discard(q)) | ||
1654 | device->can_discard = 1; | ||
1615 | device->writeable = 1; | 1655 | device->writeable = 1; |
1616 | device->work.func = pending_bios_fn; | 1656 | device->work.func = pending_bios_fn; |
1617 | generate_random_uuid(device->uuid); | 1657 | generate_random_uuid(device->uuid); |
@@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1647 | root->fs_info->fs_devices->num_devices++; | 1687 | root->fs_info->fs_devices->num_devices++; |
1648 | root->fs_info->fs_devices->open_devices++; | 1688 | root->fs_info->fs_devices->open_devices++; |
1649 | root->fs_info->fs_devices->rw_devices++; | 1689 | root->fs_info->fs_devices->rw_devices++; |
1690 | if (device->can_discard) | ||
1691 | root->fs_info->fs_devices->num_can_discard++; | ||
1650 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1651 | 1693 | ||
1652 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
@@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2413 | total_avail = device->total_bytes - device->bytes_used; | 2455 | total_avail = device->total_bytes - device->bytes_used; |
2414 | else | 2456 | else |
2415 | total_avail = 0; | 2457 | total_avail = 0; |
2416 | /* avail is off by max(alloc_start, 1MB), but that is the same | 2458 | |
2417 | * for all devices, so it doesn't hurt the sorting later on | 2459 | /* If there is no space on this device, skip it. */ |
2418 | */ | 2460 | if (total_avail == 0) |
2461 | continue; | ||
2419 | 2462 | ||
2420 | ret = find_free_dev_extent(trans, device, | 2463 | ret = find_free_dev_extent(trans, device, |
2421 | max_stripe_size * dev_stripes, | 2464 | max_stripe_size * dev_stripes, |
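The comment in the run_scheduled_bios() hunk describes the plug handling added there: once a sync bio sits in the current plug, the plug is flushed and restarted before any regular bio joins it, so queued sync I/O is not held behind bulk writes. A sketch of that state machine, with plug_finish()/plug_start() standing in for blk_finish_plug()/blk_start_plug():

#include <stdbool.h>
#include <stdio.h>

static void plug_finish(void) { puts("flush plug"); }
static void plug_start(void)  { puts("new plug"); }

static void queue_bio(bool from_sync_list, bool *sync_pending)
{
        if (from_sync_list) {
                *sync_pending = true;   /* plug now holds sync I/O */
        } else if (*sync_pending) {
                plug_finish();          /* push the sync I/O out first */
                plug_start();
                *sync_pending = false;
        }
        /* submit_bio(...) would follow here */
}

int main(void)
{
        bool sync_pending = false;

        queue_bio(true, &sync_pending);   /* sync bio plugged */
        queue_bio(false, &sync_pending);  /* regular bio: flush first */
        return 0;
}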
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7ae..6d866db4e177 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -48,6 +48,7 @@ struct btrfs_device { | |||
48 | int writeable; | 48 | int writeable; |
49 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | 50 | int missing; |
51 | int can_discard; | ||
51 | 52 | ||
52 | spinlock_t io_lock; | 53 | spinlock_t io_lock; |
53 | 54 | ||
@@ -104,6 +105,7 @@ struct btrfs_fs_devices { | |||
104 | u64 rw_devices; | 105 | u64 rw_devices; |
105 | u64 missing_devices; | 106 | u64 missing_devices; |
106 | u64 total_rw_bytes; | 107 | u64 total_rw_bytes; |
108 | u64 num_can_discard; | ||
107 | struct block_device *latest_bdev; | 109 | struct block_device *latest_bdev; |
108 | 110 | ||
109 | /* all of the devices in the FS, protected by a mutex | 111 | /* all of the devices in the FS, protected by a mutex |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d733b9cfea34..426aa464f1af 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -116,6 +116,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
116 | if (ret) | 116 | if (ret) |
117 | goto out; | 117 | goto out; |
118 | btrfs_release_path(path); | 118 | btrfs_release_path(path); |
119 | |||
120 | /* | ||
121 | * remove the attribute | ||
122 | */ | ||
123 | if (!value) | ||
124 | goto out; | ||
119 | } | 125 | } |
120 | 126 | ||
121 | again: | 127 | again: |
@@ -158,6 +164,9 @@ out: | |||
158 | return ret; | 164 | return ret; |
159 | } | 165 | } |
160 | 166 | ||
167 | /* | ||
168 | * @value: "" makes the attribute empty, NULL removes it | ||
169 | */ | ||
161 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, | 170 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, |
162 | struct inode *inode, const char *name, | 171 | struct inode *inode, const char *name, |
163 | const void *value, size_t size, int flags) | 172 | const void *value, size_t size, int flags) |
@@ -374,36 +383,36 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) | |||
374 | XATTR_REPLACE); | 383 | XATTR_REPLACE); |
375 | } | 384 | } |
376 | 385 | ||
377 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | 386 | int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
378 | struct inode *inode, struct inode *dir, | 387 | void *fs_info) |
379 | const struct qstr *qstr) | ||
380 | { | 388 | { |
381 | int err; | 389 | const struct xattr *xattr; |
382 | size_t len; | 390 | struct btrfs_trans_handle *trans = fs_info; |
383 | void *value; | ||
384 | char *suffix; | ||
385 | char *name; | 391 | char *name; |
392 | int err = 0; | ||
386 | 393 | ||
387 | err = security_inode_init_security(inode, dir, qstr, &suffix, &value, | 394 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
388 | &len); | 395 | name = kmalloc(XATTR_SECURITY_PREFIX_LEN + |
389 | if (err) { | 396 | strlen(xattr->name) + 1, GFP_NOFS); |
390 | if (err == -EOPNOTSUPP) | 397 | if (!name) { |
391 | return 0; | 398 | err = -ENOMEM; |
392 | return err; | 399 | break; |
393 | } | 400 | } |
394 | |||
395 | name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, | ||
396 | GFP_NOFS); | ||
397 | if (!name) { | ||
398 | err = -ENOMEM; | ||
399 | } else { | ||
400 | strcpy(name, XATTR_SECURITY_PREFIX); | 401 | strcpy(name, XATTR_SECURITY_PREFIX); |
401 | strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); | 402 | strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); |
402 | err = __btrfs_setxattr(trans, inode, name, value, len, 0); | 403 | err = __btrfs_setxattr(trans, inode, name, |
404 | xattr->value, xattr->value_len, 0); | ||
403 | kfree(name); | 405 | kfree(name); |
406 | if (err < 0) | ||
407 | break; | ||
404 | } | 408 | } |
405 | |||
406 | kfree(suffix); | ||
407 | kfree(value); | ||
408 | return err; | 409 | return err; |
409 | } | 410 | } |
411 | |||
412 | int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, | ||
413 | struct inode *inode, struct inode *dir, | ||
414 | const struct qstr *qstr) | ||
415 | { | ||
416 | return security_inode_init_security(inode, dir, qstr, | ||
417 | &btrfs_initxattrs, trans); | ||
418 | } | ||
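The xattr.c rewrite moves btrfs to the callback form of security_inode_init_security(): the LSM hands over a NULL-terminated array of xattrs and the filesystem stores each one under the "security." prefix. A userspace sketch of the callback's name-building loop; struct xattr mirrors the kernel's and set_xattr() stands in for __btrfs_setxattr():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define XATTR_SECURITY_PREFIX     "security."
#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)

/* Mirrors the kernel's struct xattr handed to initxattrs callbacks. */
struct xattr {
        const char *name;       /* suffix only, e.g. "selinux" */
        const void *value;
        size_t value_len;
};

static int set_xattr(const char *name, const void *val, size_t len)
{
        (void)val;
        printf("set %s (%zu bytes)\n", name, len);
        return 0;
}

/* Walk the NULL-terminated array and build "security.<suffix>" for
 * each entry, as btrfs_initxattrs() does. */
static int initxattrs_sketch(const struct xattr *xattr_array)
{
        const struct xattr *xattr;
        int err = 0;

        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                char *name = malloc(XATTR_SECURITY_PREFIX_LEN +
                                    strlen(xattr->name) + 1);
                if (!name)
                        return -1;      /* -ENOMEM in the kernel */

                strcpy(name, XATTR_SECURITY_PREFIX);
                strcat(name, xattr->name);
                err = set_xattr(name, xattr->value, xattr->value_len);
                free(name);
                if (err < 0)
                        break;
        }
        return err;
}

int main(void)
{
        const struct xattr attrs[] = {
                { "selinux", "label", 6 },
                { NULL, NULL, 0 },
        };

        return initxattrs_sketch(attrs);
}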
diff --git a/fs/buffer.c b/fs/buffer.c index 1a80b048ade8..936d6035f6e2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1470,13 +1470,13 @@ static void discard_buffer(struct buffer_head * bh) | |||
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | /** | 1472 | /** |
1473 | * block_invalidatepage - invalidate part of all of a buffer-backed page | 1473 | * block_invalidatepage - invalidate part or all of a buffer-backed page |
1474 | * | 1474 | * |
1475 | * @page: the page which is affected | 1475 | * @page: the page which is affected |
1476 | * @offset: the index of the truncation point | 1476 | * @offset: the index of the truncation point |
1477 | * | 1477 | * |
1478 | * block_invalidatepage() is called when all or part of the page has become | 1478 | * block_invalidatepage() is called when all or part of the page has become |
1479 | * invalidatedby a truncate operation. | 1479 | * invalidated by a truncate operation. |
1480 | * | 1480 | * |
1481 | * block_invalidatepage() does not have to release all buffers, but it must | 1481 | * block_invalidatepage() does not have to release all buffers, but it must |
1482 | * ensure that no dirty buffer is left outside @offset and that no I/O | 1482 | * ensure that no dirty buffer is left outside @offset and that no I/O |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fee028b5332e..86c59e16ba74 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, | |||
1595 | r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); | 1595 | r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); |
1596 | dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, | 1596 | dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, |
1597 | *ppath); | 1597 | *ppath); |
1598 | } else if (rpath) { | 1598 | } else if (rpath || rino) { |
1599 | *ino = rino; | 1599 | *ino = rino; |
1600 | *ppath = rpath; | 1600 | *ppath = rpath; |
1601 | *pathlen = strlen(rpath); | 1601 | *pathlen = strlen(rpath); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d47c5ec7fb1f..88bacaf385d9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, | |||
813 | fsc = create_fs_client(fsopt, opt); | 813 | fsc = create_fs_client(fsopt, opt); |
814 | if (IS_ERR(fsc)) { | 814 | if (IS_ERR(fsc)) { |
815 | res = ERR_CAST(fsc); | 815 | res = ERR_CAST(fsc); |
816 | kfree(fsopt); | 816 | destroy_mount_options(fsopt); |
817 | kfree(opt); | 817 | ceph_destroy_options(opt); |
818 | goto out_final; | 818 | goto out_final; |
819 | } | 819 | } |
820 | 820 | ||
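The point of the ceph hunk above is that a bare kfree() on a structure that owns other allocations leaks everything it points at; the matching destructor must run on the error path instead. A hedged sketch of the general shape (the myopts names are illustrative, not the ceph API):

	struct myopts {
		char *server_path;	/* separately allocated string */
		char *snapdir_name;	/* separately allocated string */
	};

	static void destroy_myopts(struct myopts *opt)
	{
		if (!opt)
			return;
		kfree(opt->server_path);	/* nested allocations first */
		kfree(opt->snapdir_name);
		kfree(opt);			/* then the container */
	}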
diff --git a/fs/cifs/README b/fs/cifs/README index c5c2c5e5f0f2..895da1dc1550 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -745,4 +745,18 @@ installed and something like the following lines should be added to the | |||
745 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k | 745 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k |
746 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k | 746 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k |
747 | 747 | ||
748 | CIFS kernel module parameters | ||
749 | ============================= | ||
750 | These module parameters can be set when the module is loaded or changed | ||
751 | at runtime through the interface | ||
752 | /sys/module/cifs/parameters/<param> | ||
753 | |||
754 | e.g. echo "value" > /sys/module/cifs/parameters/<param> | ||
755 | |||
756 | 1. echo_retries - The number of echo attempts before giving up and | ||
757 | reconnecting to the server. The default is 5. The value 0 | ||
758 | means never reconnect. | ||
759 | |||
760 | 2. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by | ||
761 | default [Y/y/1]. To disable, use any of [N/n/0]. | ||
748 | 762 | ||
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2fe3cf13b2e9..84e8c0724704 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
176 | 176 | ||
177 | #ifdef CONFIG_CIFS_STATS2 | 177 | #ifdef CONFIG_CIFS_STATS2 |
178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", | 178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", |
179 | atomic_read(&server->inSend), | 179 | atomic_read(&server->in_send), |
180 | atomic_read(&server->num_waiters)); | 180 | atomic_read(&server->num_waiters)); |
181 | #endif | 181 | #endif |
182 | 182 | ||
@@ -511,7 +511,7 @@ static const struct file_operations cifsFYI_proc_fops = { | |||
511 | 511 | ||
512 | static int cifs_oplock_proc_show(struct seq_file *m, void *v) | 512 | static int cifs_oplock_proc_show(struct seq_file *m, void *v) |
513 | { | 513 | { |
514 | seq_printf(m, "%d\n", oplockEnabled); | 514 | seq_printf(m, "%d\n", enable_oplocks); |
515 | return 0; | 515 | return 0; |
516 | } | 516 | } |
517 | 517 | ||
@@ -526,13 +526,16 @@ static ssize_t cifs_oplock_proc_write(struct file *file, | |||
526 | char c; | 526 | char c; |
527 | int rc; | 527 | int rc; |
528 | 528 | ||
529 | printk(KERN_WARNING "CIFS: The /proc/fs/cifs/OplockEnabled interface " | ||
530 | "will be removed in kernel version 3.4. Please migrate to " | ||
531 | "using the 'enable_oplocks' module parameter in cifs.ko.\n"); | ||
529 | rc = get_user(c, buffer); | 532 | rc = get_user(c, buffer); |
530 | if (rc) | 533 | if (rc) |
531 | return rc; | 534 | return rc; |
532 | if (c == '0' || c == 'n' || c == 'N') | 535 | if (c == '0' || c == 'n' || c == 'N') |
533 | oplockEnabled = 0; | 536 | enable_oplocks = false; |
534 | else if (c == '1' || c == 'y' || c == 'Y') | 537 | else if (c == '1' || c == 'y' || c == 'Y') |
535 | oplockEnabled = 1; | 538 | enable_oplocks = true; |
536 | 539 | ||
537 | return count; | 540 | return count; |
538 | } | 541 | } |
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 7260e11e21f8..500d65859279 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -43,6 +43,8 @@ | |||
43 | #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ | 43 | #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ |
44 | #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ | 44 | #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ |
45 | #define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ | 45 | #define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ |
46 | #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */ | ||
47 | #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */ | ||
46 | 48 | ||
47 | struct cifs_sb_info { | 49 | struct cifs_sb_info { |
48 | struct rb_root tlink_tree; | 50 | struct rb_root tlink_tree; |
@@ -55,6 +57,8 @@ struct cifs_sb_info { | |||
55 | atomic_t active; | 57 | atomic_t active; |
56 | uid_t mnt_uid; | 58 | uid_t mnt_uid; |
57 | gid_t mnt_gid; | 59 | gid_t mnt_gid; |
60 | uid_t mnt_backupuid; | ||
61 | gid_t mnt_backupgid; | ||
58 | mode_t mnt_file_mode; | 62 | mode_t mnt_file_mode; |
59 | mode_t mnt_dir_mode; | 63 | mode_t mnt_dir_mode; |
60 | unsigned int mnt_cifs_flags; | 64 | unsigned int mnt_cifs_flags; |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 21de1d6d5849..72ddf23ef6f7 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -91,9 +91,76 @@ cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) | |||
91 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | 91 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); |
92 | spin_unlock(&sidgidlock); | 92 | spin_unlock(&sidgidlock); |
93 | 93 | ||
94 | root = &siduidtree; | ||
95 | spin_lock(&uidsidlock); | ||
96 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
97 | spin_unlock(&uidsidlock); | ||
98 | |||
99 | root = &sidgidtree; | ||
100 | spin_lock(&gidsidlock); | ||
101 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
102 | spin_unlock(&gidsidlock); | ||
103 | |||
94 | return nr_rem; | 104 | return nr_rem; |
95 | } | 105 | } |
96 | 106 | ||
107 | static void | ||
108 | sid_rb_insert(struct rb_root *root, unsigned long cid, | ||
109 | struct cifs_sid_id **psidid, char *typestr) | ||
110 | { | ||
111 | char *strptr; | ||
112 | struct rb_node *node = root->rb_node; | ||
113 | struct rb_node *parent = NULL; | ||
114 | struct rb_node **linkto = &(root->rb_node); | ||
115 | struct cifs_sid_id *lsidid; | ||
116 | |||
117 | while (node) { | ||
118 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | ||
119 | parent = node; | ||
120 | if (cid > lsidid->id) { | ||
121 | linkto = &(node->rb_left); | ||
122 | node = node->rb_left; | ||
123 | } | ||
124 | if (cid < lsidid->id) { | ||
125 | linkto = &(node->rb_right); | ||
126 | node = node->rb_right; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | (*psidid)->id = cid; | ||
131 | (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); | ||
132 | (*psidid)->refcount = 0; | ||
133 | |||
134 | sprintf((*psidid)->sidstr, "%s", typestr); | ||
135 | strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr); | ||
136 | sprintf(strptr, "%lu", cid); | ||
137 | |||
138 | clear_bit(SID_ID_PENDING, &(*psidid)->state); | ||
139 | clear_bit(SID_ID_MAPPED, &(*psidid)->state); | ||
140 | |||
141 | rb_link_node(&(*psidid)->rbnode, parent, linkto); | ||
142 | rb_insert_color(&(*psidid)->rbnode, root); | ||
143 | } | ||
144 | |||
145 | static struct cifs_sid_id * | ||
146 | sid_rb_search(struct rb_root *root, unsigned long cid) | ||
147 | { | ||
148 | struct rb_node *node = root->rb_node; | ||
149 | struct cifs_sid_id *lsidid; | ||
150 | |||
151 | while (node) { | ||
152 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | ||
153 | if (cid > lsidid->id) | ||
154 | node = node->rb_left; | ||
155 | else if (cid < lsidid->id) | ||
156 | node = node->rb_right; | ||
157 | else /* node found */ | ||
158 | return lsidid; | ||
159 | } | ||
160 | |||
161 | return NULL; | ||
162 | } | ||
163 | |||
97 | static struct shrinker cifs_shrinker = { | 164 | static struct shrinker cifs_shrinker = { |
98 | .shrink = cifs_idmap_shrinker, | 165 | .shrink = cifs_idmap_shrinker, |
99 | .seeks = DEFAULT_SEEKS, | 166 | .seeks = DEFAULT_SEEKS, |
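sid_rb_insert() and sid_rb_search() above follow the usual kernel rbtree idiom, except that they send larger keys to the left; any ordering works as long as insert and lookup agree. A generic sketch of the idiom in its conventional ascending form (id_node is illustrative):

	#include <linux/rbtree.h>

	struct id_node {
		struct rb_node rbnode;
		unsigned long id;
	};

	/* lookup must walk with the same ordering the insert used */
	static struct id_node *id_search(struct rb_root *root, unsigned long id)
	{
		struct rb_node *node = root->rb_node;

		while (node) {
			struct id_node *e = rb_entry(node, struct id_node, rbnode);

			if (id < e->id)
				node = node->rb_left;
			else if (id > e->id)
				node = node->rb_right;
			else
				return e;	/* found */
		}
		return NULL;
	}

	static void id_insert(struct rb_root *root, struct id_node *new)
	{
		struct rb_node **link = &root->rb_node, *parent = NULL;

		while (*link) {
			struct id_node *e = rb_entry(*link, struct id_node, rbnode);

			parent = *link;
			if (new->id < e->id)
				link = &(*link)->rb_left;
			else
				link = &(*link)->rb_right;
		}
		rb_link_node(&new->rbnode, parent, link);	/* attach leaf */
		rb_insert_color(&new->rbnode, root);		/* rebalance */
	}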
@@ -110,6 +177,7 @@ cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen) | |||
110 | 177 | ||
111 | memcpy(payload, data, datalen); | 178 | memcpy(payload, data, datalen); |
112 | key->payload.data = payload; | 179 | key->payload.data = payload; |
180 | key->datalen = datalen; | ||
113 | return 0; | 181 | return 0; |
114 | } | 182 | } |
115 | 183 | ||
@@ -224,6 +292,120 @@ sidid_pending_wait(void *unused) | |||
224 | } | 292 | } |
225 | 293 | ||
226 | static int | 294 | static int |
295 | id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) | ||
296 | { | ||
297 | int rc = 0; | ||
298 | struct key *sidkey; | ||
299 | const struct cred *saved_cred; | ||
300 | struct cifs_sid *lsid; | ||
301 | struct cifs_sid_id *psidid, *npsidid; | ||
302 | struct rb_root *cidtree; | ||
303 | spinlock_t *cidlock; | ||
304 | |||
305 | if (sidtype == SIDOWNER) { | ||
306 | cidlock = &siduidlock; | ||
307 | cidtree = &uidtree; | ||
308 | } else if (sidtype == SIDGROUP) { | ||
309 | cidlock = &sidgidlock; | ||
310 | cidtree = &gidtree; | ||
311 | } else | ||
312 | return -EINVAL; | ||
313 | |||
314 | spin_lock(cidlock); | ||
315 | psidid = sid_rb_search(cidtree, cid); | ||
316 | |||
317 | if (!psidid) { /* node does not exist, allocate one & attempt adding */ | ||
318 | spin_unlock(cidlock); | ||
319 | npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL); | ||
320 | if (!npsidid) | ||
321 | return -ENOMEM; | ||
322 | |||
323 | npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL); | ||
324 | if (!npsidid->sidstr) { | ||
325 | kfree(npsidid); | ||
326 | return -ENOMEM; | ||
327 | } | ||
328 | |||
329 | spin_lock(cidlock); | ||
330 | psidid = sid_rb_search(cidtree, cid); | ||
331 | if (psidid) { /* node happened to get inserted meanwhile */ | ||
332 | ++psidid->refcount; | ||
333 | spin_unlock(cidlock); | ||
334 | kfree(npsidid->sidstr); | ||
335 | kfree(npsidid); | ||
336 | } else { | ||
337 | psidid = npsidid; | ||
338 | sid_rb_insert(cidtree, cid, &psidid, | ||
339 | sidtype == SIDOWNER ? "oi:" : "gi:"); | ||
340 | ++psidid->refcount; | ||
341 | spin_unlock(cidlock); | ||
342 | } | ||
343 | } else { | ||
344 | ++psidid->refcount; | ||
345 | spin_unlock(cidlock); | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * If we are here, it is safe to access psidid and its fields | ||
350 | * since a reference was taken earlier while holding the spinlock. | ||
351 | * A reference on the node is put without holding the spinlock | ||
352 | * and that is OK in this case: the shrinker will not erase | ||
353 | * this node until all references are put, and we do not access | ||
354 | * any fields of the node after a reference is put. | ||
355 | */ | ||
356 | if (test_bit(SID_ID_MAPPED, &psidid->state)) { | ||
357 | memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); | ||
358 | psidid->time = jiffies; /* update ts for accessing */ | ||
359 | goto id_sid_out; | ||
360 | } | ||
361 | |||
362 | if (time_after(psidid->time + SID_MAP_RETRY, jiffies)) { | ||
363 | rc = -EINVAL; | ||
364 | goto id_sid_out; | ||
365 | } | ||
366 | |||
367 | if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) { | ||
368 | saved_cred = override_creds(root_cred); | ||
369 | sidkey = request_key(&cifs_idmap_key_type, psidid->sidstr, ""); | ||
370 | if (IS_ERR(sidkey)) { | ||
371 | rc = -EINVAL; | ||
372 | cFYI(1, "%s: Can't map an id to a SID", __func__); | ||
373 | } else { | ||
374 | lsid = (struct cifs_sid *)sidkey->payload.data; | ||
375 | memcpy(&psidid->sid, lsid, | ||
376 | sidkey->datalen < sizeof(struct cifs_sid) ? | ||
377 | sidkey->datalen : sizeof(struct cifs_sid)); | ||
378 | memcpy(ssid, &psidid->sid, | ||
379 | sidkey->datalen < sizeof(struct cifs_sid) ? | ||
380 | sidkey->datalen : sizeof(struct cifs_sid)); | ||
381 | set_bit(SID_ID_MAPPED, &psidid->state); | ||
382 | key_put(sidkey); | ||
383 | kfree(psidid->sidstr); | ||
384 | } | ||
385 | psidid->time = jiffies; /* update ts for accessing */ | ||
386 | revert_creds(saved_cred); | ||
387 | clear_bit(SID_ID_PENDING, &psidid->state); | ||
388 | wake_up_bit(&psidid->state, SID_ID_PENDING); | ||
389 | } else { | ||
390 | rc = wait_on_bit(&psidid->state, SID_ID_PENDING, | ||
391 | sidid_pending_wait, TASK_INTERRUPTIBLE); | ||
392 | if (rc) { | ||
393 | cFYI(1, "%s: sidid_pending_wait interrupted %d", | ||
394 | __func__, rc); | ||
395 | --psidid->refcount; | ||
396 | return rc; | ||
397 | } | ||
398 | if (test_bit(SID_ID_MAPPED, &psidid->state)) | ||
399 | memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); | ||
400 | else | ||
401 | rc = -EINVAL; | ||
402 | } | ||
403 | id_sid_out: | ||
404 | --psidid->refcount; | ||
405 | return rc; | ||
406 | } | ||
407 | |||
408 | static int | ||
227 | sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, | 409 | sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, |
228 | struct cifs_fattr *fattr, uint sidtype) | 410 | struct cifs_fattr *fattr, uint sidtype) |
229 | { | 411 | { |
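id_to_sid() above is built around a common one-shot upcall pattern: the caller that wins test_and_set_bit() performs the slow request_key() lookup exactly once, while later callers sleep in wait_on_bit() until the winner clears the bit and calls wake_up_bit(). Stripped to that coordination alone, using the action-callback wait_on_bit() signature of this kernel era, with illustrative names (cache_entry, do_slow_lookup):

	enum { ENTRY_PENDING, ENTRY_MAPPED };

	struct cache_entry {
		unsigned long state;
		/* ... cached payload ... */
	};

	static int do_slow_lookup(struct cache_entry *e);	/* illustrative */

	static int pending_wait(void *unused)
	{
		schedule();
		return signal_pending(current) ? -ERESTARTSYS : 0;
	}

	static int lookup_once(struct cache_entry *e)
	{
		int rc = 0;

		if (!test_and_set_bit(ENTRY_PENDING, &e->state)) {
			/* we won the race: do the expensive lookup exactly once */
			rc = do_slow_lookup(e);
			if (!rc)
				set_bit(ENTRY_MAPPED, &e->state);
			clear_bit(ENTRY_PENDING, &e->state);
			wake_up_bit(&e->state, ENTRY_PENDING);
		} else {
			/* lost the race: sleep until the winner finishes */
			rc = wait_on_bit(&e->state, ENTRY_PENDING,
					 pending_wait, TASK_INTERRUPTIBLE);
			if (!rc && !test_bit(ENTRY_MAPPED, &e->state))
				rc = -EINVAL;
		}
		return rc;
	}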
@@ -383,6 +565,10 @@ init_cifs_idmap(void) | |||
383 | spin_lock_init(&sidgidlock); | 565 | spin_lock_init(&sidgidlock); |
384 | gidtree = RB_ROOT; | 566 | gidtree = RB_ROOT; |
385 | 567 | ||
568 | spin_lock_init(&uidsidlock); | ||
569 | siduidtree = RB_ROOT; | ||
570 | spin_lock_init(&gidsidlock); | ||
571 | sidgidtree = RB_ROOT; | ||
386 | register_shrinker(&cifs_shrinker); | 572 | register_shrinker(&cifs_shrinker); |
387 | 573 | ||
388 | cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring)); | 574 | cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring)); |
@@ -422,6 +608,18 @@ cifs_destroy_idmaptrees(void) | |||
422 | while ((node = rb_first(root))) | 608 | while ((node = rb_first(root))) |
423 | rb_erase(node, root); | 609 | rb_erase(node, root); |
424 | spin_unlock(&sidgidlock); | 610 | spin_unlock(&sidgidlock); |
611 | |||
612 | root = &siduidtree; | ||
613 | spin_lock(&uidsidlock); | ||
614 | while ((node = rb_first(root))) | ||
615 | rb_erase(node, root); | ||
616 | spin_unlock(&uidsidlock); | ||
617 | |||
618 | root = &sidgidtree; | ||
619 | spin_lock(&gidsidlock); | ||
620 | while ((node = rb_first(root))) | ||
621 | rb_erase(node, root); | ||
622 | spin_unlock(&gidsidlock); | ||
425 | } | 623 | } |
426 | 624 | ||
427 | /* if the two SIDs (roughly equivalent to a UUID for a user or group) are | 625 | /* if the two SIDs (roughly equivalent to a UUID for a user or group) are |
@@ -706,7 +904,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, | |||
706 | acl_size = sizeof(struct cifs_acl); | 904 | acl_size = sizeof(struct cifs_acl); |
707 | 905 | ||
708 | num_aces = le32_to_cpu(pdacl->num_aces); | 906 | num_aces = le32_to_cpu(pdacl->num_aces); |
709 | if (num_aces > 0) { | 907 | if (num_aces > 0) { |
710 | umode_t user_mask = S_IRWXU; | 908 | umode_t user_mask = S_IRWXU; |
711 | umode_t group_mask = S_IRWXG; | 909 | umode_t group_mask = S_IRWXG; |
712 | umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; | 910 | umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; |
@@ -868,52 +1066,82 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, | |||
868 | else | 1066 | else |
869 | cFYI(1, "no ACL"); /* BB grant all or default perms? */ | 1067 | cFYI(1, "no ACL"); /* BB grant all or default perms? */ |
870 | 1068 | ||
871 | /* cifscred->uid = owner_sid_ptr->rid; | ||
872 | cifscred->gid = group_sid_ptr->rid; | ||
873 | memcpy((void *)(&(cifscred->osid)), (void *)owner_sid_ptr, | ||
874 | sizeof(struct cifs_sid)); | ||
875 | memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, | ||
876 | sizeof(struct cifs_sid)); */ | ||
877 | |||
878 | return rc; | 1069 | return rc; |
879 | } | 1070 | } |
880 | 1071 | ||
881 | |||
882 | /* Convert permission bits from mode to equivalent CIFS ACL */ | 1072 | /* Convert permission bits from mode to equivalent CIFS ACL */ |
883 | static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, | 1073 | static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, |
884 | struct inode *inode, __u64 nmode) | 1074 | __u32 secdesclen, __u64 nmode, uid_t uid, gid_t gid, int *aclflag) |
885 | { | 1075 | { |
886 | int rc = 0; | 1076 | int rc = 0; |
887 | __u32 dacloffset; | 1077 | __u32 dacloffset; |
888 | __u32 ndacloffset; | 1078 | __u32 ndacloffset; |
889 | __u32 sidsoffset; | 1079 | __u32 sidsoffset; |
890 | struct cifs_sid *owner_sid_ptr, *group_sid_ptr; | 1080 | struct cifs_sid *owner_sid_ptr, *group_sid_ptr; |
1081 | struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; | ||
891 | struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ | 1082 | struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ |
892 | struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ | 1083 | struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ |
893 | 1084 | ||
894 | if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL)) | 1085 | if (nmode != NO_CHANGE_64) { /* chmod */ |
895 | return -EIO; | 1086 | owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + |
896 | |||
897 | owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + | ||
898 | le32_to_cpu(pntsd->osidoffset)); | 1087 | le32_to_cpu(pntsd->osidoffset)); |
899 | group_sid_ptr = (struct cifs_sid *)((char *)pntsd + | 1088 | group_sid_ptr = (struct cifs_sid *)((char *)pntsd + |
900 | le32_to_cpu(pntsd->gsidoffset)); | 1089 | le32_to_cpu(pntsd->gsidoffset)); |
901 | 1090 | dacloffset = le32_to_cpu(pntsd->dacloffset); | |
902 | dacloffset = le32_to_cpu(pntsd->dacloffset); | 1091 | dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); |
903 | dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); | 1092 | ndacloffset = sizeof(struct cifs_ntsd); |
904 | 1093 | ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); | |
905 | ndacloffset = sizeof(struct cifs_ntsd); | 1094 | ndacl_ptr->revision = dacl_ptr->revision; |
906 | ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); | 1095 | ndacl_ptr->size = 0; |
907 | ndacl_ptr->revision = dacl_ptr->revision; | 1096 | ndacl_ptr->num_aces = 0; |
908 | ndacl_ptr->size = 0; | 1097 | |
909 | ndacl_ptr->num_aces = 0; | 1098 | rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr, |
910 | 1099 | nmode); | |
911 | rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr, nmode); | 1100 | sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size); |
912 | 1101 | /* copy sec desc control portion & owner and group sids */ | |
913 | sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size); | 1102 | copy_sec_desc(pntsd, pnntsd, sidsoffset); |
914 | 1103 | *aclflag = CIFS_ACL_DACL; | |
915 | /* copy security descriptor control portion and owner and group sid */ | 1104 | } else { |
916 | copy_sec_desc(pntsd, pnntsd, sidsoffset); | 1105 | memcpy(pnntsd, pntsd, secdesclen); |
1106 | if (uid != NO_CHANGE_32) { /* chown */ | ||
1107 | owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + | ||
1108 | le32_to_cpu(pnntsd->osidoffset)); | ||
1109 | nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), | ||
1110 | GFP_KERNEL); | ||
1111 | if (!nowner_sid_ptr) | ||
1112 | return -ENOMEM; | ||
1113 | rc = id_to_sid(uid, SIDOWNER, nowner_sid_ptr); | ||
1114 | if (rc) { | ||
1115 | cFYI(1, "%s: Mapping error %d for owner id %d", | ||
1116 | __func__, rc, uid); | ||
1117 | kfree(nowner_sid_ptr); | ||
1118 | return rc; | ||
1119 | } | ||
1120 | memcpy(owner_sid_ptr, nowner_sid_ptr, | ||
1121 | sizeof(struct cifs_sid)); | ||
1122 | kfree(nowner_sid_ptr); | ||
1123 | *aclflag = CIFS_ACL_OWNER; | ||
1124 | } | ||
1125 | if (gid != NO_CHANGE_32) { /* chgrp */ | ||
1126 | group_sid_ptr = (struct cifs_sid *)((char *)pnntsd + | ||
1127 | le32_to_cpu(pnntsd->gsidoffset)); | ||
1128 | ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), | ||
1129 | GFP_KERNEL); | ||
1130 | if (!ngroup_sid_ptr) | ||
1131 | return -ENOMEM; | ||
1132 | rc = id_to_sid(gid, SIDGROUP, ngroup_sid_ptr); | ||
1133 | if (rc) { | ||
1134 | cFYI(1, "%s: Mapping error %d for group id %d", | ||
1135 | __func__, rc, gid); | ||
1136 | kfree(ngroup_sid_ptr); | ||
1137 | return rc; | ||
1138 | } | ||
1139 | memcpy(group_sid_ptr, ngroup_sid_ptr, | ||
1140 | sizeof(struct cifs_sid)); | ||
1141 | kfree(ngroup_sid_ptr); | ||
1142 | *aclflag = CIFS_ACL_GROUP; | ||
1143 | } | ||
1144 | } | ||
917 | 1145 | ||
918 | return rc; | 1146 | return rc; |
919 | } | 1147 | } |
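After this change, build_sec_desc() serves two distinct operations: chmod synthesizes a fresh DACL from the mode bits, while chown/chgrp copies the existing descriptor wholesale and patches a single SID in place, reporting which part changed through *aclflag. Reduced to its control flow, with build_dacl_from_mode() and patch_sid() as illustrative stand-ins for the real helpers:

	if (nmode != NO_CHANGE_64) {
		/* chmod: build a new DACL from the mode bits */
		rc = build_dacl_from_mode(pnntsd, pntsd, nmode);
		*aclflag = CIFS_ACL_DACL;
	} else {
		/* chown/chgrp: keep the descriptor, overwrite one SID */
		memcpy(pnntsd, pntsd, secdesclen);
		if (uid != NO_CHANGE_32) {		/* chown */
			rc = patch_sid(pnntsd, SIDOWNER, uid);
			*aclflag = CIFS_ACL_OWNER;
		}
		if (!rc && gid != NO_CHANGE_32) {	/* chgrp */
			rc = patch_sid(pnntsd, SIDGROUP, gid);
			*aclflag = CIFS_ACL_GROUP;
		}
	}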
@@ -945,7 +1173,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
945 | { | 1173 | { |
946 | struct cifs_ntsd *pntsd = NULL; | 1174 | struct cifs_ntsd *pntsd = NULL; |
947 | int oplock = 0; | 1175 | int oplock = 0; |
948 | int xid, rc; | 1176 | int xid, rc, create_options = 0; |
949 | __u16 fid; | 1177 | __u16 fid; |
950 | struct cifs_tcon *tcon; | 1178 | struct cifs_tcon *tcon; |
951 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | 1179 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); |
@@ -956,9 +1184,12 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
956 | tcon = tlink_tcon(tlink); | 1184 | tcon = tlink_tcon(tlink); |
957 | xid = GetXid(); | 1185 | xid = GetXid(); |
958 | 1186 | ||
959 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, | 1187 | if (backup_cred(cifs_sb)) |
960 | &fid, &oplock, NULL, cifs_sb->local_nls, | 1188 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
961 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 1189 | |
1190 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, | ||
1191 | create_options, &fid, &oplock, NULL, cifs_sb->local_nls, | ||
1192 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
962 | if (!rc) { | 1193 | if (!rc) { |
963 | rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); | 1194 | rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); |
964 | CIFSSMBClose(xid, tcon, fid); | 1195 | CIFSSMBClose(xid, tcon, fid); |
@@ -991,31 +1222,15 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
991 | return pntsd; | 1222 | return pntsd; |
992 | } | 1223 | } |
993 | 1224 | ||
994 | static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, | 1225 | /* Set an ACL on the server */ |
995 | struct cifs_ntsd *pnntsd, u32 acllen) | 1226 | int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, |
996 | { | 1227 | struct inode *inode, const char *path, int aclflag) |
997 | int xid, rc; | ||
998 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
999 | |||
1000 | if (IS_ERR(tlink)) | ||
1001 | return PTR_ERR(tlink); | ||
1002 | |||
1003 | xid = GetXid(); | ||
1004 | rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); | ||
1005 | FreeXid(xid); | ||
1006 | cifs_put_tlink(tlink); | ||
1007 | |||
1008 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | ||
1009 | return rc; | ||
1010 | } | ||
1011 | |||
1012 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | ||
1013 | struct cifs_ntsd *pnntsd, u32 acllen) | ||
1014 | { | 1228 | { |
1015 | int oplock = 0; | 1229 | int oplock = 0; |
1016 | int xid, rc; | 1230 | int xid, rc, access_flags, create_options = 0; |
1017 | __u16 fid; | 1231 | __u16 fid; |
1018 | struct cifs_tcon *tcon; | 1232 | struct cifs_tcon *tcon; |
1233 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1019 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | 1234 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); |
1020 | 1235 | ||
1021 | if (IS_ERR(tlink)) | 1236 | if (IS_ERR(tlink)) |
@@ -1024,15 +1239,23 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | |||
1024 | tcon = tlink_tcon(tlink); | 1239 | tcon = tlink_tcon(tlink); |
1025 | xid = GetXid(); | 1240 | xid = GetXid(); |
1026 | 1241 | ||
1027 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, WRITE_DAC, 0, | 1242 | if (backup_cred(cifs_sb)) |
1028 | &fid, &oplock, NULL, cifs_sb->local_nls, | 1243 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
1029 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 1244 | |
1245 | if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) | ||
1246 | access_flags = WRITE_OWNER; | ||
1247 | else | ||
1248 | access_flags = WRITE_DAC; | ||
1249 | |||
1250 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, access_flags, | ||
1251 | create_options, &fid, &oplock, NULL, cifs_sb->local_nls, | ||
1252 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1030 | if (rc) { | 1253 | if (rc) { |
1031 | cERROR(1, "Unable to open file to set ACL"); | 1254 | cERROR(1, "Unable to open file to set ACL"); |
1032 | goto out; | 1255 | goto out; |
1033 | } | 1256 | } |
1034 | 1257 | ||
1035 | rc = CIFSSMBSetCIFSACL(xid, tcon, fid, pnntsd, acllen); | 1258 | rc = CIFSSMBSetCIFSACL(xid, tcon, fid, pnntsd, acllen, aclflag); |
1036 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | 1259 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); |
1037 | 1260 | ||
1038 | CIFSSMBClose(xid, tcon, fid); | 1261 | CIFSSMBClose(xid, tcon, fid); |
@@ -1042,25 +1265,6 @@ out: | |||
1042 | return rc; | 1265 | return rc; |
1043 | } | 1266 | } |
1044 | 1267 | ||
1045 | /* Set an ACL on the server */ | ||
1046 | int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | ||
1047 | struct inode *inode, const char *path) | ||
1048 | { | ||
1049 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1050 | struct cifsFileInfo *open_file; | ||
1051 | int rc; | ||
1052 | |||
1053 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); | ||
1054 | |||
1055 | open_file = find_readable_file(CIFS_I(inode), true); | ||
1056 | if (!open_file) | ||
1057 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | ||
1058 | |||
1059 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); | ||
1060 | cifsFileInfo_put(open_file); | ||
1061 | return rc; | ||
1062 | } | ||
1063 | |||
1064 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ | 1268 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ |
1065 | int | 1269 | int |
1066 | cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, | 1270 | cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, |
@@ -1092,9 +1296,12 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, | |||
1092 | } | 1296 | } |
1093 | 1297 | ||
1094 | /* Convert mode bits to an ACL so we can update the ACL on the server */ | 1298 | /* Convert mode bits to an ACL so we can update the ACL on the server */ |
1095 | int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode) | 1299 | int |
1300 | id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, | ||
1301 | uid_t uid, gid_t gid) | ||
1096 | { | 1302 | { |
1097 | int rc = 0; | 1303 | int rc = 0; |
1304 | int aclflag = CIFS_ACL_DACL; /* default flag to set */ | ||
1098 | __u32 secdesclen = 0; | 1305 | __u32 secdesclen = 0; |
1099 | struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ | 1306 | struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ |
1100 | struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ | 1307 | struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ |
@@ -1124,13 +1331,15 @@ int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode) | |||
1124 | return -ENOMEM; | 1331 | return -ENOMEM; |
1125 | } | 1332 | } |
1126 | 1333 | ||
1127 | rc = build_sec_desc(pntsd, pnntsd, inode, nmode); | 1334 | rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid, |
1335 | &aclflag); | ||
1128 | 1336 | ||
1129 | cFYI(DBG2, "build_sec_desc rc: %d", rc); | 1337 | cFYI(DBG2, "build_sec_desc rc: %d", rc); |
1130 | 1338 | ||
1131 | if (!rc) { | 1339 | if (!rc) { |
1132 | /* Set the security descriptor */ | 1340 | /* Set the security descriptor */ |
1133 | rc = set_cifs_acl(pnntsd, secdesclen, inode, path); | 1341 | rc = set_cifs_acl(pnntsd, secdesclen, inode, |
1342 | path, aclflag); | ||
1134 | cFYI(DBG2, "set_cifs_acl rc: %d", rc); | 1343 | cFYI(DBG2, "set_cifs_acl rc: %d", rc); |
1135 | } | 1344 | } |
1136 | 1345 | ||
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index e76bfeb68267..2cfb695d1f89 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -37,83 +37,8 @@ | |||
37 | * the sequence number before this function is called. Also, this function | 37 | * the sequence number before this function is called. Also, this function |
38 | * should be called with the server->srv_mutex held. | 38 | * should be called with the server->srv_mutex held. |
39 | */ | 39 | */ |
40 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, | 40 | static int cifs_calc_signature(const struct kvec *iov, int n_vec, |
41 | struct TCP_Server_Info *server, char *signature) | 41 | struct TCP_Server_Info *server, char *signature) |
42 | { | ||
43 | int rc; | ||
44 | |||
45 | if (cifs_pdu == NULL || signature == NULL || server == NULL) | ||
46 | return -EINVAL; | ||
47 | |||
48 | if (!server->secmech.sdescmd5) { | ||
49 | cERROR(1, "%s: Can't generate signature\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | |||
53 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); | ||
54 | if (rc) { | ||
55 | cERROR(1, "%s: Could not init md5\n", __func__); | ||
56 | return rc; | ||
57 | } | ||
58 | |||
59 | rc = crypto_shash_update(&server->secmech.sdescmd5->shash, | ||
60 | server->session_key.response, server->session_key.len); | ||
61 | if (rc) { | ||
62 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
63 | return rc; | ||
64 | } | ||
65 | |||
66 | rc = crypto_shash_update(&server->secmech.sdescmd5->shash, | ||
67 | cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length)); | ||
68 | if (rc) { | ||
69 | cERROR(1, "%s: Could not update with payload\n", __func__); | ||
70 | return rc; | ||
71 | } | ||
72 | |||
73 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); | ||
74 | if (rc) | ||
75 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
76 | |||
77 | return rc; | ||
78 | } | ||
79 | |||
80 | /* must be called with server->srv_mutex held */ | ||
81 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | ||
82 | __u32 *pexpected_response_sequence_number) | ||
83 | { | ||
84 | int rc = 0; | ||
85 | char smb_signature[20]; | ||
86 | |||
87 | if ((cifs_pdu == NULL) || (server == NULL)) | ||
88 | return -EINVAL; | ||
89 | |||
90 | if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) || | ||
91 | server->tcpStatus == CifsNeedNegotiate) | ||
92 | return rc; | ||
93 | |||
94 | if (!server->session_estab) { | ||
95 | strncpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); | ||
96 | return rc; | ||
97 | } | ||
98 | |||
99 | cifs_pdu->Signature.Sequence.SequenceNumber = | ||
100 | cpu_to_le32(server->sequence_number); | ||
101 | cifs_pdu->Signature.Sequence.Reserved = 0; | ||
102 | |||
103 | *pexpected_response_sequence_number = server->sequence_number++; | ||
104 | server->sequence_number++; | ||
105 | |||
106 | rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); | ||
107 | if (rc) | ||
108 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | ||
109 | else | ||
110 | memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); | ||
111 | |||
112 | return rc; | ||
113 | } | ||
114 | |||
115 | static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | ||
116 | struct TCP_Server_Info *server, char *signature) | ||
117 | { | 42 | { |
118 | int i; | 43 | int i; |
119 | int rc; | 44 | int rc; |
@@ -179,7 +104,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
179 | { | 104 | { |
180 | int rc = 0; | 105 | int rc = 0; |
181 | char smb_signature[20]; | 106 | char smb_signature[20]; |
182 | struct smb_hdr *cifs_pdu = iov[0].iov_base; | 107 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; |
183 | 108 | ||
184 | if ((cifs_pdu == NULL) || (server == NULL)) | 109 | if ((cifs_pdu == NULL) || (server == NULL)) |
185 | return -EINVAL; | 110 | return -EINVAL; |
@@ -189,7 +114,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
189 | return rc; | 114 | return rc; |
190 | 115 | ||
191 | if (!server->session_estab) { | 116 | if (!server->session_estab) { |
192 | strncpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); | 117 | memcpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); |
193 | return rc; | 118 | return rc; |
194 | } | 119 | } |
195 | 120 | ||
@@ -200,7 +125,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
200 | *pexpected_response_sequence_number = server->sequence_number++; | 125 | *pexpected_response_sequence_number = server->sequence_number++; |
201 | server->sequence_number++; | 126 | server->sequence_number++; |
202 | 127 | ||
203 | rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); | 128 | rc = cifs_calc_signature(iov, n_vec, server, smb_signature); |
204 | if (rc) | 129 | if (rc) |
205 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 130 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
206 | else | 131 | else |
@@ -209,13 +134,27 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
209 | return rc; | 134 | return rc; |
210 | } | 135 | } |
211 | 136 | ||
212 | int cifs_verify_signature(struct smb_hdr *cifs_pdu, | 137 | /* must be called with server->srv_mutex held */ |
138 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | ||
139 | __u32 *pexpected_response_sequence_number) | ||
140 | { | ||
141 | struct kvec iov; | ||
142 | |||
143 | iov.iov_base = cifs_pdu; | ||
144 | iov.iov_len = be32_to_cpu(cifs_pdu->smb_buf_length) + 4; | ||
145 | |||
146 | return cifs_sign_smb2(&iov, 1, server, | ||
147 | pexpected_response_sequence_number); | ||
148 | } | ||
149 | |||
150 | int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, | ||
213 | struct TCP_Server_Info *server, | 151 | struct TCP_Server_Info *server, |
214 | __u32 expected_sequence_number) | 152 | __u32 expected_sequence_number) |
215 | { | 153 | { |
216 | unsigned int rc; | 154 | unsigned int rc; |
217 | char server_response_sig[8]; | 155 | char server_response_sig[8]; |
218 | char what_we_think_sig_should_be[20]; | 156 | char what_we_think_sig_should_be[20]; |
157 | struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; | ||
219 | 158 | ||
220 | if (cifs_pdu == NULL || server == NULL) | 159 | if (cifs_pdu == NULL || server == NULL) |
221 | return -EINVAL; | 160 | return -EINVAL; |
@@ -247,8 +186,8 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
247 | cifs_pdu->Signature.Sequence.Reserved = 0; | 186 | cifs_pdu->Signature.Sequence.Reserved = 0; |
248 | 187 | ||
249 | mutex_lock(&server->srv_mutex); | 188 | mutex_lock(&server->srv_mutex); |
250 | rc = cifs_calculate_signature(cifs_pdu, server, | 189 | rc = cifs_calc_signature(iov, nr_iov, server, |
251 | what_we_think_sig_should_be); | 190 | what_we_think_sig_should_be); |
252 | mutex_unlock(&server->srv_mutex); | 191 | mutex_unlock(&server->srv_mutex); |
253 | 192 | ||
254 | if (rc) | 193 | if (rc) |
@@ -351,9 +290,7 @@ static int | |||
351 | build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | 290 | build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) |
352 | { | 291 | { |
353 | unsigned int dlen; | 292 | unsigned int dlen; |
354 | unsigned int wlen; | 293 | unsigned int size = 2 * sizeof(struct ntlmssp2_name); |
355 | unsigned int size = 6 * sizeof(struct ntlmssp2_name); | ||
356 | __le64 curtime; | ||
357 | char *defdmname = "WORKGROUP"; | 294 | char *defdmname = "WORKGROUP"; |
358 | unsigned char *blobptr; | 295 | unsigned char *blobptr; |
359 | struct ntlmssp2_name *attrptr; | 296 | struct ntlmssp2_name *attrptr; |
@@ -365,15 +302,14 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
365 | } | 302 | } |
366 | 303 | ||
367 | dlen = strlen(ses->domainName); | 304 | dlen = strlen(ses->domainName); |
368 | wlen = strlen(ses->server->hostname); | ||
369 | 305 | ||
370 | /* The length of this blob is a size which is | 306 | /* |
371 | * six times the size of a structure which holds name/size + | 307 | * The length of this blob is two times the size of a |
372 | * two times the unicode length of a domain name + | 308 | * structure (av pair) which holds name/size |
373 | * two times the unicode length of a server name + | 309 | * (for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL) + |
374 | * size of a timestamp (which is 8 bytes). | 310 | * unicode length of a netbios domain name |
375 | */ | 311 | */ |
376 | ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; | 312 | ses->auth_key.len = size + 2 * dlen; |
377 | ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); | 313 | ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); |
378 | if (!ses->auth_key.response) { | 314 | if (!ses->auth_key.response) { |
379 | ses->auth_key.len = 0; | 315 | ses->auth_key.len = 0; |
@@ -384,44 +320,15 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
384 | blobptr = ses->auth_key.response; | 320 | blobptr = ses->auth_key.response; |
385 | attrptr = (struct ntlmssp2_name *) blobptr; | 321 | attrptr = (struct ntlmssp2_name *) blobptr; |
386 | 322 | ||
323 | /* | ||
324 | * As defined in MS-NLMP 3.3.2, just this av pair field | ||
325 | * is sufficient as part of the temp | ||
326 | */ | ||
387 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); | 327 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); |
388 | attrptr->length = cpu_to_le16(2 * dlen); | 328 | attrptr->length = cpu_to_le16(2 * dlen); |
389 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | 329 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); |
390 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | 330 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); |
391 | 331 | ||
392 | blobptr += 2 * dlen; | ||
393 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
394 | |||
395 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_COMPUTER_NAME); | ||
396 | attrptr->length = cpu_to_le16(2 * wlen); | ||
397 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
398 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
399 | |||
400 | blobptr += 2 * wlen; | ||
401 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
402 | |||
403 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_DOMAIN_NAME); | ||
404 | attrptr->length = cpu_to_le16(2 * dlen); | ||
405 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
406 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | ||
407 | |||
408 | blobptr += 2 * dlen; | ||
409 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
410 | |||
411 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_COMPUTER_NAME); | ||
412 | attrptr->length = cpu_to_le16(2 * wlen); | ||
413 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
414 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
415 | |||
416 | blobptr += 2 * wlen; | ||
417 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
418 | |||
419 | attrptr->type = cpu_to_le16(NTLMSSP_AV_TIMESTAMP); | ||
420 | attrptr->length = cpu_to_le16(sizeof(__le64)); | ||
421 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
422 | curtime = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); | ||
423 | memcpy(blobptr, &curtime, sizeof(__le64)); | ||
424 | |||
425 | return 0; | 332 | return 0; |
426 | } | 333 | } |
427 | 334 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f93eb948d071..8f1fe324162b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -53,7 +53,7 @@ | |||
53 | int cifsFYI = 0; | 53 | int cifsFYI = 0; |
54 | int cifsERROR = 1; | 54 | int cifsERROR = 1; |
55 | int traceSMB = 0; | 55 | int traceSMB = 0; |
56 | unsigned int oplockEnabled = 1; | 56 | bool enable_oplocks = true; |
57 | unsigned int linuxExtEnabled = 1; | 57 | unsigned int linuxExtEnabled = 1; |
58 | unsigned int lookupCacheEnabled = 1; | 58 | unsigned int lookupCacheEnabled = 1; |
59 | unsigned int multiuser_mount = 0; | 59 | unsigned int multiuser_mount = 0; |
@@ -74,7 +74,7 @@ module_param(cifs_min_small, int, 0); | |||
74 | MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " | 74 | MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " |
75 | "Range: 2 to 256"); | 75 | "Range: 2 to 256"); |
76 | unsigned int cifs_max_pending = CIFS_MAX_REQ; | 76 | unsigned int cifs_max_pending = CIFS_MAX_REQ; |
77 | module_param(cifs_max_pending, int, 0); | 77 | module_param(cifs_max_pending, int, 0444); |
78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " | 78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " |
79 | "Default: 50 Range: 2 to 256"); | 79 | "Default: 50 Range: 2 to 256"); |
80 | unsigned short echo_retries = 5; | 80 | unsigned short echo_retries = 5; |
@@ -82,6 +82,10 @@ module_param(echo_retries, ushort, 0644); | |||
82 | MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " | 82 | MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " |
83 | "reconnecting server. Default: 5. 0 means " | 83 | "reconnecting server. Default: 5. 0 means " |
84 | "never reconnect."); | 84 | "never reconnect."); |
85 | module_param(enable_oplocks, bool, 0644); | ||
86 | MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default: " | ||
87 | "y/Y/1"); | ||
88 | |||
85 | extern mempool_t *cifs_sm_req_poolp; | 89 | extern mempool_t *cifs_sm_req_poolp; |
86 | extern mempool_t *cifs_req_poolp; | 90 | extern mempool_t *cifs_req_poolp; |
87 | extern mempool_t *cifs_mid_poolp; | 91 | extern mempool_t *cifs_mid_poolp; |
@@ -132,12 +136,12 @@ cifs_read_super(struct super_block *sb) | |||
132 | else | 136 | else |
133 | sb->s_d_op = &cifs_dentry_ops; | 137 | sb->s_d_op = &cifs_dentry_ops; |
134 | 138 | ||
135 | #ifdef CIFS_NFSD_EXPORT | 139 | #ifdef CONFIG_CIFS_NFSD_EXPORT |
136 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { | 140 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { |
137 | cFYI(1, "export ops supported"); | 141 | cFYI(1, "export ops supported"); |
138 | sb->s_export_op = &cifs_export_ops; | 142 | sb->s_export_op = &cifs_export_ops; |
139 | } | 143 | } |
140 | #endif /* CIFS_NFSD_EXPORT */ | 144 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
141 | 145 | ||
142 | return 0; | 146 | return 0; |
143 | 147 | ||
@@ -432,6 +436,12 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) | |||
432 | seq_printf(s, ",mfsymlinks"); | 436 | seq_printf(s, ",mfsymlinks"); |
433 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) | 437 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) |
434 | seq_printf(s, ",fsc"); | 438 | seq_printf(s, ",fsc"); |
439 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) | ||
440 | seq_printf(s, ",nostrictsync"); | ||
441 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) | ||
442 | seq_printf(s, ",noperm"); | ||
443 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) | ||
444 | seq_printf(s, ",strictcache"); | ||
435 | 445 | ||
436 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); | 446 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); |
437 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); | 447 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); |
@@ -530,7 +540,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
530 | char *full_path = NULL; | 540 | char *full_path = NULL; |
531 | char *s, *p; | 541 | char *s, *p; |
532 | char sep; | 542 | char sep; |
533 | int xid; | ||
534 | 543 | ||
535 | full_path = cifs_build_path_to_root(vol, cifs_sb, | 544 | full_path = cifs_build_path_to_root(vol, cifs_sb, |
536 | cifs_sb_master_tcon(cifs_sb)); | 545 | cifs_sb_master_tcon(cifs_sb)); |
@@ -539,7 +548,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
539 | 548 | ||
540 | cFYI(1, "Get root dentry for %s", full_path); | 549 | cFYI(1, "Get root dentry for %s", full_path); |
541 | 550 | ||
542 | xid = GetXid(); | ||
543 | sep = CIFS_DIR_SEP(cifs_sb); | 551 | sep = CIFS_DIR_SEP(cifs_sb); |
544 | dentry = dget(sb->s_root); | 552 | dentry = dget(sb->s_root); |
545 | p = s = full_path; | 553 | p = s = full_path; |
@@ -548,6 +556,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
548 | struct inode *dir = dentry->d_inode; | 556 | struct inode *dir = dentry->d_inode; |
549 | struct dentry *child; | 557 | struct dentry *child; |
550 | 558 | ||
559 | if (!dir) { | ||
560 | dput(dentry); | ||
561 | dentry = ERR_PTR(-ENOENT); | ||
562 | break; | ||
563 | } | ||
564 | |||
551 | /* skip separators */ | 565 | /* skip separators */ |
552 | while (*s == sep) | 566 | while (*s == sep) |
553 | s++; | 567 | s++; |
@@ -563,12 +577,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
563 | mutex_unlock(&dir->i_mutex); | 577 | mutex_unlock(&dir->i_mutex); |
564 | dput(dentry); | 578 | dput(dentry); |
565 | dentry = child; | 579 | dentry = child; |
566 | if (!dentry->d_inode) { | ||
567 | dput(dentry); | ||
568 | dentry = ERR_PTR(-ENOENT); | ||
569 | } | ||
570 | } while (!IS_ERR(dentry)); | 580 | } while (!IS_ERR(dentry)); |
571 | _FreeXid(xid); | ||
572 | kfree(full_path); | 581 | kfree(full_path); |
573 | return dentry; | 582 | return dentry; |
574 | } | 583 | } |
@@ -721,7 +730,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
721 | if (rc < 0) | 730 | if (rc < 0) |
722 | return (loff_t)rc; | 731 | return (loff_t)rc; |
723 | } | 732 | } |
724 | return generic_file_llseek_unlocked(file, offset, origin); | 733 | return generic_file_llseek(file, offset, origin); |
725 | } | 734 | } |
726 | 735 | ||
727 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | 736 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) |
@@ -940,7 +949,8 @@ cifs_init_once(void *inode) | |||
940 | struct cifsInodeInfo *cifsi = inode; | 949 | struct cifsInodeInfo *cifsi = inode; |
941 | 950 | ||
942 | inode_init_once(&cifsi->vfs_inode); | 951 | inode_init_once(&cifsi->vfs_inode); |
943 | INIT_LIST_HEAD(&cifsi->lockList); | 952 | INIT_LIST_HEAD(&cifsi->llist); |
953 | mutex_init(&cifsi->lock_mutex); | ||
944 | } | 954 | } |
945 | 955 | ||
946 | static int | 956 | static int |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cb71dc1f94d1..d9dbaf869cd1 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -121,9 +121,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); | |||
121 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); | 121 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); |
122 | extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | 122 | extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); |
123 | 123 | ||
124 | #ifdef CIFS_NFSD_EXPORT | 124 | #ifdef CONFIG_CIFS_NFSD_EXPORT |
125 | extern const struct export_operations cifs_export_ops; | 125 | extern const struct export_operations cifs_export_ops; |
126 | #endif /* CIFS_NFSD_EXPORT */ | 126 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
127 | 127 | ||
128 | #define CIFS_VERSION "1.74" | 128 | #define CIFS_VERSION "1.75" |
129 | #endif /* _CIFSFS_H */ | 129 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38ce6d44b145..8238aa13e01c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -167,6 +167,8 @@ struct smb_vol { | |||
167 | uid_t cred_uid; | 167 | uid_t cred_uid; |
168 | uid_t linux_uid; | 168 | uid_t linux_uid; |
169 | gid_t linux_gid; | 169 | gid_t linux_gid; |
170 | uid_t backupuid; | ||
171 | gid_t backupgid; | ||
170 | mode_t file_mode; | 172 | mode_t file_mode; |
171 | mode_t dir_mode; | 173 | mode_t dir_mode; |
172 | unsigned secFlg; | 174 | unsigned secFlg; |
@@ -179,6 +181,8 @@ struct smb_vol { | |||
179 | bool noperm:1; | 181 | bool noperm:1; |
180 | bool no_psx_acl:1; /* set if posix acl support should be disabled */ | 182 | bool no_psx_acl:1; /* set if posix acl support should be disabled */ |
181 | bool cifs_acl:1; | 183 | bool cifs_acl:1; |
184 | bool backupuid_specified; /* mount option backupuid is specified */ | ||
185 | bool backupgid_specified; /* mount option backupgid is specified */ | ||
182 | bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ | 186 | bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ |
183 | bool server_ino:1; /* use inode numbers from server ie UniqueId */ | 187 | bool server_ino:1; /* use inode numbers from server ie UniqueId */ |
184 | bool direct_io:1; | 188 | bool direct_io:1; |
@@ -219,7 +223,8 @@ struct smb_vol { | |||
219 | CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ | 223 | CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ |
220 | CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \ | 224 | CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \ |
221 | CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ | 225 | CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ |
222 | CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO) | 226 | CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ |
227 | CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID) | ||
223 | 228 | ||
224 | #define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \ | 229 | #define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \ |
225 | MS_NODEV | MS_SYNCHRONOUS) | 230 | MS_NODEV | MS_SYNCHRONOUS) |
@@ -286,12 +291,18 @@ struct TCP_Server_Info { | |||
286 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 291 | bool sec_kerberosu2u; /* supports U2U Kerberos */ |
287 | bool sec_kerberos; /* supports plain Kerberos */ | 292 | bool sec_kerberos; /* supports plain Kerberos */ |
288 | bool sec_mskerberos; /* supports legacy MS Kerberos */ | 293 | bool sec_mskerberos; /* supports legacy MS Kerberos */ |
294 | bool large_buf; /* is current buffer large? */ | ||
289 | struct delayed_work echo; /* echo ping workqueue job */ | 295 | struct delayed_work echo; /* echo ping workqueue job */ |
296 | struct kvec *iov; /* reusable kvec array for receives */ | ||
297 | unsigned int nr_iov; /* number of kvecs in array */ | ||
298 | char *smallbuf; /* pointer to current "small" buffer */ | ||
299 | char *bigbuf; /* pointer to current "big" buffer */ | ||
300 | unsigned int total_read; /* total amount of data read in this pass */ | ||
290 | #ifdef CONFIG_CIFS_FSCACHE | 301 | #ifdef CONFIG_CIFS_FSCACHE |
291 | struct fscache_cookie *fscache; /* client index cache cookie */ | 302 | struct fscache_cookie *fscache; /* client index cache cookie */ |
292 | #endif | 303 | #endif |
293 | #ifdef CONFIG_CIFS_STATS2 | 304 | #ifdef CONFIG_CIFS_STATS2 |
294 | atomic_t inSend; /* requests trying to send */ | 305 | atomic_t in_send; /* requests trying to send */ |
295 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ | 306 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ |
296 | #endif | 307 | #endif |
297 | }; | 308 | }; |
@@ -485,9 +496,13 @@ extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); | |||
485 | */ | 496 | */ |
486 | struct cifsLockInfo { | 497 | struct cifsLockInfo { |
487 | struct list_head llist; /* pointer to next cifsLockInfo */ | 498 | struct list_head llist; /* pointer to next cifsLockInfo */ |
499 | struct list_head blist; /* pointer to locks blocked on this */ | ||
500 | wait_queue_head_t block_q; | ||
488 | __u64 offset; | 501 | __u64 offset; |
489 | __u64 length; | 502 | __u64 length; |
503 | __u32 pid; | ||
490 | __u8 type; | 504 | __u8 type; |
505 | __u16 netfid; | ||
491 | }; | 506 | }; |
492 | 507 | ||
493 | /* | 508 | /* |
@@ -520,8 +535,6 @@ struct cifsFileInfo { | |||
520 | struct dentry *dentry; | 535 | struct dentry *dentry; |
521 | unsigned int f_flags; | 536 | unsigned int f_flags; |
522 | struct tcon_link *tlink; | 537 | struct tcon_link *tlink; |
523 | struct mutex lock_mutex; | ||
524 | struct list_head llist; /* list of byte range locks we have. */ | ||
525 | bool invalidHandle:1; /* file closed via session abend */ | 538 | bool invalidHandle:1; /* file closed via session abend */ |
526 | bool oplock_break_cancelled:1; | 539 | bool oplock_break_cancelled:1; |
527 | int count; /* refcount protected by cifs_file_list_lock */ | 540 | int count; /* refcount protected by cifs_file_list_lock */ |
@@ -554,7 +567,9 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); | |||
554 | */ | 567 | */ |
555 | 568 | ||
556 | struct cifsInodeInfo { | 569 | struct cifsInodeInfo { |
557 | struct list_head lockList; | 570 | struct list_head llist; /* brlocks for this inode */ |
571 | bool can_cache_brlcks; | ||
572 | struct mutex lock_mutex; /* protect two fields above */ | ||
558 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ | 573 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ |
559 | struct list_head openFileList; | 574 | struct list_head openFileList; |
560 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ | 575 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ |
@@ -643,8 +658,24 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, | |||
643 | struct mid_q_entry; | 658 | struct mid_q_entry; |
644 | 659 | ||
645 | /* | 660 | /* |
646 | * This is the prototype for the mid callback function. When creating one, | 661 | * This is the prototype for the mid receive function. This function is for |
647 | * take special care to avoid deadlocks. Things to bear in mind: | 662 | * receiving the rest of the SMB frame, starting with the WordCount (which is |
663 | * just after the MID in struct smb_hdr). Note: | ||
664 | * | ||
665 | * - This will be called by cifsd, with no locks held. | ||
666 | * - The mid will still be on the pending_mid_q. | ||
667 | * - mid->resp_buf will point to the current buffer. | ||
668 | * | ||
669 | * Returns zero on a successful receive, or an error. The receive state in | ||
670 | * the TCP_Server_Info will also be updated. | ||
671 | */ | ||
672 | typedef int (mid_receive_t)(struct TCP_Server_Info *server, | ||
673 | struct mid_q_entry *mid); | ||
674 | |||
675 | /* | ||
676 | * This is the prototype for the mid callback function. This is called once the | ||
677 | * mid has been received off of the socket. When creating one, take special | ||
678 | * care to avoid deadlocks. Things to bear in mind: | ||
648 | * | 679 | * |
649 | * - it will be called by cifsd, with no locks held | 680 | * - it will be called by cifsd, with no locks held |
650 | * - the mid will be removed from any lists | 681 | * - the mid will be removed from any lists |
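The mid_receive_t hook documented above lets an in-flight command supply its own routine for pulling the rest of the frame off the socket, tracking progress in server->total_read. A hedged sketch of the shape such a routine takes (frame_length and read_from_socket are illustrative, not the cifs helpers):

	static unsigned int frame_length(struct mid_q_entry *mid);	/* illustrative */
	static int read_from_socket(struct TCP_Server_Info *server,
				    char *buf, unsigned int to_read);	/* illustrative */

	static int my_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
	{
		int length;
		unsigned int remaining = frame_length(mid) - server->total_read;

		/* read the remainder of the frame, starting at WordCount */
		length = read_from_socket(server,
					  server->bigbuf + server->total_read,
					  remaining);
		if (length < 0)
			return length;
		server->total_read += length;
		return 0;
	}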
@@ -662,9 +693,10 @@ struct mid_q_entry { | |||
662 | unsigned long when_sent; /* time when smb send finished */ | 693 | unsigned long when_sent; /* time when smb send finished */ |
663 | unsigned long when_received; /* when demux complete (taken off wire) */ | 694 | unsigned long when_received; /* when demux complete (taken off wire) */ |
664 | #endif | 695 | #endif |
696 | mid_receive_t *receive; /* call receive callback */ | ||
665 | mid_callback_t *callback; /* call completion callback */ | 697 | mid_callback_t *callback; /* call completion callback */ |
666 | void *callback_data; /* general purpose pointer for callback */ | 698 | void *callback_data; /* general purpose pointer for callback */ |
667 | struct smb_hdr *resp_buf; /* response buffer */ | 699 | struct smb_hdr *resp_buf; /* pointer to received SMB header */ |
668 | int midState; /* wish this were enum but can not pass to wait_event */ | 700 | int midState; /* wish this were enum but can not pass to wait_event */ |
669 | __u8 command; /* smb command code */ | 701 | __u8 command; /* smb command code */ |
670 | bool largeBuf:1; /* if valid response, is pointer to large buf */ | 702 | bool largeBuf:1; /* if valid response, is pointer to large buf */ |
@@ -672,12 +704,54 @@ struct mid_q_entry { | |||
672 | bool multiEnd:1; /* both received */ | 704 | bool multiEnd:1; /* both received */ |
673 | }; | 705 | }; |
674 | 706 | ||
675 | struct oplock_q_entry { | 707 | /* Make code in transport.c a little cleaner by moving |
676 | struct list_head qhead; | 708 | update of optional stats into function below */ |
677 | struct inode *pinode; | 709 | #ifdef CONFIG_CIFS_STATS2 |
678 | struct cifs_tcon *tcon; | 710 | |
679 | __u16 netfid; | 711 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) |
680 | }; | 712 | { |
713 | atomic_inc(&server->in_send); | ||
714 | } | ||
715 | |||
716 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
717 | { | ||
718 | atomic_dec(&server->in_send); | ||
719 | } | ||
720 | |||
721 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
722 | { | ||
723 | atomic_inc(&server->num_waiters); | ||
724 | } | ||
725 | |||
726 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
727 | { | ||
728 | atomic_dec(&server->num_waiters); | ||
729 | } | ||
730 | |||
731 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
732 | { | ||
733 | mid->when_sent = jiffies; | ||
734 | } | ||
735 | #else | ||
736 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) | ||
737 | { | ||
738 | } | ||
739 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
740 | { | ||
741 | } | ||
742 | |||
743 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
744 | { | ||
745 | } | ||
746 | |||
747 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
748 | { | ||
749 | } | ||
750 | |||
751 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
752 | { | ||
753 | } | ||
754 | #endif | ||
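A hedged sketch of how the send path in transport.c might use these helpers (the smb_sendv() send primitive is an assumption here; the helper calls compile away entirely when CONFIG_CIFS_STATS2 is off):

static int example_send(struct TCP_Server_Info *server, struct kvec *iov,
			unsigned int n_vec, struct mid_q_entry *mid)
{
	int rc;

	cifs_in_send_inc(server);	/* no-op without CONFIG_CIFS_STATS2 */
	rc = smb_sendv(server, iov, n_vec);
	cifs_in_send_dec(server);
	cifs_save_when_sent(mid);	/* stamps mid->when_sent with jiffies */

	return rc;
}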
681 | 755 | ||
682 | /* for pending dnotify requests */ | 756 | /* for pending dnotify requests */ |
683 | struct dir_notify_req { | 757 | struct dir_notify_req { |
@@ -922,7 +996,8 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions | |||
922 | to be established on existing mount if we | 996 | to be established on existing mount if we |
923 | have the uid/password or Kerberos credential | 997 | have the uid/password or Kerberos credential |
924 | or equivalent for current user */ | 998 | or equivalent for current user */ |
925 | GLOBAL_EXTERN unsigned int oplockEnabled; | 999 | /* enable or disable oplocks */ |
1000 | GLOBAL_EXTERN bool enable_oplocks; | ||
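Presumably this bool is wired up as a module parameter in cifsfs.c; a minimal sketch of that wiring, with the parameter name and default assumed:

bool enable_oplocks = true;	/* assumed default: oplocks on */

module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default: y/Y/1");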
926 | GLOBAL_EXTERN unsigned int lookupCacheEnabled; | 1001 | GLOBAL_EXTERN unsigned int lookupCacheEnabled; |
927 | GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent | 1002 | GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent |
928 | with more secure ntlmssp2 challenge/resp */ | 1003 | with more secure ntlmssp2 challenge/resp */ |
@@ -936,10 +1011,16 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ | |||
936 | /* reconnect after this many failed echo attempts */ | 1011 | /* reconnect after this many failed echo attempts */ |
937 | GLOBAL_EXTERN unsigned short echo_retries; | 1012 | GLOBAL_EXTERN unsigned short echo_retries; |
938 | 1013 | ||
1014 | #ifdef CONFIG_CIFS_ACL | ||
939 | GLOBAL_EXTERN struct rb_root uidtree; | 1015 | GLOBAL_EXTERN struct rb_root uidtree; |
940 | GLOBAL_EXTERN struct rb_root gidtree; | 1016 | GLOBAL_EXTERN struct rb_root gidtree; |
941 | GLOBAL_EXTERN spinlock_t siduidlock; | 1017 | GLOBAL_EXTERN spinlock_t siduidlock; |
942 | GLOBAL_EXTERN spinlock_t sidgidlock; | 1018 | GLOBAL_EXTERN spinlock_t sidgidlock; |
1019 | GLOBAL_EXTERN struct rb_root siduidtree; | ||
1020 | GLOBAL_EXTERN struct rb_root sidgidtree; | ||
1021 | GLOBAL_EXTERN spinlock_t uidsidlock; | ||
1022 | GLOBAL_EXTERN spinlock_t gidsidlock; | ||
1023 | #endif /* CONFIG_CIFS_ACL */ | ||
943 | 1024 | ||
944 | void cifs_oplock_break(struct work_struct *work); | 1025 | void cifs_oplock_break(struct work_struct *work); |
945 | 1026 | ||
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index de3aa285de03..3fb03e2c8e86 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -1089,9 +1089,7 @@ typedef struct smb_com_read_rsp { | |||
1089 | __le16 DataLengthHigh; | 1089 | __le16 DataLengthHigh; |
1090 | __u64 Reserved2; | 1090 | __u64 Reserved2; |
1091 | __u16 ByteCount; | 1091 | __u16 ByteCount; |
1092 | __u8 Pad; /* BB check for whether padded to DWORD | 1092 | /* read response data immediately follows */ |
1093 | boundary and optimum performance here */ | ||
1094 | char Data[1]; | ||
1095 | } __attribute__((packed)) READ_RSP; | 1093 | } __attribute__((packed)) READ_RSP; |
1096 | 1094 | ||
1097 | typedef struct locking_andx_range { | 1095 | typedef struct locking_andx_range { |
@@ -1913,6 +1911,10 @@ typedef struct whoami_rsp_data { /* Query level 0x202 */ | |||
1913 | 1911 | ||
1914 | /* SETFSInfo Levels */ | 1912 | /* SETFSInfo Levels */ |
1915 | #define SMB_SET_CIFS_UNIX_INFO 0x200 | 1913 | #define SMB_SET_CIFS_UNIX_INFO 0x200 |
1914 | /* level 0x203 is defined above in list of QFS info levels */ | ||
1915 | /* #define SMB_REQUEST_TRANSPORT_ENCRYPTION 0x203 */ | ||
1916 | |||
1917 | /* Level 0x200 request structure follows */ | ||
1916 | typedef struct smb_com_transaction2_setfsi_req { | 1918 | typedef struct smb_com_transaction2_setfsi_req { |
1917 | struct smb_hdr hdr; /* wct = 15 */ | 1919 | struct smb_hdr hdr; /* wct = 15 */ |
1918 | __le16 TotalParameterCount; | 1920 | __le16 TotalParameterCount; |
@@ -1940,13 +1942,39 @@ typedef struct smb_com_transaction2_setfsi_req { | |||
1940 | __le64 ClientUnixCap; /* Data end */ | 1942 | __le64 ClientUnixCap; /* Data end */ |
1941 | } __attribute__((packed)) TRANSACTION2_SETFSI_REQ; | 1943 | } __attribute__((packed)) TRANSACTION2_SETFSI_REQ; |
1942 | 1944 | ||
1945 | /* level 0x203 request structure follows */ | ||
1946 | typedef struct smb_com_transaction2_setfs_enc_req { | ||
1947 | struct smb_hdr hdr; /* wct = 15 */ | ||
1948 | __le16 TotalParameterCount; | ||
1949 | __le16 TotalDataCount; | ||
1950 | __le16 MaxParameterCount; | ||
1951 | __le16 MaxDataCount; | ||
1952 | __u8 MaxSetupCount; | ||
1953 | __u8 Reserved; | ||
1954 | __le16 Flags; | ||
1955 | __le32 Timeout; | ||
1956 | __u16 Reserved2; | ||
1957 | __le16 ParameterCount; /* 4 */ | ||
1958 | __le16 ParameterOffset; | ||
1959 | __le16 DataCount; /* 12 */ | ||
1960 | __le16 DataOffset; | ||
1961 | __u8 SetupCount; /* one */ | ||
1962 | __u8 Reserved3; | ||
1963 | __le16 SubCommand; /* TRANS2_SET_FS_INFORMATION */ | ||
1964 | __le16 ByteCount; | ||
1965 | __u8 Pad; | ||
1966 | __u16 Reserved4; /* Parameters start. */ | ||
1967 | __le16 InformationLevel;/* Parameters end. */ | ||
1968 | /* NTLMSSP Blob, Data start. */ | ||
1969 | } __attribute__((packed)) TRANSACTION2_SETFSI_ENC_REQ; | ||
1970 | |||
1971 | /* response for setfsinfo levels 0x200 and 0x203 */ | ||
1943 | typedef struct smb_com_transaction2_setfsi_rsp { | 1972 | typedef struct smb_com_transaction2_setfsi_rsp { |
1944 | struct smb_hdr hdr; /* wct = 10 */ | 1973 | struct smb_hdr hdr; /* wct = 10 */ |
1945 | struct trans2_resp t2; | 1974 | struct trans2_resp t2; |
1946 | __u16 ByteCount; | 1975 | __u16 ByteCount; |
1947 | } __attribute__((packed)) TRANSACTION2_SETFSI_RSP; | 1976 | } __attribute__((packed)) TRANSACTION2_SETFSI_RSP; |
1948 | 1977 | ||
1949 | |||
1950 | typedef struct smb_com_transaction2_get_dfs_refer_req { | 1978 | typedef struct smb_com_transaction2_get_dfs_refer_req { |
1951 | struct smb_hdr hdr; /* wct = 15 */ | 1979 | struct smb_hdr hdr; /* wct = 15 */ |
1952 | __le16 TotalParameterCount; | 1980 | __le16 TotalParameterCount; |
@@ -2098,13 +2126,13 @@ typedef struct { | |||
2098 | #define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and | 2126 | #define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and |
2099 | QFS PROXY call */ | 2127 | QFS PROXY call */ |
2100 | #ifdef CONFIG_CIFS_POSIX | 2128 | #ifdef CONFIG_CIFS_POSIX |
2101 | /* Can not set pathnames cap yet until we send new posix create SMB since | 2129 | /* presumably don't need the 0x20 POSIX_PATH_OPS_CAP since we never send
2102 | otherwise server can treat such handles opened with older ntcreatex | 2130 | LockingX instead of a posix locking call on a unix session (and we do not
2103 | (by a new client which knows how to send posix path ops) | 2131 | expect LockingX to use different (ie Windows) semantics than posix locking
2104 | as non-posix handles (can affect write behavior with byte range locks. | 2132 | on the same session); if WINE needs this later, we can add the cap back
2105 | We can add back in POSIX_PATH_OPS cap when Posix Create/Mkdir finished */ | 2133 | in */
2106 | /* #define CIFS_UNIX_CAP_MASK 0x000000fb */ | 2134 | /* #define CIFS_UNIX_CAP_MASK 0x000000fb */ |
2107 | #define CIFS_UNIX_CAP_MASK 0x000000db | 2135 | #define CIFS_UNIX_CAP_MASK 0x000003db |
2108 | #else | 2136 | #else |
2109 | #define CIFS_UNIX_CAP_MASK 0x00000013 | 2137 | #define CIFS_UNIX_CAP_MASK 0x00000013 |
2110 | #endif /* CONFIG_CIFS_POSIX */ | 2138 | #endif /* CONFIG_CIFS_POSIX */ |
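The only arithmetic change in the mask is 0x3db - 0xdb = 0x300: presumably the two transport-encryption capability bits (0x100 and 0x200) are being picked up to pair with the new SET_FS_INFORMATION level 0x203 request structure added above, while the 0x20 POSIX_PATH_OPS_CAP bit stays out per the comment.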
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8df28e925e5b..ef4f631e4c01 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -69,8 +69,9 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, | |||
69 | struct TCP_Server_Info *server); | 69 | struct TCP_Server_Info *server); |
70 | extern void DeleteMidQEntry(struct mid_q_entry *midEntry); | 70 | extern void DeleteMidQEntry(struct mid_q_entry *midEntry); |
71 | extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | 71 | extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, |
72 | unsigned int nvec, mid_callback_t *callback, | 72 | unsigned int nvec, mid_receive_t *receive, |
73 | void *cbdata, bool ignore_pend); | 73 | mid_callback_t *callback, void *cbdata, |
74 | bool ignore_pend); | ||
74 | extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, | 75 | extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, |
75 | struct smb_hdr * /* input */ , | 76 | struct smb_hdr * /* input */ , |
76 | struct smb_hdr * /* out */ , | 77 | struct smb_hdr * /* out */ , |
@@ -90,6 +91,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, | |||
90 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); | 91 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); |
91 | extern bool is_valid_oplock_break(struct smb_hdr *smb, | 92 | extern bool is_valid_oplock_break(struct smb_hdr *smb, |
92 | struct TCP_Server_Info *); | 93 | struct TCP_Server_Info *); |
94 | extern bool backup_cred(struct cifs_sb_info *); | ||
93 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); | 95 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); |
94 | extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | 96 | extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, |
95 | unsigned int bytes_written); | 97 | unsigned int bytes_written); |
@@ -145,12 +147,19 @@ extern int cifs_get_inode_info_unix(struct inode **pinode, | |||
145 | extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, | 147 | extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, |
146 | struct cifs_fattr *fattr, struct inode *inode, | 148 | struct cifs_fattr *fattr, struct inode *inode, |
147 | const char *path, const __u16 *pfid); | 149 | const char *path, const __u16 *pfid); |
148 | extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); | 150 | extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, |
151 | uid_t, gid_t); | ||
149 | extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, | 152 | extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, |
150 | const char *, u32 *); | 153 | const char *, u32 *); |
151 | extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, | 154 | extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, |
152 | const char *); | 155 | const char *, int); |
153 | 156 | ||
157 | extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); | ||
158 | extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, | ||
159 | unsigned int to_read); | ||
160 | extern int cifs_readv_from_socket(struct TCP_Server_Info *server, | ||
161 | struct kvec *iov_orig, unsigned int nr_segs, | ||
162 | unsigned int to_read); | ||
154 | extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | 163 | extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, |
155 | struct cifs_sb_info *cifs_sb); | 164 | struct cifs_sb_info *cifs_sb); |
156 | extern int cifs_match_super(struct super_block *, void *); | 165 | extern int cifs_match_super(struct super_block *, void *); |
@@ -359,14 +368,17 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon, | |||
359 | const struct nls_table *nls_codepage, | 368 | const struct nls_table *nls_codepage, |
360 | int remap_special_chars); | 369 | int remap_special_chars); |
361 | 370 | ||
371 | extern int cifs_lockv(const int xid, struct cifs_tcon *tcon, const __u16 netfid, | ||
372 | const __u8 lock_type, const __u32 num_unlock, | ||
373 | const __u32 num_lock, LOCKING_ANDX_RANGE *buf); | ||
362 | extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon, | 374 | extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon, |
363 | const __u16 netfid, const __u64 len, | 375 | const __u16 netfid, const __u32 netpid, const __u64 len, |
364 | const __u64 offset, const __u32 numUnlock, | 376 | const __u64 offset, const __u32 numUnlock, |
365 | const __u32 numLock, const __u8 lockType, | 377 | const __u32 numLock, const __u8 lockType, |
366 | const bool waitFlag, const __u8 oplock_level); | 378 | const bool waitFlag, const __u8 oplock_level); |
367 | extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, | 379 | extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, |
368 | const __u16 smb_file_id, const int get_flag, | 380 | const __u16 smb_file_id, const __u32 netpid, |
369 | const __u64 len, struct file_lock *, | 381 | const int get_flag, const __u64 len, struct file_lock *, |
370 | const __u16 lock_type, const bool waitFlag); | 382 | const __u16 lock_type, const bool waitFlag); |
371 | extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon); | 383 | extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon); |
372 | extern int CIFSSMBEcho(struct TCP_Server_Info *server); | 384 | extern int CIFSSMBEcho(struct TCP_Server_Info *server); |
@@ -380,7 +392,7 @@ extern void tconInfoFree(struct cifs_tcon *); | |||
380 | extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); | 392 | extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); |
381 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, | 393 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, |
382 | __u32 *); | 394 | __u32 *); |
383 | extern int cifs_verify_signature(struct smb_hdr *, | 395 | extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, |
384 | struct TCP_Server_Info *server, | 396 | struct TCP_Server_Info *server, |
385 | __u32 expected_sequence_number); | 397 | __u32 expected_sequence_number); |
386 | extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); | 398 | extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); |
@@ -419,7 +431,7 @@ extern int CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon, | |||
419 | extern int CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, | 431 | extern int CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, |
420 | __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); | 432 | __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); |
421 | extern int CIFSSMBSetCIFSACL(const int, struct cifs_tcon *, __u16, | 433 | extern int CIFSSMBSetCIFSACL(const int, struct cifs_tcon *, __u16, |
422 | struct cifs_ntsd *, __u32); | 434 | struct cifs_ntsd *, __u32, int); |
423 | extern int CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon, | 435 | extern int CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon, |
424 | const unsigned char *searchName, | 436 | const unsigned char *searchName, |
425 | char *acl_inf, const int buflen, const int acl_type, | 437 | char *acl_inf, const int buflen, const int acl_type, |
@@ -440,6 +452,24 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); | |||
440 | extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, | 452 | extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, |
441 | unsigned char *p24); | 453 | unsigned char *p24); |
442 | 454 | ||
455 | /* asynchronous read support */ | ||
456 | struct cifs_readdata { | ||
457 | struct cifsFileInfo *cfile; | ||
458 | struct address_space *mapping; | ||
459 | __u64 offset; | ||
460 | unsigned int bytes; | ||
461 | pid_t pid; | ||
462 | int result; | ||
463 | struct list_head pages; | ||
464 | struct work_struct work; | ||
465 | unsigned int nr_iov; | ||
466 | struct kvec iov[1]; | ||
467 | }; | ||
468 | |||
469 | struct cifs_readdata *cifs_readdata_alloc(unsigned int nr_pages); | ||
470 | void cifs_readdata_free(struct cifs_readdata *rdata); | ||
471 | int cifs_async_readv(struct cifs_readdata *rdata); | ||
472 | |||
443 | /* asynchronous write support */ | 473 | /* asynchronous write support */ |
444 | struct cifs_writedata { | 474 | struct cifs_writedata { |
445 | struct kref refcount; | 475 | struct kref refcount; |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index aac37d99a487..6600aa2d2ef3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <linux/posix_acl_xattr.h> | 34 | #include <linux/posix_acl_xattr.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/swap.h> | ||
37 | #include <linux/task_io_accounting_ops.h> | ||
36 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
37 | #include "cifspdu.h" | 39 | #include "cifspdu.h" |
38 | #include "cifsglob.h" | 40 | #include "cifsglob.h" |
@@ -40,6 +42,7 @@ | |||
40 | #include "cifsproto.h" | 42 | #include "cifsproto.h" |
41 | #include "cifs_unicode.h" | 43 | #include "cifs_unicode.h" |
42 | #include "cifs_debug.h" | 44 | #include "cifs_debug.h" |
45 | #include "fscache.h" | ||
43 | 46 | ||
44 | #ifdef CONFIG_CIFS_POSIX | 47 | #ifdef CONFIG_CIFS_POSIX |
45 | static struct { | 48 | static struct { |
@@ -83,6 +86,9 @@ static struct { | |||
83 | #endif /* CONFIG_CIFS_WEAK_PW_HASH */ | 86 | #endif /* CONFIG_CIFS_WEAK_PW_HASH */ |
84 | #endif /* CIFS_POSIX */ | 87 | #endif /* CIFS_POSIX */ |
85 | 88 | ||
89 | /* Forward declarations */ | ||
90 | static void cifs_readv_complete(struct work_struct *work); | ||
91 | |||
86 | /* Mark as invalid, all open files on tree connections since they | 92 | /* Mark as invalid, all open files on tree connections since they |
87 | were closed when session to server was lost */ | 93 | were closed when session to server was lost */ |
88 | static void mark_open_files_invalid(struct cifs_tcon *pTcon) | 94 | static void mark_open_files_invalid(struct cifs_tcon *pTcon) |
@@ -453,8 +459,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) | |||
453 | } | 459 | } |
454 | server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); | 460 | server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); |
455 | server->maxReq = le16_to_cpu(rsp->MaxMpxCount); | 461 | server->maxReq = le16_to_cpu(rsp->MaxMpxCount); |
456 | server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), | 462 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); |
457 | (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); | ||
458 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | 463 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); |
459 | /* even though we do not use raw we might as well set this | 464 | /* even though we do not use raw we might as well set this |
460 | accurately, in case we ever find a need for it */ | 465 | accurately, in case we ever find a need for it */ |
@@ -561,8 +566,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) | |||
561 | little endian */ | 566 | little endian */ |
562 | server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); | 567 | server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); |
563 | /* probably no need to store and check maxvcs */ | 568 | /* probably no need to store and check maxvcs */ |
564 | server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), | 569 | server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); |
565 | (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); | ||
566 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); | 570 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); |
567 | cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); | 571 | cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); |
568 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); | 572 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); |
@@ -739,7 +743,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server) | |||
739 | iov.iov_base = smb; | 743 | iov.iov_base = smb; |
740 | iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; | 744 | iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; |
741 | 745 | ||
742 | rc = cifs_call_async(server, &iov, 1, cifs_echo_callback, server, true); | 746 | rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, |
747 | server, true); | ||
743 | if (rc) | 748 | if (rc) |
744 | cFYI(1, "Echo request failed: %d", rc); | 749 | cFYI(1, "Echo request failed: %d", rc); |
745 | 750 | ||
@@ -1376,6 +1381,359 @@ openRetry: | |||
1376 | return rc; | 1381 | return rc; |
1377 | } | 1382 | } |
1378 | 1383 | ||
1384 | struct cifs_readdata * | ||
1385 | cifs_readdata_alloc(unsigned int nr_pages) | ||
1386 | { | ||
1387 | struct cifs_readdata *rdata; | ||
1388 | |||
1389 | /* readdata + 1 kvec for each page */ | ||
1390 | rdata = kzalloc(sizeof(*rdata) + | ||
1391 | sizeof(struct kvec) * nr_pages, GFP_KERNEL); | ||
1392 | if (rdata != NULL) { | ||
1393 | INIT_WORK(&rdata->work, cifs_readv_complete); | ||
1394 | INIT_LIST_HEAD(&rdata->pages); | ||
1395 | } | ||
1396 | return rdata; | ||
1397 | } | ||
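Note the sizing arithmetic: the struct already declares iov[1], so sizeof(*rdata) + nr_pages * sizeof(struct kvec) yields nr_pages + 1 kvec slots in total: slot 0 is reserved for the response header and one slot serves each page, matching how cifs_readv_receive marshals the array below.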
1398 | |||
1399 | void | ||
1400 | cifs_readdata_free(struct cifs_readdata *rdata) | ||
1401 | { | ||
1402 | cifsFileInfo_put(rdata->cfile); | ||
1403 | kfree(rdata); | ||
1404 | } | ||
1405 | |||
1406 | /* | ||
1407 | * Discard any remaining data in the current SMB. To do this, we borrow the | ||
1408 | * current bigbuf. | ||
1409 | */ | ||
1410 | static int | ||
1411 | cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) | ||
1412 | { | ||
1413 | READ_RSP *rsp = (READ_RSP *)server->smallbuf; | ||
1414 | unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length); | ||
1415 | int remaining = rfclen + 4 - server->total_read; | ||
1416 | struct cifs_readdata *rdata = mid->callback_data; | ||
1417 | |||
1418 | while (remaining > 0) { | ||
1419 | int length; | ||
1420 | |||
1421 | length = cifs_read_from_socket(server, server->bigbuf, | ||
1422 | min_t(unsigned int, remaining, | ||
1423 | CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)); | ||
1424 | if (length < 0) | ||
1425 | return length; | ||
1426 | server->total_read += length; | ||
1427 | remaining -= length; | ||
1428 | } | ||
1429 | |||
1430 | dequeue_mid(mid, rdata->result); | ||
1431 | return 0; | ||
1432 | } | ||
1433 | |||
1434 | static int | ||
1435 | cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | ||
1436 | { | ||
1437 | int length, len; | ||
1438 | unsigned int data_offset, remaining, data_len; | ||
1439 | struct cifs_readdata *rdata = mid->callback_data; | ||
1440 | READ_RSP *rsp = (READ_RSP *)server->smallbuf; | ||
1441 | unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length) + 4; | ||
1442 | u64 eof; | ||
1443 | pgoff_t eof_index; | ||
1444 | struct page *page, *tpage; | ||
1445 | |||
1446 | cFYI(1, "%s: mid=%u offset=%llu bytes=%u", __func__, | ||
1447 | mid->mid, rdata->offset, rdata->bytes); | ||
1448 | |||
1449 | /* | ||
1450 | * read the rest of READ_RSP header (sans Data array), or whatever we | ||
1451 | * can if there's not enough data. At this point, we've read down to | ||
1452 | * the Mid. | ||
1453 | */ | ||
1454 | len = min_t(unsigned int, rfclen, sizeof(*rsp)) - | ||
1455 | sizeof(struct smb_hdr) + 1; | ||
1456 | |||
1457 | rdata->iov[0].iov_base = server->smallbuf + sizeof(struct smb_hdr) - 1; | ||
1458 | rdata->iov[0].iov_len = len; | ||
1459 | |||
1460 | length = cifs_readv_from_socket(server, rdata->iov, 1, len); | ||
1461 | if (length < 0) | ||
1462 | return length; | ||
1463 | server->total_read += length; | ||
1464 | |||
1465 | /* Was the SMB read successful? */ | ||
1466 | rdata->result = map_smb_to_linux_error(&rsp->hdr, false); | ||
1467 | if (rdata->result != 0) { | ||
1468 | cFYI(1, "%s: server returned error %d", __func__, | ||
1469 | rdata->result); | ||
1470 | return cifs_readv_discard(server, mid); | ||
1471 | } | ||
1472 | |||
1473 | /* Is there enough to get to the rest of the READ_RSP header? */ | ||
1474 | if (server->total_read < sizeof(READ_RSP)) { | ||
1475 | cFYI(1, "%s: server returned short header. got=%u expected=%zu", | ||
1476 | __func__, server->total_read, sizeof(READ_RSP)); | ||
1477 | rdata->result = -EIO; | ||
1478 | return cifs_readv_discard(server, mid); | ||
1479 | } | ||
1480 | |||
1481 | data_offset = le16_to_cpu(rsp->DataOffset) + 4; | ||
1482 | if (data_offset < server->total_read) { | ||
1483 | /* | ||
1484 | * win2k8 sometimes sends an offset of 0 when the read | ||
1485 | * is beyond the EOF. Treat it as if the data starts just after | ||
1486 | * the header. | ||
1487 | */ | ||
1488 | cFYI(1, "%s: data offset (%u) inside read response header", | ||
1489 | __func__, data_offset); | ||
1490 | data_offset = server->total_read; | ||
1491 | } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) { | ||
1492 | /* data_offset is beyond the end of smallbuf */ | ||
1493 | cFYI(1, "%s: data offset (%u) beyond end of smallbuf", | ||
1494 | __func__, data_offset); | ||
1495 | rdata->result = -EIO; | ||
1496 | return cifs_readv_discard(server, mid); | ||
1497 | } | ||
1498 | |||
1499 | cFYI(1, "%s: total_read=%u data_offset=%u", __func__, | ||
1500 | server->total_read, data_offset); | ||
1501 | |||
1502 | len = data_offset - server->total_read; | ||
1503 | if (len > 0) { | ||
1504 | /* read any junk before data into the rest of smallbuf */ | ||
1505 | rdata->iov[0].iov_base = server->smallbuf + server->total_read; | ||
1506 | rdata->iov[0].iov_len = len; | ||
1507 | length = cifs_readv_from_socket(server, rdata->iov, 1, len); | ||
1508 | if (length < 0) | ||
1509 | return length; | ||
1510 | server->total_read += length; | ||
1511 | } | ||
1512 | |||
1513 | /* set up first iov for signature check */ | ||
1514 | rdata->iov[0].iov_base = server->smallbuf; | ||
1515 | rdata->iov[0].iov_len = server->total_read; | ||
1516 | cFYI(1, "0: iov_base=%p iov_len=%zu", | ||
1517 | rdata->iov[0].iov_base, rdata->iov[0].iov_len); | ||
1518 | |||
1519 | /* how much data is in the response? */ | ||
1520 | data_len = le16_to_cpu(rsp->DataLengthHigh) << 16; | ||
1521 | data_len += le16_to_cpu(rsp->DataLength); | ||
1522 | if (data_offset + data_len > rfclen) { | ||
1523 | /* data_len is corrupt -- discard frame */ | ||
1524 | rdata->result = -EIO; | ||
1525 | return cifs_readv_discard(server, mid); | ||
1526 | } | ||
1527 | |||
1528 | /* marshal up the page array */ | ||
1529 | len = 0; | ||
1530 | remaining = data_len; | ||
1531 | rdata->nr_iov = 1; | ||
1532 | |||
1533 | /* determine the eof that the server (probably) has */ | ||
1534 | eof = CIFS_I(rdata->mapping->host)->server_eof; | ||
1535 | eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; | ||
1536 | cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); | ||
1537 | |||
1538 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | ||
1539 | if (remaining >= PAGE_CACHE_SIZE) { | ||
1540 | /* enough data to fill the page */ | ||
1541 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | ||
1542 | rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; | ||
1543 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | ||
1544 | rdata->nr_iov, page->index, | ||
1545 | rdata->iov[rdata->nr_iov].iov_base, | ||
1546 | rdata->iov[rdata->nr_iov].iov_len); | ||
1547 | ++rdata->nr_iov; | ||
1548 | len += PAGE_CACHE_SIZE; | ||
1549 | remaining -= PAGE_CACHE_SIZE; | ||
1550 | } else if (remaining > 0) { | ||
1551 | /* enough for partial page, fill and zero the rest */ | ||
1552 | rdata->iov[rdata->nr_iov].iov_base = kmap(page); | ||
1553 | rdata->iov[rdata->nr_iov].iov_len = remaining; | ||
1554 | cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", | ||
1555 | rdata->nr_iov, page->index, | ||
1556 | rdata->iov[rdata->nr_iov].iov_base, | ||
1557 | rdata->iov[rdata->nr_iov].iov_len); | ||
1558 | memset(rdata->iov[rdata->nr_iov].iov_base + remaining, | ||
1559 | '\0', PAGE_CACHE_SIZE - remaining); | ||
1560 | ++rdata->nr_iov; | ||
1561 | len += remaining; | ||
1562 | remaining = 0; | ||
1563 | } else if (page->index > eof_index) { | ||
1564 | /* | ||
1565 | * The VFS will not try to do readahead past the | ||
1566 | * i_size, but it's possible that we have outstanding | ||
1567 | * writes with gaps in the middle and the i_size hasn't | ||
1568 | * caught up yet. Populate those with zeroed out pages | ||
1569 | * to prevent the VFS from repeatedly attempting to | ||
1570 | * fill them until the writes are flushed. | ||
1571 | */ | ||
1572 | zero_user(page, 0, PAGE_CACHE_SIZE); | ||
1573 | list_del(&page->lru); | ||
1574 | lru_cache_add_file(page); | ||
1575 | flush_dcache_page(page); | ||
1576 | SetPageUptodate(page); | ||
1577 | unlock_page(page); | ||
1578 | page_cache_release(page); | ||
1579 | } else { | ||
1580 | /* no need to hold page hostage */ | ||
1581 | list_del(&page->lru); | ||
1582 | lru_cache_add_file(page); | ||
1583 | unlock_page(page); | ||
1584 | page_cache_release(page); | ||
1585 | } | ||
1586 | } | ||
1587 | |||
1588 | /* issue the read if we have any iovecs left to fill */ | ||
1589 | if (rdata->nr_iov > 1) { | ||
1590 | length = cifs_readv_from_socket(server, &rdata->iov[1], | ||
1591 | rdata->nr_iov - 1, len); | ||
1592 | if (length < 0) | ||
1593 | return length; | ||
1594 | server->total_read += length; | ||
1595 | } else { | ||
1596 | length = 0; | ||
1597 | } | ||
1598 | |||
1599 | rdata->bytes = length; | ||
1600 | |||
1601 | cFYI(1, "total_read=%u rfclen=%u remaining=%u", server->total_read, | ||
1602 | rfclen, remaining); | ||
1603 | |||
1604 | /* discard anything left over */ | ||
1605 | if (server->total_read < rfclen) | ||
1606 | return cifs_readv_discard(server, mid); | ||
1607 | |||
1608 | dequeue_mid(mid, false); | ||
1609 | return length; | ||
1610 | } | ||
1611 | |||
1612 | static void | ||
1613 | cifs_readv_complete(struct work_struct *work) | ||
1614 | { | ||
1615 | struct cifs_readdata *rdata = container_of(work, | ||
1616 | struct cifs_readdata, work); | ||
1617 | struct page *page, *tpage; | ||
1618 | |||
1619 | list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { | ||
1620 | list_del(&page->lru); | ||
1621 | lru_cache_add_file(page); | ||
1622 | |||
1623 | if (rdata->result == 0) { | ||
1624 | kunmap(page); | ||
1625 | flush_dcache_page(page); | ||
1626 | SetPageUptodate(page); | ||
1627 | } | ||
1628 | |||
1629 | unlock_page(page); | ||
1630 | |||
1631 | if (rdata->result == 0) | ||
1632 | cifs_readpage_to_fscache(rdata->mapping->host, page); | ||
1633 | |||
1634 | page_cache_release(page); | ||
1635 | } | ||
1636 | cifs_readdata_free(rdata); | ||
1637 | } | ||
1638 | |||
1639 | static void | ||
1640 | cifs_readv_callback(struct mid_q_entry *mid) | ||
1641 | { | ||
1642 | struct cifs_readdata *rdata = mid->callback_data; | ||
1643 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); | ||
1644 | struct TCP_Server_Info *server = tcon->ses->server; | ||
1645 | |||
1646 | cFYI(1, "%s: mid=%u state=%d result=%d bytes=%u", __func__, | ||
1647 | mid->mid, mid->midState, rdata->result, rdata->bytes); | ||
1648 | |||
1649 | switch (mid->midState) { | ||
1650 | case MID_RESPONSE_RECEIVED: | ||
1651 | /* result already set, check signature */ | ||
1652 | if (server->sec_mode & | ||
1653 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
1654 | if (cifs_verify_signature(rdata->iov, rdata->nr_iov, | ||
1655 | server, mid->sequence_number + 1)) | ||
1656 | cERROR(1, "Unexpected SMB signature"); | ||
1657 | } | ||
1658 | /* FIXME: should this be counted toward the initiating task? */ | ||
1659 | task_io_account_read(rdata->bytes); | ||
1660 | cifs_stats_bytes_read(tcon, rdata->bytes); | ||
1661 | break; | ||
1662 | case MID_REQUEST_SUBMITTED: | ||
1663 | case MID_RETRY_NEEDED: | ||
1664 | rdata->result = -EAGAIN; | ||
1665 | break; | ||
1666 | default: | ||
1667 | rdata->result = -EIO; | ||
1668 | } | ||
1669 | |||
1670 | queue_work(system_nrt_wq, &rdata->work); | ||
1671 | DeleteMidQEntry(mid); | ||
1672 | atomic_dec(&server->inFlight); | ||
1673 | wake_up(&server->request_q); | ||
1674 | } | ||
1675 | |||
1676 | /* cifs_async_readv - send an async read, and set up mid to handle result */ | ||
1677 | int | ||
1678 | cifs_async_readv(struct cifs_readdata *rdata) | ||
1679 | { | ||
1680 | int rc; | ||
1681 | READ_REQ *smb = NULL; | ||
1682 | int wct; | ||
1683 | struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); | ||
1684 | |||
1685 | cFYI(1, "%s: offset=%llu bytes=%u", __func__, | ||
1686 | rdata->offset, rdata->bytes); | ||
1687 | |||
1688 | if (tcon->ses->capabilities & CAP_LARGE_FILES) | ||
1689 | wct = 12; | ||
1690 | else { | ||
1691 | wct = 10; /* old style read */ | ||
1692 | if ((rdata->offset >> 32) > 0) { | ||
1693 | /* can not handle this big offset for old */ | ||
1694 | return -EIO; | ||
1695 | } | ||
1696 | } | ||
1697 | |||
1698 | rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **)&smb); | ||
1699 | if (rc) | ||
1700 | return rc; | ||
1701 | |||
1702 | smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid); | ||
1703 | smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); | ||
1704 | |||
1705 | smb->AndXCommand = 0xFF; /* none */ | ||
1706 | smb->Fid = rdata->cfile->netfid; | ||
1707 | smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); | ||
1708 | if (wct == 12) | ||
1709 | smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); | ||
1710 | smb->Remaining = 0; | ||
1711 | smb->MaxCount = cpu_to_le16(rdata->bytes & 0xFFFF); | ||
1712 | smb->MaxCountHigh = cpu_to_le32(rdata->bytes >> 16); | ||
1713 | if (wct == 12) | ||
1714 | smb->ByteCount = 0; | ||
1715 | else { | ||
1716 | /* old style read */ | ||
1717 | struct smb_com_readx_req *smbr = | ||
1718 | (struct smb_com_readx_req *)smb; | ||
1719 | smbr->ByteCount = 0; | ||
1720 | } | ||
1721 | |||
1722 | /* 4 for RFC1001 length */ | ||
1723 | rdata->iov[0].iov_base = smb; | ||
1724 | rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; | ||
1725 | |||
1726 | rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, | ||
1727 | cifs_readv_receive, cifs_readv_callback, | ||
1728 | rdata, false); | ||
1729 | |||
1730 | if (rc == 0) | ||
1731 | cifs_stats_inc(&tcon->num_reads); | ||
1732 | |||
1733 | cifs_small_buf_release(smb); | ||
1734 | return rc; | ||
1735 | } | ||
1736 | |||
1379 | int | 1737 | int |
1380 | CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, | 1738 | CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, |
1381 | char **buf, int *pbuf_type) | 1739 | char **buf, int *pbuf_type) |
@@ -1836,7 +2194,7 @@ cifs_async_writev(struct cifs_writedata *wdata) | |||
1836 | 2194 | ||
1837 | kref_get(&wdata->refcount); | 2195 | kref_get(&wdata->refcount); |
1838 | rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, | 2196 | rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, |
1839 | cifs_writev_callback, wdata, false); | 2197 | NULL, cifs_writev_callback, wdata, false); |
1840 | 2198 | ||
1841 | if (rc == 0) | 2199 | if (rc == 0) |
1842 | cifs_stats_inc(&tcon->num_writes); | 2200 | cifs_stats_inc(&tcon->num_writes); |
@@ -1962,10 +2320,50 @@ CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms, | |||
1962 | return rc; | 2320 | return rc; |
1963 | } | 2321 | } |
1964 | 2322 | ||
2323 | int cifs_lockv(const int xid, struct cifs_tcon *tcon, const __u16 netfid, | ||
2324 | const __u8 lock_type, const __u32 num_unlock, | ||
2325 | const __u32 num_lock, LOCKING_ANDX_RANGE *buf) | ||
2326 | { | ||
2327 | int rc = 0; | ||
2328 | LOCK_REQ *pSMB = NULL; | ||
2329 | struct kvec iov[2]; | ||
2330 | int resp_buf_type; | ||
2331 | __u16 count; | ||
2332 | |||
2333 | cFYI(1, "cifs_lockv num lock %d num unlock %d", num_lock, num_unlock); | ||
2334 | |||
2335 | rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); | ||
2336 | if (rc) | ||
2337 | return rc; | ||
2338 | |||
2339 | pSMB->Timeout = 0; | ||
2340 | pSMB->NumberOfLocks = cpu_to_le16(num_lock); | ||
2341 | pSMB->NumberOfUnlocks = cpu_to_le16(num_unlock); | ||
2342 | pSMB->LockType = lock_type; | ||
2343 | pSMB->AndXCommand = 0xFF; /* none */ | ||
2344 | pSMB->Fid = netfid; /* netfid stays le */ | ||
2345 | |||
2346 | count = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); | ||
2347 | inc_rfc1001_len(pSMB, count); | ||
2348 | pSMB->ByteCount = cpu_to_le16(count); | ||
2349 | |||
2350 | iov[0].iov_base = (char *)pSMB; | ||
2351 | iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4 - | ||
2352 | (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); | ||
2353 | iov[1].iov_base = (char *)buf; | ||
2354 | iov[1].iov_len = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE); | ||
2355 | |||
2356 | cifs_stats_inc(&tcon->num_locks); | ||
2357 | rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); | ||
2358 | if (rc) | ||
2359 | cFYI(1, "Send error in cifs_lockv = %d", rc); | ||
2360 | |||
2361 | return rc; | ||
2362 | } | ||
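A hedged caller sketch (hypothetical; field names follow the LOCKING_ANDX_RANGE layout in cifspdu.h): release one held byte range and take another in a single LOCKING_ANDX round trip.

static int example_relock(const int xid, struct cifs_tcon *tcon,
			  __u16 netfid, __u32 netpid)
{
	LOCKING_ANDX_RANGE buf[2];

	/* buf[0] is consumed as the unlock, buf[1] as the new lock */
	buf[0].Pid = cpu_to_le16((__u16)netpid);
	buf[0].Pad = 0;
	buf[0].OffsetHigh = 0;
	buf[0].OffsetLow = cpu_to_le32(0);
	buf[0].LengthHigh = 0;
	buf[0].LengthLow = cpu_to_le32(4096);

	buf[1] = buf[0];
	buf[1].OffsetLow = cpu_to_le32(8192);	/* new range starts at 8k */

	return cifs_lockv(xid, tcon, netfid, LOCKING_ANDX_LARGE_FILES,
			  1 /* num_unlock */, 1 /* num_lock */, buf);
}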
1965 | 2363 | ||
1966 | int | 2364 | int |
1967 | CIFSSMBLock(const int xid, struct cifs_tcon *tcon, | 2365 | CIFSSMBLock(const int xid, struct cifs_tcon *tcon, |
1968 | const __u16 smb_file_id, const __u64 len, | 2366 | const __u16 smb_file_id, const __u32 netpid, const __u64 len, |
1969 | const __u64 offset, const __u32 numUnlock, | 2367 | const __u64 offset, const __u32 numUnlock, |
1970 | const __u32 numLock, const __u8 lockType, | 2368 | const __u32 numLock, const __u8 lockType, |
1971 | const bool waitFlag, const __u8 oplock_level) | 2369 | const bool waitFlag, const __u8 oplock_level) |
@@ -2001,7 +2399,7 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, | |||
2001 | pSMB->Fid = smb_file_id; /* netfid stays le */ | 2399 | pSMB->Fid = smb_file_id; /* netfid stays le */ |
2002 | 2400 | ||
2003 | if ((numLock != 0) || (numUnlock != 0)) { | 2401 | if ((numLock != 0) || (numUnlock != 0)) { |
2004 | pSMB->Locks[0].Pid = cpu_to_le16(current->tgid); | 2402 | pSMB->Locks[0].Pid = cpu_to_le16(netpid); |
2005 | /* BB where to store pid high? */ | 2403 | /* BB where to store pid high? */ |
2006 | pSMB->Locks[0].LengthLow = cpu_to_le32((u32)len); | 2404 | pSMB->Locks[0].LengthLow = cpu_to_le32((u32)len); |
2007 | pSMB->Locks[0].LengthHigh = cpu_to_le32((u32)(len>>32)); | 2405 | pSMB->Locks[0].LengthHigh = cpu_to_le32((u32)(len>>32)); |
@@ -2035,9 +2433,9 @@ CIFSSMBLock(const int xid, struct cifs_tcon *tcon, | |||
2035 | 2433 | ||
2036 | int | 2434 | int |
2037 | CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, | 2435 | CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, |
2038 | const __u16 smb_file_id, const int get_flag, const __u64 len, | 2436 | const __u16 smb_file_id, const __u32 netpid, const int get_flag, |
2039 | struct file_lock *pLockData, const __u16 lock_type, | 2437 | const __u64 len, struct file_lock *pLockData, |
2040 | const bool waitFlag) | 2438 | const __u16 lock_type, const bool waitFlag) |
2041 | { | 2439 | { |
2042 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 2440 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
2043 | struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; | 2441 | struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; |
@@ -2095,7 +2493,7 @@ CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon, | |||
2095 | } else | 2493 | } else |
2096 | pSMB->Timeout = 0; | 2494 | pSMB->Timeout = 0; |
2097 | 2495 | ||
2098 | parm_data->pid = cpu_to_le32(current->tgid); | 2496 | parm_data->pid = cpu_to_le32(netpid); |
2099 | parm_data->start = cpu_to_le64(pLockData->fl_start); | 2497 | parm_data->start = cpu_to_le64(pLockData->fl_start); |
2100 | parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ | 2498 | parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ |
2101 | 2499 | ||
@@ -2812,8 +3210,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifs_tcon *tcon, | |||
2812 | pSMB->TotalDataCount = 0; | 3210 | pSMB->TotalDataCount = 0; |
2813 | pSMB->MaxParameterCount = cpu_to_le32(2); | 3211 | pSMB->MaxParameterCount = cpu_to_le32(2); |
2814 | /* BB find exact data count max from sess structure BB */ | 3212 | /* BB find exact data count max from sess structure BB */ |
2815 | pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf - | 3213 | pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); |
2816 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
2817 | pSMB->MaxSetupCount = 4; | 3214 | pSMB->MaxSetupCount = 4; |
2818 | pSMB->Reserved = 0; | 3215 | pSMB->Reserved = 0; |
2819 | pSMB->ParameterOffset = 0; | 3216 | pSMB->ParameterOffset = 0; |
@@ -3306,8 +3703,7 @@ smb_init_nttransact(const __u16 sub_command, const int setup_count, | |||
3306 | pSMB->Reserved = 0; | 3703 | pSMB->Reserved = 0; |
3307 | pSMB->TotalParameterCount = cpu_to_le32(parm_len); | 3704 | pSMB->TotalParameterCount = cpu_to_le32(parm_len); |
3308 | pSMB->TotalDataCount = 0; | 3705 | pSMB->TotalDataCount = 0; |
3309 | pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf - | 3706 | pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); |
3310 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
3311 | pSMB->ParameterCount = pSMB->TotalParameterCount; | 3707 | pSMB->ParameterCount = pSMB->TotalParameterCount; |
3312 | pSMB->DataCount = pSMB->TotalDataCount; | 3708 | pSMB->DataCount = pSMB->TotalDataCount; |
3313 | temp_offset = offsetof(struct smb_com_ntransact_req, Parms) + | 3709 | temp_offset = offsetof(struct smb_com_ntransact_req, Parms) + |
@@ -3467,7 +3863,7 @@ qsec_out: | |||
3467 | 3863 | ||
3468 | int | 3864 | int |
3469 | CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, | 3865 | CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, |
3470 | struct cifs_ntsd *pntsd, __u32 acllen) | 3866 | struct cifs_ntsd *pntsd, __u32 acllen, int aclflag) |
3471 | { | 3867 | { |
3472 | __u16 byte_count, param_count, data_count, param_offset, data_offset; | 3868 | __u16 byte_count, param_count, data_count, param_offset, data_offset; |
3473 | int rc = 0; | 3869 | int rc = 0; |
@@ -3504,7 +3900,7 @@ setCifsAclRetry: | |||
3504 | 3900 | ||
3505 | pSMB->Fid = fid; /* file handle always le */ | 3901 | pSMB->Fid = fid; /* file handle always le */ |
3506 | pSMB->Reserved2 = 0; | 3902 | pSMB->Reserved2 = 0; |
3507 | pSMB->AclFlags = cpu_to_le32(CIFS_ACL_DACL); | 3903 | pSMB->AclFlags = cpu_to_le32(aclflag); |
3508 | 3904 | ||
3509 | if (pntsd && acllen) { | 3905 | if (pntsd && acllen) { |
3510 | memcpy((char *) &pSMBr->hdr.Protocol + data_offset, | 3906 | memcpy((char *) &pSMBr->hdr.Protocol + data_offset, |
@@ -3977,8 +4373,7 @@ findFirstRetry: | |||
3977 | params = 12 + name_len /* includes null */ ; | 4373 | params = 12 + name_len /* includes null */ ; |
3978 | pSMB->TotalDataCount = 0; /* no EAs */ | 4374 | pSMB->TotalDataCount = 0; /* no EAs */ |
3979 | pSMB->MaxParameterCount = cpu_to_le16(10); | 4375 | pSMB->MaxParameterCount = cpu_to_le16(10); |
3980 | pSMB->MaxDataCount = cpu_to_le16((tcon->ses->server->maxBuf - | 4376 | pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize & 0xFFFFFF00); |
3981 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
3982 | pSMB->MaxSetupCount = 0; | 4377 | pSMB->MaxSetupCount = 0; |
3983 | pSMB->Reserved = 0; | 4378 | pSMB->Reserved = 0; |
3984 | pSMB->Flags = 0; | 4379 | pSMB->Flags = 0; |
@@ -4052,8 +4447,7 @@ findFirstRetry: | |||
4052 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + | 4447 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + |
4053 | psrch_inf->entries_in_buffer; | 4448 | psrch_inf->entries_in_buffer; |
4054 | lnoff = le16_to_cpu(parms->LastNameOffset); | 4449 | lnoff = le16_to_cpu(parms->LastNameOffset); |
4055 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | 4450 | if (CIFSMaxBufSize < lnoff) { |
4056 | lnoff) { | ||
4057 | cERROR(1, "ignoring corrupt resume name"); | 4451 | cERROR(1, "ignoring corrupt resume name"); |
4058 | psrch_inf->last_entry = NULL; | 4452 | psrch_inf->last_entry = NULL; |
4059 | return rc; | 4453 | return rc; |
@@ -4079,7 +4473,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, | |||
4079 | T2_FNEXT_RSP_PARMS *parms; | 4473 | T2_FNEXT_RSP_PARMS *parms; |
4080 | char *response_data; | 4474 | char *response_data; |
4081 | int rc = 0; | 4475 | int rc = 0; |
4082 | int bytes_returned, name_len; | 4476 | int bytes_returned; |
4477 | unsigned int name_len; | ||
4083 | __u16 params, byte_count; | 4478 | __u16 params, byte_count; |
4084 | 4479 | ||
4085 | cFYI(1, "In FindNext"); | 4480 | cFYI(1, "In FindNext"); |
@@ -4096,9 +4491,7 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, | |||
4096 | byte_count = 0; | 4491 | byte_count = 0; |
4097 | pSMB->TotalDataCount = 0; /* no EAs */ | 4492 | pSMB->TotalDataCount = 0; /* no EAs */ |
4098 | pSMB->MaxParameterCount = cpu_to_le16(8); | 4493 | pSMB->MaxParameterCount = cpu_to_le16(8); |
4099 | pSMB->MaxDataCount = | 4494 | pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize & 0xFFFFFF00); |
4100 | cpu_to_le16((tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & | ||
4101 | 0xFFFFFF00); | ||
4102 | pSMB->MaxSetupCount = 0; | 4495 | pSMB->MaxSetupCount = 0; |
4103 | pSMB->Reserved = 0; | 4496 | pSMB->Reserved = 0; |
4104 | pSMB->Flags = 0; | 4497 | pSMB->Flags = 0; |
@@ -4180,8 +4573,7 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, | |||
4180 | psrch_inf->index_of_last_entry += | 4573 | psrch_inf->index_of_last_entry += |
4181 | psrch_inf->entries_in_buffer; | 4574 | psrch_inf->entries_in_buffer; |
4182 | lnoff = le16_to_cpu(parms->LastNameOffset); | 4575 | lnoff = le16_to_cpu(parms->LastNameOffset); |
4183 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | 4576 | if (CIFSMaxBufSize < lnoff) { |
4184 | lnoff) { | ||
4185 | cERROR(1, "ignoring corrupt resume name"); | 4577 | cERROR(1, "ignoring corrupt resume name"); |
4186 | psrch_inf->last_entry = NULL; | 4578 | psrch_inf->last_entry = NULL; |
4187 | return rc; | 4579 | return rc; |
@@ -5839,7 +6231,7 @@ QAllEAsRetry: | |||
5839 | 6231 | ||
5840 | if (ea_name) { | 6232 | if (ea_name) { |
5841 | if (ea_name_len == name_len && | 6233 | if (ea_name_len == name_len && |
5842 | strncmp(ea_name, temp_ptr, name_len) == 0) { | 6234 | memcmp(ea_name, temp_ptr, name_len) == 0) { |
5843 | temp_ptr += name_len + 1; | 6235 | temp_ptr += name_len + 1; |
5844 | rc = value_len; | 6236 | rc = value_len; |
5845 | if (buf_size == 0) | 6237 | if (buf_size == 0) |
@@ -6034,12 +6426,7 @@ int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon, | |||
6034 | pSMB->TotalParameterCount = 0 ; | 6426 | pSMB->TotalParameterCount = 0 ; |
6035 | pSMB->TotalDataCount = 0; | 6427 | pSMB->TotalDataCount = 0; |
6036 | pSMB->MaxParameterCount = cpu_to_le32(2); | 6428 | pSMB->MaxParameterCount = cpu_to_le32(2); |
6037 | /* BB find exact data count max from sess structure BB */ | 6429 | pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); |
6038 | pSMB->MaxDataCount = 0; /* same in little endian or be */ | ||
6039 | /* BB VERIFY verify which is correct for above BB */ | ||
6040 | pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf - | ||
6041 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
6042 | |||
6043 | pSMB->MaxSetupCount = 4; | 6430 | pSMB->MaxSetupCount = 4; |
6044 | pSMB->Reserved = 0; | 6431 | pSMB->Reserved = 0; |
6045 | pSMB->ParameterOffset = 0; | 6432 | pSMB->ParameterOffset = 0; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80c2e3add3a2..d545a95c30ed 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -181,7 +181,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
181 | -EINVAL = invalid transact2 | 181 | -EINVAL = invalid transact2 |
182 | 182 | ||
183 | */ | 183 | */ |
184 | static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) | 184 | static int check2ndT2(struct smb_hdr *pSMB) |
185 | { | 185 | { |
186 | struct smb_t2_rsp *pSMBt; | 186 | struct smb_t2_rsp *pSMBt; |
187 | int remaining; | 187 | int remaining; |
@@ -214,9 +214,9 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) | |||
214 | 214 | ||
215 | cFYI(1, "missing %d bytes from transact2, check next response", | 215 | cFYI(1, "missing %d bytes from transact2, check next response", |
216 | remaining); | 216 | remaining); |
217 | if (total_data_size > maxBufSize) { | 217 | if (total_data_size > CIFSMaxBufSize) { |
218 | cERROR(1, "TotalDataSize %d is over maximum buffer %d", | 218 | cERROR(1, "TotalDataSize %d is over maximum buffer %d", |
219 | total_data_size, maxBufSize); | 219 | total_data_size, CIFSMaxBufSize); |
220 | return -EINVAL; | 220 | return -EINVAL; |
221 | } | 221 | } |
222 | return remaining; | 222 | return remaining; |
@@ -320,27 +320,24 @@ requeue_echo: | |||
320 | } | 320 | } |
321 | 321 | ||
322 | static bool | 322 | static bool |
323 | allocate_buffers(char **bigbuf, char **smallbuf, unsigned int size, | 323 | allocate_buffers(struct TCP_Server_Info *server) |
324 | bool is_large_buf) | ||
325 | { | 324 | { |
326 | char *bbuf = *bigbuf, *sbuf = *smallbuf; | 325 | if (!server->bigbuf) { |
327 | 326 | server->bigbuf = (char *)cifs_buf_get(); | |
328 | if (bbuf == NULL) { | 327 | if (!server->bigbuf) { |
329 | bbuf = (char *)cifs_buf_get(); | ||
330 | if (!bbuf) { | ||
331 | cERROR(1, "No memory for large SMB response"); | 328 | cERROR(1, "No memory for large SMB response"); |
332 | msleep(3000); | 329 | msleep(3000); |
333 | /* retry will check if exiting */ | 330 | /* retry will check if exiting */ |
334 | return false; | 331 | return false; |
335 | } | 332 | } |
336 | } else if (is_large_buf) { | 333 | } else if (server->large_buf) { |
337 | /* we are reusing a dirty large buf, clear its start */ | 334 | /* we are reusing a dirty large buf, clear its start */ |
338 | memset(bbuf, 0, size); | 335 | memset(server->bigbuf, 0, sizeof(struct smb_hdr)); |
339 | } | 336 | } |
340 | 337 | ||
341 | if (sbuf == NULL) { | 338 | if (!server->smallbuf) { |
342 | sbuf = (char *)cifs_small_buf_get(); | 339 | server->smallbuf = (char *)cifs_small_buf_get(); |
343 | if (!sbuf) { | 340 | if (!server->smallbuf) { |
344 | cERROR(1, "No memory for SMB response"); | 341 | cERROR(1, "No memory for SMB response"); |
345 | msleep(1000); | 342 | msleep(1000); |
346 | /* retry will check if exiting */ | 343 | /* retry will check if exiting */ |
@@ -349,36 +346,116 @@ allocate_buffers(char **bigbuf, char **smallbuf, unsigned int size, | |||
349 | /* beginning of smb buffer is cleared in our buf_get */ | 346 | /* beginning of smb buffer is cleared in our buf_get */ |
350 | } else { | 347 | } else { |
351 | /* if existing small buf clear beginning */ | 348 | /* if existing small buf clear beginning */ |
352 | memset(sbuf, 0, size); | 349 | memset(server->smallbuf, 0, sizeof(struct smb_hdr)); |
353 | } | 350 | } |
354 | 351 | ||
355 | *bigbuf = bbuf; | ||
356 | *smallbuf = sbuf; | ||
357 | |||
358 | return true; | 352 | return true; |
359 | } | 353 | } |
360 | 354 | ||
361 | static int | 355 | static bool |
362 | read_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg, | 356 | server_unresponsive(struct TCP_Server_Info *server) |
363 | struct kvec *iov, unsigned int to_read, | 357 | { |
364 | unsigned int *ptotal_read, bool is_header_read) | 358 | if (echo_retries > 0 && server->tcpStatus == CifsGood && |
359 | time_after(jiffies, server->lstrp + | ||
360 | (echo_retries * SMB_ECHO_INTERVAL))) { | ||
361 | cERROR(1, "Server %s has not responded in %d seconds. " | ||
362 | "Reconnecting...", server->hostname, | ||
363 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); | ||
364 | cifs_reconnect(server); | ||
365 | wake_up(&server->response_q); | ||
366 | return true; | ||
367 | } | ||
368 | |||
369 | return false; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * kvec_array_init - clone a kvec array, and advance into it | ||
374 | * @new: pointer to memory for cloned array | ||
375 | * @iov: pointer to original array | ||
376 | * @nr_segs: number of members in original array | ||
377 | * @bytes: number of bytes to advance into the cloned array | ||
378 | * | ||
379 | * This function will copy the array provided in iov to a section of memory | ||
380 | * and advance the specified number of bytes into the new array. It returns | ||
381 | * the number of segments in the new array. "new" must be at least as big as | ||
382 | * the original iov array. | ||
383 | */ | ||
384 | static unsigned int | ||
385 | kvec_array_init(struct kvec *new, struct kvec *iov, unsigned int nr_segs, | ||
386 | size_t bytes) | ||
387 | { | ||
388 | size_t base = 0; | ||
389 | |||
390 | while (bytes || !iov->iov_len) { | ||
391 | int copy = min(bytes, iov->iov_len); | ||
392 | |||
393 | bytes -= copy; | ||
394 | base += copy; | ||
395 | if (iov->iov_len == base) { | ||
396 | iov++; | ||
397 | nr_segs--; | ||
398 | base = 0; | ||
399 | } | ||
400 | } | ||
401 | memcpy(new, iov, sizeof(*iov) * nr_segs); | ||
402 | new->iov_base += base; | ||
403 | new->iov_len -= base; | ||
404 | return nr_segs; | ||
405 | } | ||
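A short worked example of the advance logic (illustrative):

/*
 * iov_orig = { { hdr, 4 }, { page, 4096 } }, bytes = 6:
 * the 4-byte header segment is fully consumed, then base advances
 * 2 bytes into the page segment, so:
 *
 *	struct kvec new[2];
 *	unsigned int segs = kvec_array_init(new, orig, 2, 6);
 *	// segs == 1, new[0].iov_base == page + 2, new[0].iov_len == 4094
 */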
406 | |||
407 | static struct kvec * | ||
408 | get_server_iovec(struct TCP_Server_Info *server, unsigned int nr_segs) | ||
409 | { | ||
410 | struct kvec *new_iov; | ||
411 | |||
412 | if (server->iov && nr_segs <= server->nr_iov) | ||
413 | return server->iov; | ||
414 | |||
415 | /* not big enough -- allocate a new one and release the old */ | ||
416 | new_iov = kmalloc(sizeof(*new_iov) * nr_segs, GFP_NOFS); | ||
417 | if (new_iov) { | ||
418 | kfree(server->iov); | ||
419 | server->iov = new_iov; | ||
420 | server->nr_iov = nr_segs; | ||
421 | } | ||
422 | return new_iov; | ||
423 | } | ||
424 | |||
425 | int | ||
426 | cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, | ||
427 | unsigned int nr_segs, unsigned int to_read) | ||
365 | { | 428 | { |
366 | int length, rc = 0; | 429 | int length = 0; |
367 | unsigned int total_read; | 430 | int total_read; |
368 | char *buf = iov->iov_base; | 431 | unsigned int segs; |
432 | struct msghdr smb_msg; | ||
433 | struct kvec *iov; | ||
434 | |||
435 | iov = get_server_iovec(server, nr_segs); | ||
436 | if (!iov) | ||
437 | return -ENOMEM; | ||
438 | |||
439 | smb_msg.msg_control = NULL; | ||
440 | smb_msg.msg_controllen = 0; | ||
441 | |||
442 | for (total_read = 0; to_read; total_read += length, to_read -= length) { | ||
443 | if (server_unresponsive(server)) { | ||
444 | total_read = -EAGAIN; | ||
445 | break; | ||
446 | } | ||
447 | |||
448 | segs = kvec_array_init(iov, iov_orig, nr_segs, total_read); | ||
449 | |||
450 | length = kernel_recvmsg(server->ssocket, &smb_msg, | ||
451 | iov, segs, to_read, 0); | ||
369 | 452 | ||
370 | for (total_read = 0; total_read < to_read; total_read += length) { | ||
371 | length = kernel_recvmsg(server->ssocket, smb_msg, iov, 1, | ||
372 | to_read - total_read, 0); | ||
373 | if (server->tcpStatus == CifsExiting) { | 453 | if (server->tcpStatus == CifsExiting) { |
374 | /* then will exit */ | 454 | total_read = -ESHUTDOWN; |
375 | rc = 2; | ||
376 | break; | 455 | break; |
377 | } else if (server->tcpStatus == CifsNeedReconnect) { | 456 | } else if (server->tcpStatus == CifsNeedReconnect) { |
378 | cifs_reconnect(server); | 457 | cifs_reconnect(server); |
379 | /* Reconnect wakes up rspns q */ | 458 | total_read = -EAGAIN; |
380 | /* Now we will reread sock */ | ||
381 | rc = 1; | ||
382 | break; | 459 | break; |
383 | } else if (length == -ERESTARTSYS || | 460 | } else if (length == -ERESTARTSYS || |
384 | length == -EAGAIN || | 461 | length == -EAGAIN || |
@@ -390,56 +467,54 @@ read_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg, | |||
390 | */ | 467 | */ |
391 | usleep_range(1000, 2000); | 468 | usleep_range(1000, 2000); |
392 | length = 0; | 469 | length = 0; |
393 | if (!is_header_read) | 470 | continue; |
394 | continue; | ||
395 | /* Special handling for header read */ | ||
396 | if (total_read) { | ||
397 | iov->iov_base = (to_read - total_read) + | ||
398 | buf; | ||
399 | iov->iov_len = to_read - total_read; | ||
400 | smb_msg->msg_control = NULL; | ||
401 | smb_msg->msg_controllen = 0; | ||
402 | rc = 3; | ||
403 | } else | ||
404 | rc = 1; | ||
405 | break; | ||
406 | } else if (length <= 0) { | 471 | } else if (length <= 0) { |
407 | cERROR(1, "Received no data, expecting %d", | 472 | cFYI(1, "Received no data or error: expecting %d " |
408 | to_read - total_read); | 473 | "got %d", to_read, length); |
409 | cifs_reconnect(server); | 474 | cifs_reconnect(server); |
410 | rc = 1; | 475 | total_read = -EAGAIN; |
411 | break; | 476 | break; |
412 | } | 477 | } |
413 | } | 478 | } |
479 | return total_read; | ||
480 | } | ||
414 | 481 | ||
415 | *ptotal_read = total_read; | 482 | int |
416 | return rc; | 483 | cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, |
484 | unsigned int to_read) | ||
485 | { | ||
486 | struct kvec iov; | ||
487 | |||
488 | iov.iov_base = buf; | ||
489 | iov.iov_len = to_read; | ||
490 | |||
491 | return cifs_readv_from_socket(server, &iov, 1, to_read); | ||
417 | } | 492 | } |
418 | 493 | ||
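For readers skimming the new receive path: kvec_array_init() rebases a scratch copy of the caller's kvec array past the bytes already received, so a short kernel_recvmsg() can simply be retried with the remainder. A minimal userspace sketch of that rebasing step (an assumption about its behavior, not the kernel implementation):

    #include <stddef.h>
    #include <string.h>

    struct kvec { void *iov_base; size_t iov_len; };

    /* Copy src[0..n) into dst, skipping `bytes` already-consumed bytes. */
    static unsigned int
    kvec_skip(struct kvec *dst, const struct kvec *src, unsigned int n,
              size_t bytes)
    {
            unsigned int i = 0;

            while (i < n && bytes >= src[i].iov_len)
                    bytes -= src[i++].iov_len;      /* fully consumed segs */

            memcpy(dst, src + i, (n - i) * sizeof(*dst));
            if (n - i) {
                    /* trim the partially consumed first segment */
                    dst[0].iov_base = (char *)dst[0].iov_base + bytes;
                    dst[0].iov_len -= bytes;
            }
            return n - i;                           /* segments left */
    }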
419 | static bool | 494 | static bool |
420 | check_rfc1002_header(struct TCP_Server_Info *server, char *buf) | 495 | is_smb_response(struct TCP_Server_Info *server, unsigned char type) |
421 | { | 496 | { |
422 | char temp = *buf; | ||
423 | unsigned int pdu_length = be32_to_cpu( | ||
424 | ((struct smb_hdr *)buf)->smb_buf_length); | ||
425 | |||
426 | /* | 497 | /* |
427 | * The first byte of the (big-endian) length field is | 498 | * The first byte of the (big-endian) length field is |
428 | * actually not part of the length but the frame type, | 499 | * actually not part of the length but the frame type, |
429 | * with the most common type, zero, marking regular data. | 500 | * with the most common type, zero, marking regular data. |
430 | */ | 501 | */ |
431 | if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { | 502 | switch (type) { |
432 | return false; | 503 | case RFC1002_SESSION_MESSAGE: |
433 | } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { | 504 | /* Regular SMB response */ |
434 | cFYI(1, "Good RFC 1002 session rsp"); | 505 | return true; |
435 | return false; | 506 | case RFC1002_SESSION_KEEP_ALIVE: |
436 | } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { | 507 | cFYI(1, "RFC 1002 session keep alive"); |
508 | break; | ||
509 | case RFC1002_POSITIVE_SESSION_RESPONSE: | ||
510 | cFYI(1, "RFC 1002 positive session response"); | ||
511 | break; | ||
512 | case RFC1002_NEGATIVE_SESSION_RESPONSE: | ||
437 | /* | 513 | /* |
438 | * We get this from Windows 98 instead of an error on | 514 | * We get this from Windows 98 instead of an error on |
439 | * SMB negprot response. | 515 | * SMB negprot response. |
440 | */ | 516 | */ |
441 | cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", | 517 | cFYI(1, "RFC 1002 negative session response"); |
442 | pdu_length); | ||
443 | /* give server a second to clean up */ | 518 | /* give server a second to clean up */ |
444 | msleep(1000); | 519 | msleep(1000); |
445 | /* | 520 | /* |
@@ -448,87 +523,89 @@ check_rfc1002_header(struct TCP_Server_Info *server, char *buf) | |||
448 | * is since we do not begin with RFC1001 session | 523 | * is since we do not begin with RFC1001 session |
449 | * initialize frame). | 524 | * initialize frame). |
450 | */ | 525 | */ |
451 | cifs_set_port((struct sockaddr *) | 526 | cifs_set_port((struct sockaddr *)&server->dstaddr, CIFS_PORT); |
452 | &server->dstaddr, CIFS_PORT); | ||
453 | cifs_reconnect(server); | 527 | cifs_reconnect(server); |
454 | wake_up(&server->response_q); | 528 | wake_up(&server->response_q); |
455 | return false; | 529 | break; |
456 | } else if (temp != (char) 0) { | 530 | default: |
457 | cERROR(1, "Unknown RFC 1002 frame"); | 531 | cERROR(1, "RFC 1002 unknown response type 0x%x", type); |
458 | cifs_dump_mem(" Received Data: ", buf, 4); | ||
459 | cifs_reconnect(server); | ||
460 | return false; | ||
461 | } | ||
462 | |||
463 | /* else we have an SMB response */ | ||
464 | if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || | ||
465 | (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { | ||
466 | cERROR(1, "Invalid size SMB length %d pdu_length %d", | ||
467 | 4, pdu_length+4); | ||
468 | cifs_reconnect(server); | 532 | cifs_reconnect(server); |
469 | wake_up(&server->response_q); | ||
470 | return false; | ||
471 | } | 533 | } |
472 | 534 | ||
473 | return true; | 535 | return false; |
474 | } | 536 | } |
475 | 537 | ||
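As the comment in is_smb_response() says, byte zero of the four-byte RFC1002 header is the frame type and only the remaining three bytes carry the payload length. A standalone sketch of splitting the header that way (illustrative; the driver itself reads the whole field with be32_to_cpu() and relies on the session-message type being zero):

    #include <stdint.h>

    /* type 0x00 == RFC1002_SESSION_MESSAGE, i.e. a regular SMB frame */
    static uint8_t rfc1002_type(const uint8_t hdr[4])
    {
            return hdr[0];
    }

    static uint32_t rfc1002_length(const uint8_t hdr[4])
    {
            /* big-endian 24-bit length in bytes 1..3 */
            return ((uint32_t)hdr[1] << 16) | ((uint32_t)hdr[2] << 8) | hdr[3];
    }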
476 | static struct mid_q_entry * | 538 | static struct mid_q_entry * |
477 | find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, | 539 | find_mid(struct TCP_Server_Info *server, struct smb_hdr *buf) |
478 | int *length, bool is_large_buf, bool *is_multi_rsp, char **bigbuf) | ||
479 | { | 540 | { |
480 | struct mid_q_entry *mid = NULL, *tmp_mid, *ret = NULL; | 541 | struct mid_q_entry *mid; |
481 | 542 | ||
482 | spin_lock(&GlobalMid_Lock); | 543 | spin_lock(&GlobalMid_Lock); |
483 | list_for_each_entry_safe(mid, tmp_mid, &server->pending_mid_q, qhead) { | 544 | list_for_each_entry(mid, &server->pending_mid_q, qhead) { |
484 | if (mid->mid != buf->Mid || | 545 | if (mid->mid == buf->Mid && |
485 | mid->midState != MID_REQUEST_SUBMITTED || | 546 | mid->midState == MID_REQUEST_SUBMITTED && |
486 | mid->command != buf->Command) | 547 | mid->command == buf->Command) { |
487 | continue; | 548 | spin_unlock(&GlobalMid_Lock); |
488 | 549 | return mid; | |
489 | if (*length == 0 && check2ndT2(buf, server->maxBuf) > 0) { | ||
490 | /* We have a multipart transact2 resp */ | ||
491 | *is_multi_rsp = true; | ||
492 | if (mid->resp_buf) { | ||
493 | /* merge response - fix up 1st*/ | ||
494 | *length = coalesce_t2(buf, mid->resp_buf); | ||
495 | if (*length > 0) { | ||
496 | *length = 0; | ||
497 | mid->multiRsp = true; | ||
498 | break; | ||
499 | } | ||
500 | /* All parts received or packet is malformed. */ | ||
501 | mid->multiEnd = true; | ||
502 | goto multi_t2_fnd; | ||
503 | } | ||
504 | if (!is_large_buf) { | ||
505 | /*FIXME: switch to already allocated largebuf?*/ | ||
506 | cERROR(1, "1st trans2 resp needs bigbuf"); | ||
507 | } else { | ||
508 | /* Have first buffer */ | ||
509 | mid->resp_buf = buf; | ||
510 | mid->largeBuf = true; | ||
511 | *bigbuf = NULL; | ||
512 | } | ||
513 | break; | ||
514 | } | 550 | } |
515 | mid->resp_buf = buf; | 551 | } |
516 | mid->largeBuf = is_large_buf; | 552 | spin_unlock(&GlobalMid_Lock); |
517 | multi_t2_fnd: | 553 | return NULL; |
518 | if (*length == 0) | 554 | } |
519 | mid->midState = MID_RESPONSE_RECEIVED; | 555 | |
520 | else | 556 | void |
521 | mid->midState = MID_RESPONSE_MALFORMED; | 557 | dequeue_mid(struct mid_q_entry *mid, bool malformed) |
558 | { | ||
522 | #ifdef CONFIG_CIFS_STATS2 | 559 | #ifdef CONFIG_CIFS_STATS2 |
523 | mid->when_received = jiffies; | 560 | mid->when_received = jiffies; |
524 | #endif | 561 | #endif |
525 | list_del_init(&mid->qhead); | 562 | spin_lock(&GlobalMid_Lock); |
526 | ret = mid; | 563 | if (!malformed) |
527 | break; | 564 | mid->midState = MID_RESPONSE_RECEIVED; |
528 | } | 565 | else |
566 | mid->midState = MID_RESPONSE_MALFORMED; | ||
567 | list_del_init(&mid->qhead); | ||
529 | spin_unlock(&GlobalMid_Lock); | 568 | spin_unlock(&GlobalMid_Lock); |
569 | } | ||
530 | 570 | ||
531 | return ret; | 571 | static void |
572 | handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, | ||
573 | struct smb_hdr *buf, int malformed) | ||
574 | { | ||
575 | if (malformed == 0 && check2ndT2(buf) > 0) { | ||
576 | mid->multiRsp = true; | ||
577 | if (mid->resp_buf) { | ||
578 | /* merge response - fix up 1st*/ | ||
579 | malformed = coalesce_t2(buf, mid->resp_buf); | ||
580 | if (malformed > 0) | ||
581 | return; | ||
582 | |||
583 | /* All parts received or packet is malformed. */ | ||
584 | mid->multiEnd = true; | ||
585 | return dequeue_mid(mid, malformed); | ||
586 | } | ||
587 | if (!server->large_buf) { | ||
588 | /*FIXME: switch to already allocated largebuf?*/ | ||
589 | cERROR(1, "1st trans2 resp needs bigbuf"); | ||
590 | } else { | ||
591 | /* Have first buffer */ | ||
592 | mid->resp_buf = buf; | ||
593 | mid->largeBuf = true; | ||
594 | server->bigbuf = NULL; | ||
595 | } | ||
596 | return; | ||
597 | } | ||
598 | mid->resp_buf = buf; | ||
599 | mid->largeBuf = server->large_buf; | ||
600 | /* Was previous buf put in mpx struct for multi-rsp? */ | ||
601 | if (!mid->multiRsp) { | ||
602 | /* smb buffer will be freed by user thread */ | ||
603 | if (server->large_buf) | ||
604 | server->bigbuf = NULL; | ||
605 | else | ||
606 | server->smallbuf = NULL; | ||
607 | } | ||
608 | dequeue_mid(mid, malformed); | ||
532 | } | 609 | } |
533 | 610 | ||
534 | static void clean_demultiplex_info(struct TCP_Server_Info *server) | 611 | static void clean_demultiplex_info(struct TCP_Server_Info *server) |
@@ -618,6 +695,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
618 | } | 695 | } |
619 | 696 | ||
620 | kfree(server->hostname); | 697 | kfree(server->hostname); |
698 | kfree(server->iov); | ||
621 | kfree(server); | 699 | kfree(server); |
622 | 700 | ||
623 | length = atomic_dec_return(&tcpSesAllocCount); | 701 | length = atomic_dec_return(&tcpSesAllocCount); |
@@ -627,20 +705,70 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
627 | } | 705 | } |
628 | 706 | ||
629 | static int | 707 | static int |
708 | standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) | ||
709 | { | ||
710 | int length; | ||
711 | char *buf = server->smallbuf; | ||
712 | struct smb_hdr *smb_buffer = (struct smb_hdr *)buf; | ||
713 | unsigned int pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); | ||
714 | |||
715 | /* make sure this will fit in a large buffer */ | ||
716 | if (pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { | ||
717 | cERROR(1, "SMB response too long (%u bytes)", | ||
718 | pdu_length); | ||
719 | cifs_reconnect(server); | ||
720 | wake_up(&server->response_q); | ||
721 | return -EAGAIN; | ||
722 | } | ||
723 | |||
724 | /* switch to large buffer if too big for a small one */ | ||
725 | if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { | ||
726 | server->large_buf = true; | ||
727 | memcpy(server->bigbuf, server->smallbuf, server->total_read); | ||
728 | buf = server->bigbuf; | ||
729 | smb_buffer = (struct smb_hdr *)buf; | ||
730 | } | ||
731 | |||
732 | /* now read the rest */ | ||
733 | length = cifs_read_from_socket(server, | ||
734 | buf + sizeof(struct smb_hdr) - 1, | ||
735 | pdu_length - sizeof(struct smb_hdr) + 1 + 4); | ||
736 | if (length < 0) | ||
737 | return length; | ||
738 | server->total_read += length; | ||
739 | |||
740 | dump_smb(smb_buffer, server->total_read); | ||
741 | |||
742 | /* | ||
743 | * We know that we received enough to get to the MID as we | ||
744 | * checked the pdu_length earlier. Now check to see | ||
745 | * if the rest of the header is OK. We borrow the length | ||
746 | * var for the rest of the function to avoid a new stack var. | ||
747 | * | ||
748 | * 48 bytes is enough to display the header and a little bit | ||
749 | * into the payload for debugging purposes. | ||
750 | */ | ||
751 | length = checkSMB(smb_buffer, smb_buffer->Mid, server->total_read); | ||
752 | if (length != 0) | ||
753 | cifs_dump_mem("Bad SMB: ", buf, | ||
754 | min_t(unsigned int, server->total_read, 48)); | ||
755 | |||
756 | if (mid) | ||
757 | handle_mid(mid, server, smb_buffer, length); | ||
758 | |||
759 | return length; | ||
760 | } | ||
761 | |||
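To place standard_receive3() in the overall flow: the demultiplex thread now receives each frame in three phases — the 4-byte RFC1002 header, then enough of the SMB header to reach the MID, then the remainder. An outline under stated assumptions (read_exactly() is a placeholder primitive, not a kernel API, and the MID offset is assumed):

    #include <stddef.h>
    #include <stdint.h>

    extern int read_exactly(int sock, void *buf, size_t len); /* placeholder */

    enum { TO_MID = 32 };   /* assumed bytes from byte 4 up to the MID */

    static int receive_one_frame(int sock, uint8_t *buf)
    {
            uint32_t pdu_len;

            if (read_exactly(sock, buf, 4))          /* phase 1: RFC1002 hdr */
                    return -1;
            pdu_len = ((uint32_t)buf[1] << 16) |
                      ((uint32_t)buf[2] << 8) | buf[3];

            if (read_exactly(sock, buf + 4, TO_MID)) /* phase 2: to the MID */
                    return -1;

            /* phase 3: the rest, then validate (checkSMB) and dispatch */
            return read_exactly(sock, buf + 4 + TO_MID, pdu_len - TO_MID);
    }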
762 | static int | ||
630 | cifs_demultiplex_thread(void *p) | 763 | cifs_demultiplex_thread(void *p) |
631 | { | 764 | { |
632 | int length; | 765 | int length; |
633 | struct TCP_Server_Info *server = p; | 766 | struct TCP_Server_Info *server = p; |
634 | unsigned int pdu_length, total_read; | 767 | unsigned int pdu_length; |
635 | char *buf = NULL, *bigbuf = NULL, *smallbuf = NULL; | 768 | char *buf = NULL; |
636 | struct smb_hdr *smb_buffer = NULL; | 769 | struct smb_hdr *smb_buffer = NULL; |
637 | struct msghdr smb_msg; | ||
638 | struct kvec iov; | ||
639 | struct task_struct *task_to_wake = NULL; | 770 | struct task_struct *task_to_wake = NULL; |
640 | struct mid_q_entry *mid_entry; | 771 | struct mid_q_entry *mid_entry; |
641 | bool isLargeBuf = false; | ||
642 | bool isMultiRsp = false; | ||
643 | int rc; | ||
644 | 772 | ||
645 | current->flags |= PF_MEMALLOC; | 773 | current->flags |= PF_MEMALLOC; |
646 | cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); | 774 | cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); |
@@ -655,111 +783,65 @@ cifs_demultiplex_thread(void *p) | |||
655 | if (try_to_freeze()) | 783 | if (try_to_freeze()) |
656 | continue; | 784 | continue; |
657 | 785 | ||
658 | if (!allocate_buffers(&bigbuf, &smallbuf, | 786 | if (!allocate_buffers(server)) |
659 | sizeof(struct smb_hdr), isLargeBuf)) | ||
660 | continue; | 787 | continue; |
661 | 788 | ||
662 | isLargeBuf = false; | 789 | server->large_buf = false; |
663 | isMultiRsp = false; | 790 | smb_buffer = (struct smb_hdr *)server->smallbuf; |
664 | smb_buffer = (struct smb_hdr *)smallbuf; | 791 | buf = server->smallbuf; |
665 | buf = smallbuf; | ||
666 | iov.iov_base = buf; | ||
667 | iov.iov_len = 4; | ||
668 | smb_msg.msg_control = NULL; | ||
669 | smb_msg.msg_controllen = 0; | ||
670 | pdu_length = 4; /* enough to get RFC1001 header */ | 792 | pdu_length = 4; /* enough to get RFC1001 header */ |
671 | 793 | ||
672 | incomplete_rcv: | 794 | length = cifs_read_from_socket(server, buf, pdu_length); |
673 | if (echo_retries > 0 && server->tcpStatus == CifsGood && | 795 | if (length < 0) |
674 | time_after(jiffies, server->lstrp + | ||
675 | (echo_retries * SMB_ECHO_INTERVAL))) { | ||
676 | cERROR(1, "Server %s has not responded in %d seconds. " | ||
677 | "Reconnecting...", server->hostname, | ||
678 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); | ||
679 | cifs_reconnect(server); | ||
680 | wake_up(&server->response_q); | ||
681 | continue; | ||
682 | } | ||
683 | |||
684 | rc = read_from_socket(server, &smb_msg, &iov, pdu_length, | ||
685 | &total_read, true /* header read */); | ||
686 | if (rc == 3) | ||
687 | goto incomplete_rcv; | ||
688 | else if (rc == 2) | ||
689 | break; | ||
690 | else if (rc == 1) | ||
691 | continue; | 796 | continue; |
797 | server->total_read = length; | ||
692 | 798 | ||
693 | /* | 799 | /* |
694 | * The right amount was read from socket - 4 bytes, | 800 | * The right amount was read from socket - 4 bytes, |
695 | * so we can now interpret the length field. | 801 | * so we can now interpret the length field. |
696 | */ | 802 | */ |
697 | |||
698 | /* | ||
699 | * Note that RFC 1001 length is big endian on the wire, | ||
700 | * but we convert it here so it is always manipulated | ||
701 | * as host byte order. | ||
702 | */ | ||
703 | pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); | 803 | pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); |
704 | 804 | ||
705 | cFYI(1, "rfc1002 length 0x%x", pdu_length+4); | 805 | cFYI(1, "RFC1002 header 0x%x", pdu_length); |
706 | if (!check_rfc1002_header(server, buf)) | 806 | if (!is_smb_response(server, buf[0])) |
707 | continue; | 807 | continue; |
708 | 808 | ||
709 | /* else length ok */ | 809 | /* make sure we have enough to get to the MID */ |
710 | if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { | 810 | if (pdu_length < sizeof(struct smb_hdr) - 1 - 4) { |
711 | isLargeBuf = true; | 811 | cERROR(1, "SMB response too short (%u bytes)", |
712 | memcpy(bigbuf, smallbuf, 4); | 812 | pdu_length); |
713 | smb_buffer = (struct smb_hdr *)bigbuf; | 813 | cifs_reconnect(server); |
714 | buf = bigbuf; | 814 | wake_up(&server->response_q); |
815 | continue; | ||
715 | } | 816 | } |
716 | 817 | ||
717 | iov.iov_base = 4 + buf; | 818 | /* read down to the MID */ |
718 | iov.iov_len = pdu_length; | 819 | length = cifs_read_from_socket(server, buf + 4, |
719 | rc = read_from_socket(server, &smb_msg, &iov, pdu_length, | 820 | sizeof(struct smb_hdr) - 1 - 4); |
720 | &total_read, false); | 821 | if (length < 0) |
721 | if (rc == 2) | ||
722 | break; | ||
723 | else if (rc == 1) | ||
724 | continue; | 822 | continue; |
823 | server->total_read += length; | ||
725 | 824 | ||
726 | total_read += 4; /* account for rfc1002 hdr */ | 825 | mid_entry = find_mid(server, smb_buffer); |
727 | 826 | ||
728 | dump_smb(smb_buffer, total_read); | 827 | if (!mid_entry || !mid_entry->receive) |
828 | length = standard_receive3(server, mid_entry); | ||
829 | else | ||
830 | length = mid_entry->receive(server, mid_entry); | ||
729 | 831 | ||
730 | /* | 832 | if (length < 0) |
731 | * We know that we received enough to get to the MID as we | 833 | continue; |
732 | * checked the pdu_length earlier. Now check to see | ||
733 | * if the rest of the header is OK. We borrow the length | ||
734 | * var for the rest of the loop to avoid a new stack var. | ||
735 | * | ||
736 | * 48 bytes is enough to display the header and a little bit | ||
737 | * into the payload for debugging purposes. | ||
738 | */ | ||
739 | length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); | ||
740 | if (length != 0) | ||
741 | cifs_dump_mem("Bad SMB: ", buf, | ||
742 | min_t(unsigned int, total_read, 48)); | ||
743 | 834 | ||
744 | server->lstrp = jiffies; | 835 | if (server->large_buf) { |
836 | buf = server->bigbuf; | ||
837 | smb_buffer = (struct smb_hdr *)buf; | ||
838 | } | ||
745 | 839 | ||
746 | mid_entry = find_cifs_mid(server, smb_buffer, &length, | 840 | server->lstrp = jiffies; |
747 | isLargeBuf, &isMultiRsp, &bigbuf); | ||
748 | if (mid_entry != NULL) { | 841 | if (mid_entry != NULL) { |
749 | mid_entry->callback(mid_entry); | 842 | if (!mid_entry->multiRsp || mid_entry->multiEnd) |
750 | /* Was previous buf put in mpx struct for multi-rsp? */ | 843 | mid_entry->callback(mid_entry); |
751 | if (!isMultiRsp) { | 844 | } else if (!is_valid_oplock_break(smb_buffer, server)) { |
752 | /* smb buffer will be freed by user thread */ | ||
753 | if (isLargeBuf) | ||
754 | bigbuf = NULL; | ||
755 | else | ||
756 | smallbuf = NULL; | ||
757 | } | ||
758 | } else if (length != 0) { | ||
759 | /* response sanity checks failed */ | ||
760 | continue; | ||
761 | } else if (!is_valid_oplock_break(smb_buffer, server) && | ||
762 | !isMultiRsp) { | ||
763 | cERROR(1, "No task to wake, unknown frame received! " | 845 | cERROR(1, "No task to wake, unknown frame received! " |
764 | "NumMids %d", atomic_read(&midCount)); | 846 | "NumMids %d", atomic_read(&midCount)); |
765 | cifs_dump_mem("Received Data is: ", buf, | 847 | cifs_dump_mem("Received Data is: ", buf, |
@@ -773,9 +855,9 @@ incomplete_rcv: | |||
773 | } /* end while !EXITING */ | 855 | } /* end while !EXITING */ |
774 | 856 | ||
775 | /* buffer usually freed in free_mid - need to free it here on exit */ | 857 | /* buffer usually freed in free_mid - need to free it here on exit */ |
776 | cifs_buf_release(bigbuf); | 858 | cifs_buf_release(server->bigbuf); |
777 | if (smallbuf) /* no sense logging a debug message if NULL */ | 859 | if (server->smallbuf) /* no sense logging a debug message if NULL */ |
778 | cifs_small_buf_release(smallbuf); | 860 | cifs_small_buf_release(server->smallbuf); |
779 | 861 | ||
780 | task_to_wake = xchg(&server->tsk, NULL); | 862 | task_to_wake = xchg(&server->tsk, NULL); |
781 | clean_demultiplex_info(server); | 863 | clean_demultiplex_info(server); |
@@ -827,6 +909,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
827 | { | 909 | { |
828 | char *value, *data, *end; | 910 | char *value, *data, *end; |
829 | char *mountdata_copy = NULL, *options; | 911 | char *mountdata_copy = NULL, *options; |
912 | int err; | ||
830 | unsigned int temp_len, i, j; | 913 | unsigned int temp_len, i, j; |
831 | char separator[2]; | 914 | char separator[2]; |
832 | short int override_uid = -1; | 915 | short int override_uid = -1; |
@@ -883,6 +966,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
883 | cFYI(1, "Null separator not allowed"); | 966 | cFYI(1, "Null separator not allowed"); |
884 | } | 967 | } |
885 | } | 968 | } |
969 | vol->backupuid_specified = false; /* no backup intent for a user */ | ||
970 | vol->backupgid_specified = false; /* no backup intent for a group */ | ||
886 | 971 | ||
887 | while ((data = strsep(&options, separator)) != NULL) { | 972 | while ((data = strsep(&options, separator)) != NULL) { |
888 | if (!*data) | 973 | if (!*data) |
@@ -1298,7 +1383,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1298 | /* ignore */ | 1383 | /* ignore */ |
1299 | } else if (strnicmp(data, "guest", 5) == 0) { | 1384 | } else if (strnicmp(data, "guest", 5) == 0) { |
1300 | /* ignore */ | 1385 | /* ignore */ |
1301 | } else if (strnicmp(data, "rw", 2) == 0) { | 1386 | } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) { |
1302 | /* ignore */ | 1387 | /* ignore */ |
1303 | } else if (strnicmp(data, "ro", 2) == 0) { | 1388 | } else if (strnicmp(data, "ro", 2) == 0) { |
1304 | /* ignore */ | 1389 | /* ignore */ |
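The strlen() test added to the "rw" arm matters because strnicmp() compares only a prefix: without it this branch would also swallow "rwpidforward" (whose own comparison is widened from 4 to 12 characters below for the same reason). A userspace illustration with the equivalent strncasecmp():

    #include <assert.h>
    #include <string.h>
    #include <strings.h>

    int main(void)
    {
            const char *opt = "rwpidforward";

            /* a 2-byte prefix match cannot tell the two options apart */
            assert(strncasecmp(opt, "rw", 2) == 0);

            /* the extra length check restores exact matching */
            assert(!(strncasecmp(opt, "rw", 2) == 0 && strlen(opt) == 2));
            return 0;
    }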
@@ -1401,7 +1486,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1401 | vol->server_ino = 1; | 1486 | vol->server_ino = 1; |
1402 | } else if (strnicmp(data, "noserverino", 9) == 0) { | 1487 | } else if (strnicmp(data, "noserverino", 9) == 0) { |
1403 | vol->server_ino = 0; | 1488 | vol->server_ino = 0; |
1404 | } else if (strnicmp(data, "rwpidforward", 4) == 0) { | 1489 | } else if (strnicmp(data, "rwpidforward", 12) == 0) { |
1405 | vol->rwpidforward = 1; | 1490 | vol->rwpidforward = 1; |
1406 | } else if (strnicmp(data, "cifsacl", 7) == 0) { | 1491 | } else if (strnicmp(data, "cifsacl", 7) == 0) { |
1407 | vol->cifs_acl = 1; | 1492 | vol->cifs_acl = 1; |
@@ -1442,6 +1527,22 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1442 | vol->mfsymlinks = true; | 1527 | vol->mfsymlinks = true; |
1443 | } else if (strnicmp(data, "multiuser", 8) == 0) { | 1528 | } else if (strnicmp(data, "multiuser", 8) == 0) { |
1444 | vol->multiuser = true; | 1529 | vol->multiuser = true; |
1530 | } else if (!strnicmp(data, "backupuid", 9) && value && *value) { | ||
1531 | err = kstrtouint(value, 0, &vol->backupuid); | ||
1532 | if (err < 0) { | ||
1533 | cERROR(1, "%s: Invalid backupuid value", | ||
1534 | __func__); | ||
1535 | goto cifs_parse_mount_err; | ||
1536 | } | ||
1537 | vol->backupuid_specified = true; | ||
1538 | } else if (!strnicmp(data, "backupgid", 9) && value && *value) { | ||
1539 | err = kstrtouint(value, 0, &vol->backupgid); | ||
1540 | if (err < 0) { | ||
1541 | cERROR(1, "%s: Invalid backupgid value", | ||
1542 | __func__); | ||
1543 | goto cifs_parse_mount_err; | ||
1544 | } | ||
1545 | vol->backupgid_specified = true; | ||
1445 | } else | 1546 | } else |
1446 | printk(KERN_WARNING "CIFS: Unknown mount option %s\n", | 1547 | printk(KERN_WARNING "CIFS: Unknown mount option %s\n", |
1447 | data); | 1548 | data); |
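The new backupuid=/backupgid= options are parsed with kstrtouint(), which fails with -EINVAL on empty input or trailing garbage and -ERANGE on overflow instead of silently truncating. A rough userspace equivalent of that contract (a sketch, not the kernel implementation):

    #include <errno.h>
    #include <limits.h>
    #include <stdlib.h>

    static int parse_uint_strict(const char *s, unsigned int base,
                                 unsigned int *res)
    {
            char *end;
            unsigned long val;

            errno = 0;
            val = strtoul(s, &end, base);
            if (end == s || *end != '\0')
                    return -EINVAL;         /* empty or trailing garbage */
            if (errno == ERANGE || val > UINT_MAX)
                    return -ERANGE;         /* overflow */
            *res = (unsigned int)val;
            return 0;
    }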
@@ -2018,7 +2119,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
2018 | warned_on_ntlm = true; | 2119 | warned_on_ntlm = true; |
2019 | cERROR(1, "default security mechanism requested. The default " | 2120 | cERROR(1, "default security mechanism requested. The default " |
2020 | "security mechanism will be upgraded from ntlm to " | 2121 | "security mechanism will be upgraded from ntlm to " |
2021 | "ntlmv2 in kernel release 3.1"); | 2122 | "ntlmv2 in kernel release 3.2"); |
2022 | } | 2123 | } |
2023 | ses->overrideSecFlg = volume_info->secFlg; | 2124 | ses->overrideSecFlg = volume_info->secFlg; |
2024 | 2125 | ||
@@ -2209,16 +2310,16 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) | |||
2209 | (new->mnt_cifs_flags & CIFS_MOUNT_MASK)) | 2310 | (new->mnt_cifs_flags & CIFS_MOUNT_MASK)) |
2210 | return 0; | 2311 | return 0; |
2211 | 2312 | ||
2212 | if (old->rsize != new->rsize) | ||
2213 | return 0; | ||
2214 | |||
2215 | /* | 2313 | /* |
2216 | * We want to share sb only if we don't specify wsize or specified wsize | 2314 | * We want to share sb only if we don't specify an r/wsize or |
2217 | * is greater or equal than existing one. | 2315 | * specified r/wsize is greater than or equal to existing one. |
2218 | */ | 2316 | */ |
2219 | if (new->wsize && new->wsize < old->wsize) | 2317 | if (new->wsize && new->wsize < old->wsize) |
2220 | return 0; | 2318 | return 0; |
2221 | 2319 | ||
2320 | if (new->rsize && new->rsize < old->rsize) | ||
2321 | return 0; | ||
2322 | |||
2222 | if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) | 2323 | if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) |
2223 | return 0; | 2324 | return 0; |
2224 | 2325 | ||
@@ -2656,14 +2757,6 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, | |||
2656 | CIFS_MOUNT_POSIX_PATHS; | 2757 | CIFS_MOUNT_POSIX_PATHS; |
2657 | } | 2758 | } |
2658 | 2759 | ||
2659 | if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) { | ||
2660 | if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { | ||
2661 | cifs_sb->rsize = 127 * 1024; | ||
2662 | cFYI(DBG2, "larger reads not supported by srv"); | ||
2663 | } | ||
2664 | } | ||
2665 | |||
2666 | |||
2667 | cFYI(1, "Negotiate caps 0x%x", (int)cap); | 2760 | cFYI(1, "Negotiate caps 0x%x", (int)cap); |
2668 | #ifdef CONFIG_CIFS_DEBUG2 | 2761 | #ifdef CONFIG_CIFS_DEBUG2 |
2669 | if (cap & CIFS_UNIX_FCNTL_CAP) | 2762 | if (cap & CIFS_UNIX_FCNTL_CAP) |
@@ -2708,31 +2801,19 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2708 | spin_lock_init(&cifs_sb->tlink_tree_lock); | 2801 | spin_lock_init(&cifs_sb->tlink_tree_lock); |
2709 | cifs_sb->tlink_tree = RB_ROOT; | 2802 | cifs_sb->tlink_tree = RB_ROOT; |
2710 | 2803 | ||
2711 | if (pvolume_info->rsize > CIFSMaxBufSize) { | ||
2712 | cERROR(1, "rsize %d too large, using MaxBufSize", | ||
2713 | pvolume_info->rsize); | ||
2714 | cifs_sb->rsize = CIFSMaxBufSize; | ||
2715 | } else if ((pvolume_info->rsize) && | ||
2716 | (pvolume_info->rsize <= CIFSMaxBufSize)) | ||
2717 | cifs_sb->rsize = pvolume_info->rsize; | ||
2718 | else /* default */ | ||
2719 | cifs_sb->rsize = CIFSMaxBufSize; | ||
2720 | |||
2721 | if (cifs_sb->rsize < 2048) { | ||
2722 | cifs_sb->rsize = 2048; | ||
2723 | /* Windows ME may prefer this */ | ||
2724 | cFYI(1, "readsize set to minimum: 2048"); | ||
2725 | } | ||
2726 | |||
2727 | /* | 2804 | /* |
2728 | * Temporarily set wsize for matching superblock. If we end up using | 2805 | * Temporarily set r/wsize for matching superblock. If we end up using |
2729 | * new sb then cifs_negotiate_wsize will later negotiate it downward | 2806 | * new sb then client will later negotiate it downward if needed. |
2730 | * if needed. | ||
2731 | */ | 2807 | */ |
2808 | cifs_sb->rsize = pvolume_info->rsize; | ||
2732 | cifs_sb->wsize = pvolume_info->wsize; | 2809 | cifs_sb->wsize = pvolume_info->wsize; |
2733 | 2810 | ||
2734 | cifs_sb->mnt_uid = pvolume_info->linux_uid; | 2811 | cifs_sb->mnt_uid = pvolume_info->linux_uid; |
2735 | cifs_sb->mnt_gid = pvolume_info->linux_gid; | 2812 | cifs_sb->mnt_gid = pvolume_info->linux_gid; |
2813 | if (pvolume_info->backupuid_specified) | ||
2814 | cifs_sb->mnt_backupuid = pvolume_info->backupuid; | ||
2815 | if (pvolume_info->backupgid_specified) | ||
2816 | cifs_sb->mnt_backupgid = pvolume_info->backupgid; | ||
2736 | cifs_sb->mnt_file_mode = pvolume_info->file_mode; | 2817 | cifs_sb->mnt_file_mode = pvolume_info->file_mode; |
2737 | cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; | 2818 | cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; |
2738 | cFYI(1, "file mode: 0x%x dir mode: 0x%x", | 2819 | cFYI(1, "file mode: 0x%x dir mode: 0x%x", |
@@ -2763,6 +2844,10 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2763 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; | 2844 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; |
2764 | if (pvolume_info->cifs_acl) | 2845 | if (pvolume_info->cifs_acl) |
2765 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; | 2846 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; |
2847 | if (pvolume_info->backupuid_specified) | ||
2848 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; | ||
2849 | if (pvolume_info->backupgid_specified) | ||
2850 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; | ||
2766 | if (pvolume_info->override_uid) | 2851 | if (pvolume_info->override_uid) |
2767 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; | 2852 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; |
2768 | if (pvolume_info->override_gid) | 2853 | if (pvolume_info->override_gid) |
@@ -2795,29 +2880,41 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2795 | } | 2880 | } |
2796 | 2881 | ||
2797 | /* | 2882 | /* |
2798 | * When the server supports very large writes via POSIX extensions, we can | 2883 | * When the server supports very large reads and writes via POSIX extensions, |
2799 | * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including | 2884 | * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not |
2800 | * the RFC1001 length. | 2885 | * including the RFC1001 length. |
2801 | * | 2886 | * |
2802 | * Note that this might make for "interesting" allocation problems during | 2887 | * Note that this might make for "interesting" allocation problems during |
2803 | * writeback however as we have to allocate an array of pointers for the | 2888 | * writeback however as we have to allocate an array of pointers for the |
2804 | * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. | 2889 | * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. |
2890 | * | ||
2891 | * For reads, there is a similar problem as we need to allocate an array | ||
2892 | * of kvecs to handle the receive, though that should only need to be done | ||
2893 | * once. | ||
2805 | */ | 2894 | */ |
2806 | #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) | 2895 | #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) |
2896 | #define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) | ||
2807 | 2897 | ||
2808 | /* | 2898 | /* |
2809 | * When the server doesn't allow large posix writes, only allow a wsize of | 2899 | * When the server doesn't allow large posix writes, only allow a rsize/wsize |
2810 | * 128k minus the size of the WRITE_AND_X header. That allows for a write up | 2900 | * of 2^17-1 minus the size of the call header. That allows for a read or |
2811 | * to the maximum size described by RFC1002. | 2901 | * write up to the maximum size described by RFC1002. |
2812 | */ | 2902 | */ |
2813 | #define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4) | 2903 | #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) |
2904 | #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) | ||
2814 | 2905 | ||
2815 | /* | 2906 | /* |
2816 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 | 2907 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 |
2817 | * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill | 2908 | * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill |
2818 | * a single wsize request with a single call. | 2909 | * a single wsize request with a single call. |
2819 | */ | 2910 | */ |
2820 | #define CIFS_DEFAULT_WSIZE (1024 * 1024) | 2911 | #define CIFS_DEFAULT_IOSIZE (1024 * 1024) |
2912 | |||
2913 | /* | ||
2914 | * Windows only supports a max of 60k reads. Default to that when posix | ||
2915 | * extensions aren't in force. | ||
2916 | */ | ||
2917 | #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) | ||
2821 | 2918 | ||
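For a concrete feel for the limits these macros encode (the header sizes below are assumptions for illustration; the real values come from sizeof(WRITE_REQ) and sizeof(READ_RSP) in cifspdu.h):

    #include <stdio.h>

    int main(void)
    {
            unsigned int write_req = 66; /* assumed WRITE_AND_X request size */
            unsigned int read_rsp = 63;  /* assumed READ_AND_X response size */

            printf("posix max wsize   : %u\n", (1u << 24) - 1 - write_req + 4);
            printf("rfc1002 max wsize : %u\n", (1u << 17) - 1 - write_req + 4);
            printf("rfc1002 max rsize : %u\n", (1u << 17) - 1 - read_rsp + 4);
            return 0;
    }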
2822 | static unsigned int | 2919 | static unsigned int |
2823 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | 2920 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) |
@@ -2825,7 +2922,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | |||
2825 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | 2922 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); |
2826 | struct TCP_Server_Info *server = tcon->ses->server; | 2923 | struct TCP_Server_Info *server = tcon->ses->server; |
2827 | unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : | 2924 | unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : |
2828 | CIFS_DEFAULT_WSIZE; | 2925 | CIFS_DEFAULT_IOSIZE; |
2829 | 2926 | ||
2830 | /* can server support 24-bit write sizes? (via UNIX extensions) */ | 2927 | /* can server support 24-bit write sizes? (via UNIX extensions) */ |
2831 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | 2928 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) |
@@ -2848,6 +2945,50 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | |||
2848 | return wsize; | 2945 | return wsize; |
2849 | } | 2946 | } |
2850 | 2947 | ||
2948 | static unsigned int | ||
2949 | cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | ||
2950 | { | ||
2951 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
2952 | struct TCP_Server_Info *server = tcon->ses->server; | ||
2953 | unsigned int rsize, defsize; | ||
2954 | |||
2955 | /* | ||
2956 | * Set default value... | ||
2957 | * | ||
2958 | * HACK alert! Ancient servers have very small buffers. Even though | ||
2959 | * MS-CIFS indicates that servers are only limited by the client's | ||
2960 | * bufsize for reads, testing against win98se shows that it throws | ||
2961 | * INVALID_PARAMETER errors if you try to request too large a read. | ||
2962 | * | ||
2963 | * If the server advertises a MaxBufferSize of less than one page, | ||
2964 | * assume that it also can't satisfy reads larger than that either. | ||
2965 | * | ||
2966 | * FIXME: Is there a better heuristic for this? | ||
2967 | */ | ||
2968 | if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) | ||
2969 | defsize = CIFS_DEFAULT_IOSIZE; | ||
2970 | else if (server->capabilities & CAP_LARGE_READ_X) | ||
2971 | defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; | ||
2972 | else if (server->maxBuf >= PAGE_CACHE_SIZE) | ||
2973 | defsize = CIFSMaxBufSize; | ||
2974 | else | ||
2975 | defsize = server->maxBuf - sizeof(READ_RSP); | ||
2976 | |||
2977 | rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize; | ||
2978 | |||
2979 | /* | ||
2980 | * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to | ||
2981 | * the client's MaxBufferSize. | ||
2982 | */ | ||
2983 | if (!(server->capabilities & CAP_LARGE_READ_X)) | ||
2984 | rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); | ||
2985 | |||
2986 | /* hard limit of CIFS_MAX_RSIZE */ | ||
2987 | rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); | ||
2988 | |||
2989 | return rsize; | ||
2990 | } | ||
2991 | |||
2851 | static int | 2992 | static int |
2852 | is_path_accessible(int xid, struct cifs_tcon *tcon, | 2993 | is_path_accessible(int xid, struct cifs_tcon *tcon, |
2853 | struct cifs_sb_info *cifs_sb, const char *full_path) | 2994 | struct cifs_sb_info *cifs_sb, const char *full_path) |
@@ -2877,8 +3018,9 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) | |||
2877 | { | 3018 | { |
2878 | kfree(volume_info->username); | 3019 | kfree(volume_info->username); |
2879 | kzfree(volume_info->password); | 3020 | kzfree(volume_info->password); |
3021 | if (volume_info->UNCip != volume_info->UNC + 2) | ||
3022 | kfree(volume_info->UNCip); | ||
2880 | kfree(volume_info->UNC); | 3023 | kfree(volume_info->UNC); |
2881 | kfree(volume_info->UNCip); | ||
2882 | kfree(volume_info->domainname); | 3024 | kfree(volume_info->domainname); |
2883 | kfree(volume_info->iocharset); | 3025 | kfree(volume_info->iocharset); |
2884 | kfree(volume_info->prepath); | 3026 | kfree(volume_info->prepath); |
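The UNCip guard above exists because the option parser can leave UNCip pointing into the middle of the UNC string rather than at a separate allocation; freeing an interior pointer would corrupt the heap. A reduced sketch of the aliasing being protected against (structure simplified to the two fields involved):

    #include <stdlib.h>
    #include <string.h>

    struct vol { char *UNC; char *UNCip; };

    static void cleanup(struct vol *v)
    {
            /* only free UNCip when it is not an alias into UNC */
            if (v->UNCip != v->UNC + 2)
                    free(v->UNCip);
            free(v->UNC);
    }

    int main(void)
    {
            struct vol v;

            v.UNC = strdup("//192.168.1.5/share");
            v.UNCip = v.UNC + 2;    /* aliased: points at the address text */
            cleanup(&v);            /* must not free(v.UNCip) separately */
            return 0;
    }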
@@ -3040,6 +3182,22 @@ cifs_get_volume_info(char *mount_data, const char *devname) | |||
3040 | return volume_info; | 3182 | return volume_info; |
3041 | } | 3183 | } |
3042 | 3184 | ||
3185 | /* make sure ra_pages is a multiple of rsize */ | ||
3186 | static inline unsigned int | ||
3187 | cifs_ra_pages(struct cifs_sb_info *cifs_sb) | ||
3188 | { | ||
3189 | unsigned int reads; | ||
3190 | unsigned int rsize_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; | ||
3191 | |||
3192 | if (rsize_pages >= default_backing_dev_info.ra_pages) | ||
3193 | return default_backing_dev_info.ra_pages; | ||
3194 | else if (rsize_pages == 0) | ||
3195 | return rsize_pages; | ||
3196 | |||
3197 | reads = default_backing_dev_info.ra_pages / rsize_pages; | ||
3198 | return reads * rsize_pages; | ||
3199 | } | ||
3200 | |||
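A worked example of the rounding in cifs_ra_pages() (values assumed: 4 KiB pages, the 60 KiB non-POSIX default rsize from above, and a 32-page default readahead window):

    #include <stdio.h>

    int main(void)
    {
            unsigned int ra_default = 32;                  /* assumed ra_pages */
            unsigned int rsize_pages = (60 * 1024) / 4096; /* 15 pages/read */
            unsigned int reads = ra_default / rsize_pages; /* 2 whole reads */

            /* readahead trimmed to 30 pages: a whole number of rsize reads */
            printf("ra_pages = %u\n", reads * rsize_pages);
            return 0;
    }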
3043 | int | 3201 | int |
3044 | cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) | 3202 | cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) |
3045 | { | 3203 | { |
@@ -3058,8 +3216,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) | |||
3058 | if (rc) | 3216 | if (rc) |
3059 | return rc; | 3217 | return rc; |
3060 | 3218 | ||
3061 | cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; | ||
3062 | |||
3063 | #ifdef CONFIG_CIFS_DFS_UPCALL | 3219 | #ifdef CONFIG_CIFS_DFS_UPCALL |
3064 | try_mount_again: | 3220 | try_mount_again: |
3065 | /* cleanup activities if we're chasing a referral */ | 3221 | /* cleanup activities if we're chasing a referral */ |
@@ -3124,15 +3280,11 @@ try_mount_again: | |||
3124 | CIFSSMBQFSAttributeInfo(xid, tcon); | 3280 | CIFSSMBQFSAttributeInfo(xid, tcon); |
3125 | } | 3281 | } |
3126 | 3282 | ||
3127 | if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { | ||
3128 | cifs_sb->rsize = 1024 * 127; | ||
3129 | cFYI(DBG2, "no very large read support, rsize now 127K"); | ||
3130 | } | ||
3131 | if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) | ||
3132 | cifs_sb->rsize = min(cifs_sb->rsize, | ||
3133 | (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); | ||
3134 | |||
3135 | cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); | 3283 | cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); |
3284 | cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); | ||
3285 | |||
3286 | /* tune readahead according to rsize */ | ||
3287 | cifs_sb->bdi.ra_pages = cifs_ra_pages(cifs_sb); | ||
3136 | 3288 | ||
3137 | remote_path_check: | 3289 | remote_path_check: |
3138 | #ifdef CONFIG_CIFS_DFS_UPCALL | 3290 | #ifdef CONFIG_CIFS_DFS_UPCALL |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ae576fbb5142..d7eeb9d3ed6f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -105,8 +105,8 @@ cifs_bp_rename_retry: | |||
105 | } | 105 | } |
106 | rcu_read_unlock(); | 106 | rcu_read_unlock(); |
107 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { | 107 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { |
108 | cERROR(1, "did not end path lookup where expected namelen is %d", | 108 | cFYI(1, "did not end path lookup where expected. namelen=%d " |
109 | namelen); | 109 | "dfsplen=%d", namelen, dfsplen); |
110 | /* presumably this is only possible if racing with a rename | 110 | /* presumably this is only possible if racing with a rename |
111 | of one of the parent directories (we can not lock the dentries | 111 | of one of the parent directories (we can not lock the dentries |
112 | above us to prevent this, but retrying should be harmless) */ | 112 | above us to prevent this, but retrying should be harmless) */ |
@@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
171 | } | 171 | } |
172 | tcon = tlink_tcon(tlink); | 172 | tcon = tlink_tcon(tlink); |
173 | 173 | ||
174 | if (oplockEnabled) | 174 | if (enable_oplocks) |
175 | oplock = REQ_OPLOCK; | 175 | oplock = REQ_OPLOCK; |
176 | 176 | ||
177 | if (nd) | 177 | if (nd) |
@@ -244,6 +244,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
244 | if (!tcon->unix_ext && (mode & S_IWUGO) == 0) | 244 | if (!tcon->unix_ext && (mode & S_IWUGO) == 0) |
245 | create_options |= CREATE_OPTION_READONLY; | 245 | create_options |= CREATE_OPTION_READONLY; |
246 | 246 | ||
247 | if (backup_cred(cifs_sb)) | ||
248 | create_options |= CREATE_OPEN_BACKUP_INTENT; | ||
249 | |||
247 | if (tcon->ses->capabilities & CAP_NT_SMBS) | 250 | if (tcon->ses->capabilities & CAP_NT_SMBS) |
248 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 251 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, |
249 | desiredAccess, create_options, | 252 | desiredAccess, create_options, |
@@ -357,6 +360,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
357 | { | 360 | { |
358 | int rc = -EPERM; | 361 | int rc = -EPERM; |
359 | int xid; | 362 | int xid; |
363 | int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; | ||
360 | struct cifs_sb_info *cifs_sb; | 364 | struct cifs_sb_info *cifs_sb; |
361 | struct tcon_link *tlink; | 365 | struct tcon_link *tlink; |
362 | struct cifs_tcon *pTcon; | 366 | struct cifs_tcon *pTcon; |
@@ -431,9 +435,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
431 | return rc; | 435 | return rc; |
432 | } | 436 | } |
433 | 437 | ||
434 | /* FIXME: would WRITE_OWNER | WRITE_DAC be better? */ | 438 | if (backup_cred(cifs_sb)) |
439 | create_options |= CREATE_OPEN_BACKUP_INTENT; | ||
440 | |||
435 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, | 441 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, |
436 | GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, | 442 | GENERIC_WRITE, create_options, |
437 | &fileHandle, &oplock, buf, cifs_sb->local_nls, | 443 | &fileHandle, &oplock, buf, cifs_sb->local_nls, |
438 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 444 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
439 | if (rc) | 445 | if (rc) |
@@ -642,8 +648,16 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) | |||
642 | if (direntry->d_inode) { | 648 | if (direntry->d_inode) { |
643 | if (cifs_revalidate_dentry(direntry)) | 649 | if (cifs_revalidate_dentry(direntry)) |
644 | return 0; | 650 | return 0; |
645 | else | 651 | else { |
652 | /* | ||
653 | * Forcibly invalidate automounting directory inodes | ||
654 | * (remote DFS directories) so as to have them | ||
655 | * instantiated again for automount | ||
656 | */ | ||
657 | if (IS_AUTOMOUNT(direntry->d_inode)) | ||
658 | return 0; | ||
646 | return 1; | 659 | return 1; |
660 | } | ||
647 | } | 661 | } |
648 | 662 | ||
649 | /* | 663 | /* |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 55d87ac52000..9c7ecdccf2f3 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@ | |||
45 | #include "cifs_debug.h" | 45 | #include "cifs_debug.h" |
46 | #include "cifsfs.h" | 46 | #include "cifsfs.h" |
47 | 47 | ||
48 | #ifdef CIFS_NFSD_EXPORT | 48 | #ifdef CONFIG_CIFS_NFSD_EXPORT |
49 | static struct dentry *cifs_get_parent(struct dentry *dentry) | 49 | static struct dentry *cifs_get_parent(struct dentry *dentry) |
50 | { | 50 | { |
51 | /* BB need to add code here eventually to enable export via NFSD */ | 51 | /* BB need to add code here eventually to enable export via NFSD */ |
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = { | |||
63 | .encode_fs = */ | 63 | .encode_fs = */ |
64 | }; | 64 | }; |
65 | 65 | ||
66 | #endif /* CIFS_NFSD_EXPORT */ | 66 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
67 | 67 | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9f41a10523a1..ea096ce5d4f7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/delay.h> | 32 | #include <linux/delay.h> |
33 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/swap.h> | ||
35 | #include <asm/div64.h> | 36 | #include <asm/div64.h> |
36 | #include "cifsfs.h" | 37 | #include "cifsfs.h" |
37 | #include "cifspdu.h" | 38 | #include "cifspdu.h" |
@@ -174,6 +175,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
174 | int rc; | 175 | int rc; |
175 | int desiredAccess; | 176 | int desiredAccess; |
176 | int disposition; | 177 | int disposition; |
178 | int create_options = CREATE_NOT_DIR; | ||
177 | FILE_ALL_INFO *buf; | 179 | FILE_ALL_INFO *buf; |
178 | 180 | ||
179 | desiredAccess = cifs_convert_flags(f_flags); | 181 | desiredAccess = cifs_convert_flags(f_flags); |
@@ -210,9 +212,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
210 | if (!buf) | 212 | if (!buf) |
211 | return -ENOMEM; | 213 | return -ENOMEM; |
212 | 214 | ||
215 | if (backup_cred(cifs_sb)) | ||
216 | create_options |= CREATE_OPEN_BACKUP_INTENT; | ||
217 | |||
213 | if (tcon->ses->capabilities & CAP_NT_SMBS) | 218 | if (tcon->ses->capabilities & CAP_NT_SMBS) |
214 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 219 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, |
215 | desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, | 220 | desiredAccess, create_options, pnetfid, poplock, buf, |
216 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | 221 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags |
217 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | 222 | & CIFS_MOUNT_MAP_SPECIAL_CHR); |
218 | else | 223 | else |
@@ -258,8 +263,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, | |||
258 | pCifsFile->invalidHandle = false; | 263 | pCifsFile->invalidHandle = false; |
259 | pCifsFile->tlink = cifs_get_tlink(tlink); | 264 | pCifsFile->tlink = cifs_get_tlink(tlink); |
260 | mutex_init(&pCifsFile->fh_mutex); | 265 | mutex_init(&pCifsFile->fh_mutex); |
261 | mutex_init(&pCifsFile->lock_mutex); | ||
262 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
263 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); | 266 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); |
264 | 267 | ||
265 | spin_lock(&cifs_file_list_lock); | 268 | spin_lock(&cifs_file_list_lock); |
@@ -272,11 +275,14 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, | |||
272 | spin_unlock(&cifs_file_list_lock); | 275 | spin_unlock(&cifs_file_list_lock); |
273 | 276 | ||
274 | cifs_set_oplock_level(pCifsInode, oplock); | 277 | cifs_set_oplock_level(pCifsInode, oplock); |
278 | pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll; | ||
275 | 279 | ||
276 | file->private_data = pCifsFile; | 280 | file->private_data = pCifsFile; |
277 | return pCifsFile; | 281 | return pCifsFile; |
278 | } | 282 | } |
279 | 283 | ||
284 | static void cifs_del_lock_waiters(struct cifsLockInfo *lock); | ||
285 | |||
280 | /* | 286 | /* |
281 | * Release a reference on the file private data. This may involve closing | 287 | * Release a reference on the file private data. This may involve closing |
282 | * the filehandle out on the server. Must be called without holding | 288 | * the filehandle out on the server. Must be called without holding |
@@ -327,12 +333,15 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
327 | /* Delete any outstanding lock records. We'll lose them when the file | 333 | /* Delete any outstanding lock records. We'll lose them when the file |
328 | * is closed anyway. | 334 | * is closed anyway. |
329 | */ | 335 | */ |
330 | mutex_lock(&cifs_file->lock_mutex); | 336 | mutex_lock(&cifsi->lock_mutex); |
331 | list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { | 337 | list_for_each_entry_safe(li, tmp, &cifsi->llist, llist) { |
338 | if (li->netfid != cifs_file->netfid) | ||
339 | continue; | ||
332 | list_del(&li->llist); | 340 | list_del(&li->llist); |
341 | cifs_del_lock_waiters(li); | ||
333 | kfree(li); | 342 | kfree(li); |
334 | } | 343 | } |
335 | mutex_unlock(&cifs_file->lock_mutex); | 344 | mutex_unlock(&cifsi->lock_mutex); |
336 | 345 | ||
337 | cifs_put_tlink(cifs_file->tlink); | 346 | cifs_put_tlink(cifs_file->tlink); |
338 | dput(cifs_file->dentry); | 347 | dput(cifs_file->dentry); |
@@ -371,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
371 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", | 380 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", |
372 | inode, file->f_flags, full_path); | 381 | inode, file->f_flags, full_path); |
373 | 382 | ||
374 | if (oplockEnabled) | 383 | if (enable_oplocks) |
375 | oplock = REQ_OPLOCK; | 384 | oplock = REQ_OPLOCK; |
376 | else | 385 | else |
377 | oplock = 0; | 386 | oplock = 0; |
@@ -465,6 +474,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
465 | char *full_path = NULL; | 474 | char *full_path = NULL; |
466 | int desiredAccess; | 475 | int desiredAccess; |
467 | int disposition = FILE_OPEN; | 476 | int disposition = FILE_OPEN; |
477 | int create_options = CREATE_NOT_DIR; | ||
468 | __u16 netfid; | 478 | __u16 netfid; |
469 | 479 | ||
470 | xid = GetXid(); | 480 | xid = GetXid(); |
@@ -495,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
495 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", | 505 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", |
496 | inode, pCifsFile->f_flags, full_path); | 506 | inode, pCifsFile->f_flags, full_path); |
497 | 507 | ||
498 | if (oplockEnabled) | 508 | if (enable_oplocks) |
499 | oplock = REQ_OPLOCK; | 509 | oplock = REQ_OPLOCK; |
500 | else | 510 | else |
501 | oplock = 0; | 511 | oplock = 0; |
@@ -524,6 +534,9 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
524 | 534 | ||
525 | desiredAccess = cifs_convert_flags(pCifsFile->f_flags); | 535 | desiredAccess = cifs_convert_flags(pCifsFile->f_flags); |
526 | 536 | ||
537 | if (backup_cred(cifs_sb)) | ||
538 | create_options |= CREATE_OPEN_BACKUP_INTENT; | ||
539 | |||
527 | /* Can not refresh inode by passing in file_info buf to be returned | 540 | /* Can not refresh inode by passing in file_info buf to be returned |
528 | by SMBOpen and then calling get_inode_info with returned buf | 541 | by SMBOpen and then calling get_inode_info with returned buf |
529 | since file might have write behind data that needs to be flushed | 542 | since file might have write behind data that needs to be flushed |
@@ -531,7 +544,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) | |||
531 | that inode was not dirty locally we could do this */ | 544 | that inode was not dirty locally we could do this */ |
532 | 545 | ||
533 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, | 546 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, |
534 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | 547 | create_options, &netfid, &oplock, NULL, |
535 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 548 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
536 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 549 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
537 | if (rc) { | 550 | if (rc) { |
@@ -631,219 +644,687 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
631 | return rc; | 644 | return rc; |
632 | } | 645 | } |
633 | 646 | ||
634 | static int store_file_lock(struct cifsFileInfo *fid, __u64 len, | 647 | static struct cifsLockInfo * |
635 | __u64 offset, __u8 lockType) | 648 | cifs_lock_init(__u64 len, __u64 offset, __u8 type, __u16 netfid) |
636 | { | 649 | { |
637 | struct cifsLockInfo *li = | 650 | struct cifsLockInfo *li = |
638 | kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); | 651 | kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); |
639 | if (li == NULL) | 652 | if (!li) |
640 | return -ENOMEM; | 653 | return li; |
654 | li->netfid = netfid; | ||
641 | li->offset = offset; | 655 | li->offset = offset; |
642 | li->length = len; | 656 | li->length = len; |
643 | li->type = lockType; | 657 | li->type = type; |
644 | mutex_lock(&fid->lock_mutex); | 658 | li->pid = current->tgid; |
645 | list_add(&li->llist, &fid->llist); | 659 | INIT_LIST_HEAD(&li->blist); |
646 | mutex_unlock(&fid->lock_mutex); | 660 | init_waitqueue_head(&li->block_q); |
661 | return li; | ||
662 | } | ||
663 | |||
664 | static void | ||
665 | cifs_del_lock_waiters(struct cifsLockInfo *lock) | ||
666 | { | ||
667 | struct cifsLockInfo *li, *tmp; | ||
668 | list_for_each_entry_safe(li, tmp, &lock->blist, blist) { | ||
669 | list_del_init(&li->blist); | ||
670 | wake_up(&li->block_q); | ||
671 | } | ||
672 | } | ||
673 | |||
674 | static bool | ||
675 | cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset, | ||
676 | __u64 length, __u8 type, __u16 netfid, | ||
677 | struct cifsLockInfo **conf_lock) | ||
678 | { | ||
679 | struct cifsLockInfo *li, *tmp; | ||
680 | |||
681 | list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { | ||
682 | if (offset + length <= li->offset || | ||
683 | offset >= li->offset + li->length) | ||
684 | continue; | ||
685 | else if ((type & LOCKING_ANDX_SHARED_LOCK) && | ||
686 | ((netfid == li->netfid && current->tgid == li->pid) || | ||
687 | type == li->type)) | ||
688 | continue; | ||
689 | else { | ||
690 | *conf_lock = li; | ||
691 | return true; | ||
692 | } | ||
693 | } | ||
694 | return false; | ||
695 | } | ||
696 | |||
697 | static int | ||
698 | cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | ||
699 | __u8 type, __u16 netfid, struct file_lock *flock) | ||
700 | { | ||
701 | int rc = 0; | ||
702 | struct cifsLockInfo *conf_lock; | ||
703 | bool exist; | ||
704 | |||
705 | mutex_lock(&cinode->lock_mutex); | ||
706 | |||
707 | exist = cifs_find_lock_conflict(cinode, offset, length, type, netfid, | ||
708 | &conf_lock); | ||
709 | if (exist) { | ||
710 | flock->fl_start = conf_lock->offset; | ||
711 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; | ||
712 | flock->fl_pid = conf_lock->pid; | ||
713 | if (conf_lock->type & LOCKING_ANDX_SHARED_LOCK) | ||
714 | flock->fl_type = F_RDLCK; | ||
715 | else | ||
716 | flock->fl_type = F_WRLCK; | ||
717 | } else if (!cinode->can_cache_brlcks) | ||
718 | rc = 1; | ||
719 | else | ||
720 | flock->fl_type = F_UNLCK; | ||
721 | |||
722 | mutex_unlock(&cinode->lock_mutex); | ||
723 | return rc; | ||
724 | } | ||
725 | |||
726 | static int | ||
727 | cifs_lock_add(struct cifsInodeInfo *cinode, __u64 len, __u64 offset, | ||
728 | __u8 type, __u16 netfid) | ||
729 | { | ||
730 | struct cifsLockInfo *li; | ||
731 | |||
732 | li = cifs_lock_init(len, offset, type, netfid); | ||
733 | if (!li) | ||
734 | return -ENOMEM; | ||
735 | |||
736 | mutex_lock(&cinode->lock_mutex); | ||
737 | list_add_tail(&li->llist, &cinode->llist); | ||
738 | mutex_unlock(&cinode->lock_mutex); | ||
647 | return 0; | 739 | return 0; |
648 | } | 740 | } |
649 | 741 | ||
650 | int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | 742 | static int |
743 | cifs_lock_add_if(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | ||
744 | __u8 type, __u16 netfid, bool wait) | ||
651 | { | 745 | { |
652 | int rc, xid; | 746 | struct cifsLockInfo *lock, *conf_lock; |
653 | __u32 numLock = 0; | 747 | bool exist; |
654 | __u32 numUnlock = 0; | 748 | int rc = 0; |
655 | __u64 length; | 749 | |
656 | bool wait_flag = false; | 750 | lock = cifs_lock_init(length, offset, type, netfid); |
657 | struct cifs_sb_info *cifs_sb; | 751 | if (!lock) |
752 | return -ENOMEM; | ||
753 | |||
754 | try_again: | ||
755 | exist = false; | ||
756 | mutex_lock(&cinode->lock_mutex); | ||
757 | |||
758 | exist = cifs_find_lock_conflict(cinode, offset, length, type, netfid, | ||
759 | &conf_lock); | ||
760 | if (!exist && cinode->can_cache_brlcks) { | ||
761 | list_add_tail(&lock->llist, &cinode->llist); | ||
762 | mutex_unlock(&cinode->lock_mutex); | ||
763 | return rc; | ||
764 | } | ||
765 | |||
766 | if (!exist) | ||
767 | rc = 1; | ||
768 | else if (!wait) | ||
769 | rc = -EACCES; | ||
770 | else { | ||
771 | list_add_tail(&lock->blist, &conf_lock->blist); | ||
772 | mutex_unlock(&cinode->lock_mutex); | ||
773 | rc = wait_event_interruptible(lock->block_q, | ||
774 | (lock->blist.prev == &lock->blist) && | ||
775 | (lock->blist.next == &lock->blist)); | ||
776 | if (!rc) | ||
777 | goto try_again; | ||
778 | else { | ||
779 | mutex_lock(&cinode->lock_mutex); | ||
780 | list_del_init(&lock->blist); | ||
782 | } | ||
783 | } | ||
784 | |||
785 | kfree(lock); | ||
786 | mutex_unlock(&cinode->lock_mutex); | ||
787 | return rc; | ||
788 | } | ||
789 | |||
790 | static int | ||
791 | cifs_posix_lock_test(struct file *file, struct file_lock *flock) | ||
792 | { | ||
793 | int rc = 0; | ||
794 | struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); | ||
795 | unsigned char saved_type = flock->fl_type; | ||
796 | |||
797 | mutex_lock(&cinode->lock_mutex); | ||
798 | posix_test_lock(file, flock); | ||
799 | |||
800 | if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { | ||
801 | flock->fl_type = saved_type; | ||
802 | rc = 1; | ||
803 | } | ||
804 | |||
805 | mutex_unlock(&cinode->lock_mutex); | ||
806 | return rc; | ||
807 | } | ||
808 | |||
809 | static int | ||
810 | cifs_posix_lock_set(struct file *file, struct file_lock *flock) | ||
811 | { | ||
812 | struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); | ||
813 | int rc; | ||
814 | |||
815 | mutex_lock(&cinode->lock_mutex); | ||
816 | if (!cinode->can_cache_brlcks) { | ||
817 | mutex_unlock(&cinode->lock_mutex); | ||
818 | return 1; | ||
819 | } | ||
820 | rc = posix_lock_file_wait(file, flock); | ||
821 | mutex_unlock(&cinode->lock_mutex); | ||
822 | return rc; | ||
823 | } | ||
824 | |||
825 | static int | ||
826 | cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | ||
827 | { | ||
828 | int xid, rc = 0, stored_rc; | ||
829 | struct cifsLockInfo *li, *tmp; | ||
658 | struct cifs_tcon *tcon; | 830 | struct cifs_tcon *tcon; |
659 | __u16 netfid; | 831 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); |
660 | __u8 lockType = LOCKING_ANDX_LARGE_FILES; | 832 | unsigned int num, max_num; |
661 | bool posix_locking = 0; | 833 | LOCKING_ANDX_RANGE *buf, *cur; |
834 | int types[] = {LOCKING_ANDX_LARGE_FILES, | ||
835 | LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; | ||
836 | int i; | ||
837 | |||
838 | xid = GetXid(); | ||
839 | tcon = tlink_tcon(cfile->tlink); | ||
840 | |||
841 | mutex_lock(&cinode->lock_mutex); | ||
842 | if (!cinode->can_cache_brlcks) { | ||
843 | mutex_unlock(&cinode->lock_mutex); | ||
844 | FreeXid(xid); | ||
845 | return rc; | ||
846 | } | ||
847 | |||
848 | max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / | ||
849 | sizeof(LOCKING_ANDX_RANGE); | ||
850 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); | ||
851 | if (!buf) { | ||
852 | mutex_unlock(&cinode->lock_mutex); | ||
853 | FreeXid(xid); | ||
854 | return -ENOMEM; | ||
855 | } | ||
856 | |||
857 | for (i = 0; i < 2; i++) { | ||
858 | cur = buf; | ||
859 | num = 0; | ||
860 | list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { | ||
861 | if (li->type != types[i]) | ||
862 | continue; | ||
863 | cur->Pid = cpu_to_le16(li->pid); | ||
864 | cur->LengthLow = cpu_to_le32((u32)li->length); | ||
865 | cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); | ||
866 | cur->OffsetLow = cpu_to_le32((u32)li->offset); | ||
867 | cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); | ||
868 | if (++num == max_num) { | ||
869 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | ||
870 | li->type, 0, num, buf); | ||
871 | if (stored_rc) | ||
872 | rc = stored_rc; | ||
873 | cur = buf; | ||
874 | num = 0; | ||
875 | } else | ||
876 | cur++; | ||
877 | } | ||
878 | |||
879 | if (num) { | ||
880 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | ||
881 | types[i], 0, num, buf); | ||
882 | if (stored_rc) | ||
883 | rc = stored_rc; | ||
884 | } | ||
885 | } | ||
886 | |||
887 | cinode->can_cache_brlcks = false; | ||
888 | mutex_unlock(&cinode->lock_mutex); | ||
889 | |||
890 | kfree(buf); | ||
891 | FreeXid(xid); | ||
892 | return rc; | ||
893 | } | ||
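max_num above bounds how many LOCKING_ANDX_RANGE entries fit into one SMB, so the flush can batch the cached locks into as few round trips as possible. A standalone illustration of the arithmetic, with assumed sizes (a 16644-byte negotiated maxBuf, a 32-byte smb_hdr and a 10-byte range entry; none of these values is taken from the headers):

    #include <stdio.h>

    int main(void)
    {
            unsigned int maxBuf = 16644; /* assumed negotiated buffer size */
            unsigned int hdr    = 32;    /* assumed sizeof(struct smb_hdr) */
            unsigned int range  = 10;    /* assumed sizeof(LOCKING_ANDX_RANGE) */

            /* same formula as cifs_push_mandatory_locks() */
            printf("max_num = %u\n", (maxBuf - hdr) / range); /* -> 1661 */
            return 0;
    }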
894 | |||
895 | /* copied from fs/locks.c with a name change */ | ||
896 | #define cifs_for_each_lock(inode, lockp) \ | ||
897 | for (lockp = &inode->i_flock; *lockp != NULL; \ | ||
898 | lockp = &(*lockp)->fl_next) | ||
899 | |||
900 | static int | ||
901 | cifs_push_posix_locks(struct cifsFileInfo *cfile) | ||
902 | { | ||
903 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
904 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
905 | struct file_lock *flock, **before; | ||
906 | struct cifsLockInfo *lck, *tmp; | ||
907 | int rc = 0, xid, type; | ||
908 | __u64 length; | ||
909 | struct list_head locks_to_send; | ||
662 | 910 | ||
663 | length = 1 + pfLock->fl_end - pfLock->fl_start; | ||
664 | rc = -EACCES; | ||
665 | xid = GetXid(); | 911 | xid = GetXid(); |
666 | 912 | ||
667 | cFYI(1, "Lock parm: 0x%x flockflags: " | 913 | mutex_lock(&cinode->lock_mutex); |
668 | "0x%x flocktype: 0x%x start: %lld end: %lld", | 914 | if (!cinode->can_cache_brlcks) { |
669 | cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start, | 915 | mutex_unlock(&cinode->lock_mutex); |
670 | pfLock->fl_end); | 916 | FreeXid(xid); |
917 | return rc; | ||
918 | } | ||
919 | |||
920 | INIT_LIST_HEAD(&locks_to_send); | ||
671 | 921 | ||
672 | if (pfLock->fl_flags & FL_POSIX) | 922 | lock_flocks(); |
923 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | ||
924 | flock = *before; | ||
925 | length = 1 + flock->fl_end - flock->fl_start; | ||
926 | if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) | ||
927 | type = CIFS_RDLCK; | ||
928 | else | ||
929 | type = CIFS_WRLCK; | ||
930 | |||
931 | lck = cifs_lock_init(length, flock->fl_start, type, | ||
932 | cfile->netfid); | ||
933 | if (!lck) { | ||
934 | rc = -ENOMEM; | ||
935 | goto send_locks; | ||
936 | } | ||
937 | lck->pid = flock->fl_pid; | ||
938 | |||
939 | list_add_tail(&lck->llist, &locks_to_send); | ||
940 | } | ||
941 | |||
942 | send_locks: | ||
943 | unlock_flocks(); | ||
944 | |||
945 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { | ||
946 | struct file_lock tmp_lock; | ||
947 | int stored_rc; | ||
948 | |||
949 | tmp_lock.fl_start = lck->offset; | ||
950 | stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, | ||
951 | 0, lck->length, &tmp_lock, | ||
952 | lck->type, 0); | ||
953 | if (stored_rc) | ||
954 | rc = stored_rc; | ||
955 | list_del(&lck->llist); | ||
956 | kfree(lck); | ||
957 | } | ||
958 | |||
959 | cinode->can_cache_brlcks = false; | ||
960 | mutex_unlock(&cinode->lock_mutex); | ||
961 | |||
962 | FreeXid(xid); | ||
963 | return rc; | ||
964 | } | ||
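cifs_push_posix_locks() is deliberately two-phase: the inode's flock list may only be walked under the lock_flocks() spinlock, where no network I/O can be issued, so the ranges are first copied into the private locks_to_send list and the CIFSSMBPosixLock() calls happen only after unlock_flocks(). A reduced skeleton of the pattern (illustrative; copy_of() is a hypothetical stand-in for the cifs_lock_init() plus field copy above):

    LIST_HEAD(locks_to_send);

    lock_flocks();                          /* spinlock held: no I/O */
    cifs_for_each_lock(inode, before)       /* phase 1: copy ranges only */
            list_add_tail(&copy_of(*before)->llist, &locks_to_send);
    unlock_flocks();

    list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
            /* phase 2: now safe to talk to the server */
            CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 0,
                             lck->length, &tmp_lock, lck->type, 0);
            list_del(&lck->llist);
            kfree(lck);
    }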
965 | |||
966 | static int | ||
967 | cifs_push_locks(struct cifsFileInfo *cfile) | ||
968 | { | ||
969 | struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); | ||
970 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
971 | |||
972 | if ((tcon->ses->capabilities & CAP_UNIX) && | ||
973 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | ||
974 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | ||
975 | return cifs_push_posix_locks(cfile); | ||
976 | |||
977 | return cifs_push_mandatory_locks(cfile); | ||
978 | } | ||
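The three-way capability test above decides between the two push paths. Factored into a predicate for readability (an illustrative refactoring, not part of the patch; it assumes the usual cifs headers for the types and flags involved):

    static inline bool cifs_posix_brlocks_ok(struct cifs_tcon *tcon,
                                             struct cifs_sb_info *cifs_sb)
    {
            return (tcon->ses->capabilities & CAP_UNIX) &&
                   (CIFS_UNIX_FCNTL_CAP &
                    le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
                   !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL);
    }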
979 | |||
980 | static void | ||
981 | cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock, | ||
982 | bool *wait_flag) | ||
983 | { | ||
984 | if (flock->fl_flags & FL_POSIX) | ||
673 | cFYI(1, "Posix"); | 985 | cFYI(1, "Posix"); |
674 | if (pfLock->fl_flags & FL_FLOCK) | 986 | if (flock->fl_flags & FL_FLOCK) |
675 | cFYI(1, "Flock"); | 987 | cFYI(1, "Flock"); |
676 | if (pfLock->fl_flags & FL_SLEEP) { | 988 | if (flock->fl_flags & FL_SLEEP) { |
677 | cFYI(1, "Blocking lock"); | 989 | cFYI(1, "Blocking lock"); |
678 | wait_flag = true; | 990 | *wait_flag = true; |
679 | } | 991 | } |
680 | if (pfLock->fl_flags & FL_ACCESS) | 992 | if (flock->fl_flags & FL_ACCESS) |
681 | cFYI(1, "Process suspended by mandatory locking - " | 993 | cFYI(1, "Process suspended by mandatory locking - " |
682 | "not implemented yet"); | 994 | "not implemented yet"); |
683 | if (pfLock->fl_flags & FL_LEASE) | 995 | if (flock->fl_flags & FL_LEASE) |
684 | cFYI(1, "Lease on file - not implemented yet"); | 996 | cFYI(1, "Lease on file - not implemented yet"); |
685 | if (pfLock->fl_flags & | 997 | if (flock->fl_flags & |
686 | (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) | 998 | (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) |
687 | cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags); | 999 | cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); |
688 | 1000 | ||
689 | if (pfLock->fl_type == F_WRLCK) { | 1001 | *type = LOCKING_ANDX_LARGE_FILES; |
1002 | if (flock->fl_type == F_WRLCK) { | ||
690 | cFYI(1, "F_WRLCK "); | 1003 | cFYI(1, "F_WRLCK "); |
691 | numLock = 1; | 1004 | *lock = 1; |
692 | } else if (pfLock->fl_type == F_UNLCK) { | 1005 | } else if (flock->fl_type == F_UNLCK) { |
693 | cFYI(1, "F_UNLCK"); | 1006 | cFYI(1, "F_UNLCK"); |
694 | numUnlock = 1; | 1007 | *unlock = 1; |
695 | /* Check if unlock includes more than | 1008 | /* Check if unlock includes more than one lock range */ |
696 | one lock range */ | 1009 | } else if (flock->fl_type == F_RDLCK) { |
697 | } else if (pfLock->fl_type == F_RDLCK) { | ||
698 | cFYI(1, "F_RDLCK"); | 1010 | cFYI(1, "F_RDLCK"); |
699 | lockType |= LOCKING_ANDX_SHARED_LOCK; | 1011 | *type |= LOCKING_ANDX_SHARED_LOCK; |
700 | numLock = 1; | 1012 | *lock = 1; |
701 | } else if (pfLock->fl_type == F_EXLCK) { | 1013 | } else if (flock->fl_type == F_EXLCK) { |
702 | cFYI(1, "F_EXLCK"); | 1014 | cFYI(1, "F_EXLCK"); |
703 | numLock = 1; | 1015 | *lock = 1; |
704 | } else if (pfLock->fl_type == F_SHLCK) { | 1016 | } else if (flock->fl_type == F_SHLCK) { |
705 | cFYI(1, "F_SHLCK"); | 1017 | cFYI(1, "F_SHLCK"); |
706 | lockType |= LOCKING_ANDX_SHARED_LOCK; | 1018 | *type |= LOCKING_ANDX_SHARED_LOCK; |
707 | numLock = 1; | 1019 | *lock = 1; |
708 | } else | 1020 | } else |
709 | cFYI(1, "Unknown type of lock"); | 1021 | cFYI(1, "Unknown type of lock"); |
1022 | } | ||
710 | 1023 | ||
711 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1024 | static int |
712 | tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); | 1025 | cifs_getlk(struct file *file, struct file_lock *flock, __u8 type, |
713 | netfid = ((struct cifsFileInfo *)file->private_data)->netfid; | 1026 | bool wait_flag, bool posix_lck, int xid) |
1027 | { | ||
1028 | int rc = 0; | ||
1029 | __u64 length = 1 + flock->fl_end - flock->fl_start; | ||
1030 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | ||
1031 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
1032 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
1033 | __u16 netfid = cfile->netfid; | ||
714 | 1034 | ||
715 | if ((tcon->ses->capabilities & CAP_UNIX) && | 1035 | if (posix_lck) { |
716 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | 1036 | int posix_lock_type; |
717 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | 1037 | |
718 | posix_locking = 1; | 1038 | rc = cifs_posix_lock_test(file, flock); |
719 | /* BB add code here to normalize offset and length to | 1039 | if (!rc) |
720 | account for negative length which we can not accept over the | ||
721 | wire */ | ||
722 | if (IS_GETLK(cmd)) { | ||
723 | if (posix_locking) { | ||
724 | int posix_lock_type; | ||
725 | if (lockType & LOCKING_ANDX_SHARED_LOCK) | ||
726 | posix_lock_type = CIFS_RDLCK; | ||
727 | else | ||
728 | posix_lock_type = CIFS_WRLCK; | ||
729 | rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */, | ||
730 | length, pfLock, posix_lock_type, | ||
731 | wait_flag); | ||
732 | FreeXid(xid); | ||
733 | return rc; | 1040 | return rc; |
734 | } | ||
735 | 1041 | ||
736 | /* BB we could chain these into one lock request BB */ | 1042 | if (type & LOCKING_ANDX_SHARED_LOCK) |
737 | rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, | 1043 | posix_lock_type = CIFS_RDLCK; |
738 | 0, 1, lockType, 0 /* wait flag */, 0); | 1044 | else |
739 | if (rc == 0) { | 1045 | posix_lock_type = CIFS_WRLCK; |
740 | rc = CIFSSMBLock(xid, tcon, netfid, length, | 1046 | rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, |
741 | pfLock->fl_start, 1 /* numUnlock */ , | 1047 | 1 /* get */, length, flock, |
742 | 0 /* numLock */ , lockType, | 1048 | posix_lock_type, wait_flag); |
743 | 0 /* wait flag */, 0); | 1049 | return rc; |
744 | pfLock->fl_type = F_UNLCK; | 1050 | } |
745 | if (rc != 0) | ||
746 | cERROR(1, "Error unlocking previously locked " | ||
747 | "range %d during test of lock", rc); | ||
748 | rc = 0; | ||
749 | 1051 | ||
750 | } else { | 1052 | rc = cifs_lock_test(cinode, flock->fl_start, length, type, netfid, |
751 | /* if rc == ERR_SHARING_VIOLATION ? */ | 1053 | flock); |
752 | rc = 0; | 1054 | if (!rc) |
1055 | return rc; | ||
753 | 1056 | ||
754 | if (lockType & LOCKING_ANDX_SHARED_LOCK) { | 1057 | /* BB we could chain these into one lock request BB */ |
755 | pfLock->fl_type = F_WRLCK; | 1058 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, |
756 | } else { | 1059 | flock->fl_start, 0, 1, type, 0, 0); |
757 | rc = CIFSSMBLock(xid, tcon, netfid, length, | 1060 | if (rc == 0) { |
758 | pfLock->fl_start, 0, 1, | 1061 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, |
759 | lockType | LOCKING_ANDX_SHARED_LOCK, | 1062 | length, flock->fl_start, 1, 0, |
760 | 0 /* wait flag */, 0); | 1063 | type, 0, 0); |
761 | if (rc == 0) { | 1064 | flock->fl_type = F_UNLCK; |
762 | rc = CIFSSMBLock(xid, tcon, netfid, | 1065 | if (rc != 0) |
763 | length, pfLock->fl_start, 1, 0, | 1066 | cERROR(1, "Error unlocking previously locked " |
764 | lockType | | 1067 | "range %d during test of lock", rc); |
765 | LOCKING_ANDX_SHARED_LOCK, | 1068 | rc = 0; |
766 | 0 /* wait flag */, 0); | 1069 | return rc; |
767 | pfLock->fl_type = F_RDLCK; | 1070 | } |
768 | if (rc != 0) | ||
769 | cERROR(1, "Error unlocking " | ||
770 | "previously locked range %d " | ||
771 | "during test of lock", rc); | ||
772 | rc = 0; | ||
773 | } else { | ||
774 | pfLock->fl_type = F_WRLCK; | ||
775 | rc = 0; | ||
776 | } | ||
777 | } | ||
778 | } | ||
779 | 1071 | ||
780 | FreeXid(xid); | 1072 | if (type & LOCKING_ANDX_SHARED_LOCK) { |
1073 | flock->fl_type = F_WRLCK; | ||
1074 | rc = 0; | ||
781 | return rc; | 1075 | return rc; |
782 | } | 1076 | } |
783 | 1077 | ||
784 | if (!numLock && !numUnlock) { | 1078 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, |
785 | /* if no lock or unlock then nothing | 1079 | flock->fl_start, 0, 1, |
786 | to do since we do not know what it is */ | 1080 | type | LOCKING_ANDX_SHARED_LOCK, 0, 0); |
787 | FreeXid(xid); | 1081 | if (rc == 0) { |
788 | return -EOPNOTSUPP; | 1082 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, |
1083 | length, flock->fl_start, 1, 0, | ||
1084 | type | LOCKING_ANDX_SHARED_LOCK, | ||
1085 | 0, 0); | ||
1086 | flock->fl_type = F_RDLCK; | ||
1087 | if (rc != 0) | ||
1088 | cERROR(1, "Error unlocking previously locked " | ||
1089 | "range %d during test of lock", rc); | ||
1090 | } else | ||
1091 | flock->fl_type = F_WRLCK; | ||
1092 | |||
1093 | rc = 0; | ||
1094 | return rc; | ||
1095 | } | ||
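In the non-POSIX branch, cifs_getlk() probes by locking: it tries a non-blocking lock on the server, and if that succeeds it immediately unlocks and reports the range as free. From userspace this whole path is driven by fcntl(F_GETLK); a small hypothetical test program:

    #include <fcntl.h>
    #include <stdio.h>

    int probe_range(int fd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,    /* could I write-lock this? */
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 100,
            };

            if (fcntl(fd, F_GETLK, &fl) == -1)
                    return -1;
            if (fl.l_type == F_UNLCK)
                    printf("range is free\n");
            else
                    printf("conflict reported: l_type=%d\n", fl.l_type);
            return 0;
    }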
1096 | |||
1097 | static void | ||
1098 | cifs_move_llist(struct list_head *source, struct list_head *dest) | ||
1099 | { | ||
1100 | struct list_head *li, *tmp; | ||
1101 | list_for_each_safe(li, tmp, source) | ||
1102 | list_move(li, dest); | ||
1103 | } | ||
1104 | |||
1105 | static void | ||
1106 | cifs_free_llist(struct list_head *llist) | ||
1107 | { | ||
1108 | struct cifsLockInfo *li, *tmp; | ||
1109 | list_for_each_entry_safe(li, tmp, llist, llist) { | ||
1110 | cifs_del_lock_waiters(li); | ||
1111 | list_del(&li->llist); | ||
1112 | kfree(li); | ||
789 | } | 1113 | } |
1114 | } | ||
1115 | |||
1116 | static int | ||
1117 | cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) | ||
1118 | { | ||
1119 | int rc = 0, stored_rc; | ||
1120 | int types[] = {LOCKING_ANDX_LARGE_FILES, | ||
1121 | LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; | ||
1122 | unsigned int i; | ||
1123 | unsigned int max_num, num; | ||
1124 | LOCKING_ANDX_RANGE *buf, *cur; | ||
1125 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
1126 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
1127 | struct cifsLockInfo *li, *tmp; | ||
1128 | __u64 length = 1 + flock->fl_end - flock->fl_start; | ||
1129 | struct list_head tmp_llist; | ||
1130 | |||
1131 | INIT_LIST_HEAD(&tmp_llist); | ||
1132 | |||
1133 | max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / | ||
1134 | sizeof(LOCKING_ANDX_RANGE); | ||
1135 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); | ||
1136 | if (!buf) | ||
1137 | return -ENOMEM; | ||
1138 | |||
1139 | mutex_lock(&cinode->lock_mutex); | ||
1140 | for (i = 0; i < 2; i++) { | ||
1141 | cur = buf; | ||
1142 | num = 0; | ||
1143 | list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { | ||
1144 | if (flock->fl_start > li->offset || | ||
1145 | (flock->fl_start + length) < | ||
1146 | (li->offset + li->length)) | ||
1147 | continue; | ||
1148 | if (current->tgid != li->pid) | ||
1149 | continue; | ||
1150 | if (cfile->netfid != li->netfid) | ||
1151 | continue; | ||
1152 | if (types[i] != li->type) | ||
1153 | continue; | ||
1154 | if (!cinode->can_cache_brlcks) { | ||
1155 | cur->Pid = cpu_to_le16(li->pid); | ||
1156 | cur->LengthLow = cpu_to_le32((u32)li->length); | ||
1157 | cur->LengthHigh = | ||
1158 | cpu_to_le32((u32)(li->length>>32)); | ||
1159 | cur->OffsetLow = cpu_to_le32((u32)li->offset); | ||
1160 | cur->OffsetHigh = | ||
1161 | cpu_to_le32((u32)(li->offset>>32)); | ||
1162 | /* | ||
1163 | * Save this lock so that we can re-add it to the | ||
1164 | * inode list if the unlock range request fails on | ||
1165 | * the server. | ||
1166 | */ | ||
1167 | list_move(&li->llist, &tmp_llist); | ||
1168 | if (++num == max_num) { | ||
1169 | stored_rc = cifs_lockv(xid, tcon, | ||
1170 | cfile->netfid, | ||
1171 | li->type, num, | ||
1172 | 0, buf); | ||
1173 | if (stored_rc) { | ||
1174 | /* | ||
1175 | * We failed on the unlock range | ||
1176 | * request - add all locks from | ||
1177 | * the tmp list to the head of | ||
1178 | * the inode list. | ||
1179 | */ | ||
1180 | cifs_move_llist(&tmp_llist, | ||
1181 | &cinode->llist); | ||
1182 | rc = stored_rc; | ||
1183 | } else | ||
1184 | /* | ||
1185 | * The unlock range request | ||
1186 | * succeeded - free the tmp list. | ||
1187 | */ | ||
1188 | cifs_free_llist(&tmp_llist); | ||
1189 | cur = buf; | ||
1190 | num = 0; | ||
1191 | } else | ||
1192 | cur++; | ||
1193 | } else { | ||
1194 | /* | ||
1195 | * We can cache brlock requests - simply remove | ||
1196 | * a lock from the inode list. | ||
1197 | */ | ||
1198 | list_del(&li->llist); | ||
1199 | cifs_del_lock_waiters(li); | ||
1200 | kfree(li); | ||
1201 | } | ||
1202 | } | ||
1203 | if (num) { | ||
1204 | stored_rc = cifs_lockv(xid, tcon, cfile->netfid, | ||
1205 | types[i], num, 0, buf); | ||
1206 | if (stored_rc) { | ||
1207 | cifs_move_llist(&tmp_llist, &cinode->llist); | ||
1208 | rc = stored_rc; | ||
1209 | } else | ||
1210 | cifs_free_llist(&tmp_llist); | ||
1211 | } | ||
1212 | } | ||
1213 | |||
1214 | mutex_unlock(&cinode->lock_mutex); | ||
1215 | kfree(buf); | ||
1216 | return rc; | ||
1217 | } | ||
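The tmp_llist shuffle above is a save/restore idiom: each matching lock is tentatively moved off the inode list before the unlock batch is sent, and the batch then either commits (entries freed) or rolls back (entries spliced back). The idiom in isolation, with a hypothetical send_unlock_batch() standing in for the cifs_lockv() call:

    LIST_HEAD(tmp_llist);

    list_move(&li->llist, &tmp_llist);              /* tentative remove */
    if (send_unlock_batch() != 0)
            cifs_move_llist(&tmp_llist, &cinode->llist);    /* roll back */
    else
            cifs_free_llist(&tmp_llist);            /* commit: drop them */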
790 | 1218 | ||
791 | if (posix_locking) { | 1219 | static int |
1220 | cifs_setlk(struct file *file, struct file_lock *flock, __u8 type, | ||
1221 | bool wait_flag, bool posix_lck, int lock, int unlock, int xid) | ||
1222 | { | ||
1223 | int rc = 0; | ||
1224 | __u64 length = 1 + flock->fl_end - flock->fl_start; | ||
1225 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | ||
1226 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
1227 | struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); | ||
1228 | __u16 netfid = cfile->netfid; | ||
1229 | |||
1230 | if (posix_lck) { | ||
792 | int posix_lock_type; | 1231 | int posix_lock_type; |
793 | if (lockType & LOCKING_ANDX_SHARED_LOCK) | 1232 | |
1233 | rc = cifs_posix_lock_set(file, flock); | ||
1234 | if (!rc || rc < 0) | ||
1235 | return rc; | ||
1236 | |||
1237 | if (type & LOCKING_ANDX_SHARED_LOCK) | ||
794 | posix_lock_type = CIFS_RDLCK; | 1238 | posix_lock_type = CIFS_RDLCK; |
795 | else | 1239 | else |
796 | posix_lock_type = CIFS_WRLCK; | 1240 | posix_lock_type = CIFS_WRLCK; |
797 | 1241 | ||
798 | if (numUnlock == 1) | 1242 | if (unlock == 1) |
799 | posix_lock_type = CIFS_UNLCK; | 1243 | posix_lock_type = CIFS_UNLCK; |
800 | 1244 | ||
801 | rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */, | 1245 | rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, |
802 | length, pfLock, posix_lock_type, | 1246 | 0 /* set */, length, flock, |
803 | wait_flag); | 1247 | posix_lock_type, wait_flag); |
804 | } else { | 1248 | goto out; |
805 | struct cifsFileInfo *fid = file->private_data; | 1249 | } |
806 | |||
807 | if (numLock) { | ||
808 | rc = CIFSSMBLock(xid, tcon, netfid, length, | ||
809 | pfLock->fl_start, 0, numLock, lockType, | ||
810 | wait_flag, 0); | ||
811 | 1250 | ||
812 | if (rc == 0) { | 1251 | if (lock) { |
813 | /* For Windows locks we must store them. */ | 1252 | rc = cifs_lock_add_if(cinode, flock->fl_start, length, |
814 | rc = store_file_lock(fid, length, | 1253 | type, netfid, wait_flag); |
815 | pfLock->fl_start, lockType); | 1254 | if (rc < 0) |
816 | } | 1255 | return rc; |
817 | } else if (numUnlock) { | 1256 | else if (!rc) |
818 | /* For each stored lock that this unlock overlaps | 1257 | goto out; |
819 | completely, unlock it. */ | ||
820 | int stored_rc = 0; | ||
821 | struct cifsLockInfo *li, *tmp; | ||
822 | 1258 | ||
823 | rc = 0; | 1259 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, |
824 | mutex_lock(&fid->lock_mutex); | 1260 | flock->fl_start, 0, 1, type, wait_flag, 0); |
825 | list_for_each_entry_safe(li, tmp, &fid->llist, llist) { | 1261 | if (rc == 0) { |
826 | if (pfLock->fl_start <= li->offset && | 1262 | /* For Windows locks we must store them. */ |
827 | (pfLock->fl_start + length) >= | 1263 | rc = cifs_lock_add(cinode, length, flock->fl_start, |
828 | (li->offset + li->length)) { | 1264 | type, netfid); |
829 | stored_rc = CIFSSMBLock(xid, tcon, | ||
830 | netfid, li->length, | ||
831 | li->offset, 1, 0, | ||
832 | li->type, false, 0); | ||
833 | if (stored_rc) | ||
834 | rc = stored_rc; | ||
835 | else { | ||
836 | list_del(&li->llist); | ||
837 | kfree(li); | ||
838 | } | ||
839 | } | ||
840 | } | ||
841 | mutex_unlock(&fid->lock_mutex); | ||
842 | } | 1265 | } |
1266 | } else if (unlock) | ||
1267 | rc = cifs_unlock_range(cfile, flock, xid); | ||
1268 | |||
1269 | out: | ||
1270 | if (flock->fl_flags & FL_POSIX) | ||
1271 | posix_lock_file_wait(file, flock); | ||
1272 | return rc; | ||
1273 | } | ||
1274 | |||
1275 | int cifs_lock(struct file *file, int cmd, struct file_lock *flock) | ||
1276 | { | ||
1277 | int rc, xid; | ||
1278 | int lock = 0, unlock = 0; | ||
1279 | bool wait_flag = false; | ||
1280 | bool posix_lck = false; | ||
1281 | struct cifs_sb_info *cifs_sb; | ||
1282 | struct cifs_tcon *tcon; | ||
1283 | struct cifsInodeInfo *cinode; | ||
1284 | struct cifsFileInfo *cfile; | ||
1285 | __u16 netfid; | ||
1286 | __u8 type; | ||
1287 | |||
1288 | rc = -EACCES; | ||
1289 | xid = GetXid(); | ||
1290 | |||
1291 | cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld " | ||
1292 | "end: %lld", cmd, flock->fl_flags, flock->fl_type, | ||
1293 | flock->fl_start, flock->fl_end); | ||
1294 | |||
1295 | cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag); | ||
1296 | |||
1297 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | ||
1298 | cfile = (struct cifsFileInfo *)file->private_data; | ||
1299 | tcon = tlink_tcon(cfile->tlink); | ||
1300 | netfid = cfile->netfid; | ||
1301 | cinode = CIFS_I(file->f_path.dentry->d_inode); | ||
1302 | |||
1303 | if ((tcon->ses->capabilities & CAP_UNIX) && | ||
1304 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | ||
1305 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | ||
1306 | posix_lck = true; | ||
1307 | /* | ||
1308 | * BB add code here to normalize offset and length to account for | ||
1309 | * negative length, which we cannot accept over the wire. | ||
1310 | */ | ||
1311 | if (IS_GETLK(cmd)) { | ||
1312 | rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); | ||
1313 | FreeXid(xid); | ||
1314 | return rc; | ||
843 | } | 1315 | } |
844 | 1316 | ||
845 | if (pfLock->fl_flags & FL_POSIX) | 1317 | if (!lock && !unlock) { |
846 | posix_lock_file_wait(file, pfLock); | 1318 | /* |
1319 | * if no lock or unlock then nothing to do since we do not | ||
1320 | * know what it is | ||
1321 | */ | ||
1322 | FreeXid(xid); | ||
1323 | return -EOPNOTSUPP; | ||
1324 | } | ||
1325 | |||
1326 | rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, | ||
1327 | xid); | ||
847 | FreeXid(xid); | 1328 | FreeXid(xid); |
848 | return rc; | 1329 | return rc; |
849 | } | 1330 | } |
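End to end, a blocking byte-range lock from userspace enters cifs_lock() with FL_POSIX and FL_SLEEP set, so cifs_read_flock() reports wait_flag = true and cifs_setlk() takes either the POSIX or the mandatory path depending on the server's capabilities. A small hypothetical caller:

    #include <fcntl.h>
    #include <unistd.h>

    int lock_prefix(int fd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 4096,
            };

            if (fcntl(fd, F_SETLKW, &fl) == -1)     /* blocking set */
                    return -1;
            fl.l_type = F_UNLCK;
            return fcntl(fd, F_SETLK, &fl);         /* release */
    }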
@@ -1714,6 +2195,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, | |||
1714 | struct smb_com_read_rsp *pSMBr; | 2195 | struct smb_com_read_rsp *pSMBr; |
1715 | struct cifs_io_parms io_parms; | 2196 | struct cifs_io_parms io_parms; |
1716 | char *read_data; | 2197 | char *read_data; |
2198 | unsigned int rsize; | ||
1717 | __u32 pid; | 2199 | __u32 pid; |
1718 | 2200 | ||
1719 | if (!nr_segs) | 2201 | if (!nr_segs) |
@@ -1726,6 +2208,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, | |||
1726 | xid = GetXid(); | 2208 | xid = GetXid(); |
1727 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2209 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1728 | 2210 | ||
2211 | /* FIXME: set up handlers for larger reads and/or convert to async */ | ||
2212 | rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); | ||
2213 | |||
1729 | open_file = file->private_data; | 2214 | open_file = file->private_data; |
1730 | pTcon = tlink_tcon(open_file->tlink); | 2215 | pTcon = tlink_tcon(open_file->tlink); |
1731 | 2216 | ||
@@ -1738,7 +2223,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, | |||
1738 | cFYI(1, "attempting read on write only file instance"); | 2223 | cFYI(1, "attempting read on write only file instance"); |
1739 | 2224 | ||
1740 | for (total_read = 0; total_read < len; total_read += bytes_read) { | 2225 | for (total_read = 0; total_read < len; total_read += bytes_read) { |
1741 | cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); | 2226 | cur_len = min_t(const size_t, len - total_read, rsize); |
1742 | rc = -EAGAIN; | 2227 | rc = -EAGAIN; |
1743 | read_data = NULL; | 2228 | read_data = NULL; |
1744 | 2229 | ||
@@ -1830,6 +2315,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1830 | unsigned int bytes_read = 0; | 2315 | unsigned int bytes_read = 0; |
1831 | unsigned int total_read; | 2316 | unsigned int total_read; |
1832 | unsigned int current_read_size; | 2317 | unsigned int current_read_size; |
2318 | unsigned int rsize; | ||
1833 | struct cifs_sb_info *cifs_sb; | 2319 | struct cifs_sb_info *cifs_sb; |
1834 | struct cifs_tcon *pTcon; | 2320 | struct cifs_tcon *pTcon; |
1835 | int xid; | 2321 | int xid; |
@@ -1842,6 +2328,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1842 | xid = GetXid(); | 2328 | xid = GetXid(); |
1843 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2329 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1844 | 2330 | ||
2331 | /* FIXME: set up handlers for larger reads and/or convert to async */ | ||
2332 | rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); | ||
2333 | |||
1845 | if (file->private_data == NULL) { | 2334 | if (file->private_data == NULL) { |
1846 | rc = -EBADF; | 2335 | rc = -EBADF; |
1847 | FreeXid(xid); | 2336 | FreeXid(xid); |
@@ -1861,14 +2350,14 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1861 | for (total_read = 0, current_offset = read_data; | 2350 | for (total_read = 0, current_offset = read_data; |
1862 | read_size > total_read; | 2351 | read_size > total_read; |
1863 | total_read += bytes_read, current_offset += bytes_read) { | 2352 | total_read += bytes_read, current_offset += bytes_read) { |
1864 | current_read_size = min_t(const int, read_size - total_read, | 2353 | current_read_size = min_t(uint, read_size - total_read, rsize); |
1865 | cifs_sb->rsize); | 2354 | |
1866 | /* For Windows ME and 9x we do not want to request more | 2355 | /* For Windows ME and 9x we do not want to request more |
1867 | than was negotiated, since the server will refuse the read */ | 2356 | than was negotiated, since the server will refuse the read */ |
1868 | if ((pTcon->ses) && | 2357 | if ((pTcon->ses) && |
1869 | !(pTcon->ses->capabilities & CAP_LARGE_FILES)) { | 2358 | !(pTcon->ses->capabilities & CAP_LARGE_FILES)) { |
1870 | current_read_size = min_t(const int, current_read_size, | 2359 | current_read_size = min_t(uint, current_read_size, |
1871 | pTcon->ses->server->maxBuf - 128); | 2360 | CIFSMaxBufSize); |
1872 | } | 2361 | } |
1873 | rc = -EAGAIN; | 2362 | rc = -EAGAIN; |
1874 | while (rc == -EAGAIN) { | 2363 | while (rc == -EAGAIN) { |
@@ -1957,82 +2446,24 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
1957 | return rc; | 2446 | return rc; |
1958 | } | 2447 | } |
1959 | 2448 | ||
1960 | |||
1961 | static void cifs_copy_cache_pages(struct address_space *mapping, | ||
1962 | struct list_head *pages, int bytes_read, char *data) | ||
1963 | { | ||
1964 | struct page *page; | ||
1965 | char *target; | ||
1966 | |||
1967 | while (bytes_read > 0) { | ||
1968 | if (list_empty(pages)) | ||
1969 | break; | ||
1970 | |||
1971 | page = list_entry(pages->prev, struct page, lru); | ||
1972 | list_del(&page->lru); | ||
1973 | |||
1974 | if (add_to_page_cache_lru(page, mapping, page->index, | ||
1975 | GFP_KERNEL)) { | ||
1976 | page_cache_release(page); | ||
1977 | cFYI(1, "Add page cache failed"); | ||
1978 | data += PAGE_CACHE_SIZE; | ||
1979 | bytes_read -= PAGE_CACHE_SIZE; | ||
1980 | continue; | ||
1981 | } | ||
1982 | page_cache_release(page); | ||
1983 | |||
1984 | target = kmap_atomic(page, KM_USER0); | ||
1985 | |||
1986 | if (PAGE_CACHE_SIZE > bytes_read) { | ||
1987 | memcpy(target, data, bytes_read); | ||
1988 | /* zero the tail end of this partial page */ | ||
1989 | memset(target + bytes_read, 0, | ||
1990 | PAGE_CACHE_SIZE - bytes_read); | ||
1991 | bytes_read = 0; | ||
1992 | } else { | ||
1993 | memcpy(target, data, PAGE_CACHE_SIZE); | ||
1994 | bytes_read -= PAGE_CACHE_SIZE; | ||
1995 | } | ||
1996 | kunmap_atomic(target, KM_USER0); | ||
1997 | |||
1998 | flush_dcache_page(page); | ||
1999 | SetPageUptodate(page); | ||
2000 | unlock_page(page); | ||
2001 | data += PAGE_CACHE_SIZE; | ||
2002 | |||
2003 | /* add page to FS-Cache */ | ||
2004 | cifs_readpage_to_fscache(mapping->host, page); | ||
2005 | } | ||
2006 | return; | ||
2007 | } | ||
2008 | |||
2009 | static int cifs_readpages(struct file *file, struct address_space *mapping, | 2449 | static int cifs_readpages(struct file *file, struct address_space *mapping, |
2010 | struct list_head *page_list, unsigned num_pages) | 2450 | struct list_head *page_list, unsigned num_pages) |
2011 | { | 2451 | { |
2012 | int rc = -EACCES; | 2452 | int rc; |
2013 | int xid; | 2453 | struct list_head tmplist; |
2014 | loff_t offset; | 2454 | struct cifsFileInfo *open_file = file->private_data; |
2015 | struct page *page; | 2455 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
2016 | struct cifs_sb_info *cifs_sb; | 2456 | unsigned int rsize = cifs_sb->rsize; |
2017 | struct cifs_tcon *pTcon; | 2457 | pid_t pid; |
2018 | unsigned int bytes_read = 0; | ||
2019 | unsigned int read_size, i; | ||
2020 | char *smb_read_data = NULL; | ||
2021 | struct smb_com_read_rsp *pSMBr; | ||
2022 | struct cifsFileInfo *open_file; | ||
2023 | struct cifs_io_parms io_parms; | ||
2024 | int buf_type = CIFS_NO_BUFFER; | ||
2025 | __u32 pid; | ||
2026 | 2458 | ||
2027 | xid = GetXid(); | 2459 | /* |
2028 | if (file->private_data == NULL) { | 2460 | * Give up immediately if rsize is too small to read an entire page. |
2029 | rc = -EBADF; | 2461 | * The VFS will fall back to readpage. We should never reach this |
2030 | FreeXid(xid); | 2462 | * point however since we set ra_pages to 0 when the rsize is smaller |
2031 | return rc; | 2463 | * than a cache page. |
2032 | } | 2464 | */ |
2033 | open_file = file->private_data; | 2465 | if (unlikely(rsize < PAGE_CACHE_SIZE)) |
2034 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2466 | return 0; |
2035 | pTcon = tlink_tcon(open_file->tlink); | ||
2036 | 2467 | ||
2037 | /* | 2468 | /* |
2038 | * Reads as many pages as possible from fscache. Returns -ENOBUFS | 2469 | * Reads as many pages as possible from fscache. Returns -ENOBUFS |
@@ -2041,125 +2472,127 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
2041 | rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, | 2472 | rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, |
2042 | &num_pages); | 2473 | &num_pages); |
2043 | if (rc == 0) | 2474 | if (rc == 0) |
2044 | goto read_complete; | 2475 | return rc; |
2045 | 2476 | ||
2046 | cFYI(DBG2, "rpages: num pages %d", num_pages); | ||
2047 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) | 2477 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) |
2048 | pid = open_file->pid; | 2478 | pid = open_file->pid; |
2049 | else | 2479 | else |
2050 | pid = current->tgid; | 2480 | pid = current->tgid; |
2051 | 2481 | ||
2052 | for (i = 0; i < num_pages; ) { | 2482 | rc = 0; |
2053 | unsigned contig_pages; | 2483 | INIT_LIST_HEAD(&tmplist); |
2054 | struct page *tmp_page; | ||
2055 | unsigned long expected_index; | ||
2056 | 2484 | ||
2057 | if (list_empty(page_list)) | 2485 | cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file, |
2058 | break; | 2486 | mapping, num_pages); |
2487 | |||
2488 | /* | ||
2489 | * Start with the page at the end of the list and move it to a | ||
2490 | * private list. Do the same with any following pages until we hit | ||
2491 | * the rsize limit, hit an index discontinuity, or run out of | ||
2492 | * pages. Issue the async read and then start the loop again | ||
2493 | * until the list is empty. | ||
2494 | * | ||
2495 | * Note that list order is important. The page_list is in | ||
2496 | * the order of declining indexes. When we put the pages on | ||
2497 | * rdata->pages, we want them in increasing order. | ||
2498 | */ | ||
2499 | while (!list_empty(page_list)) { | ||
2500 | unsigned int bytes = PAGE_CACHE_SIZE; | ||
2501 | unsigned int expected_index; | ||
2502 | unsigned int nr_pages = 1; | ||
2503 | loff_t offset; | ||
2504 | struct page *page, *tpage; | ||
2505 | struct cifs_readdata *rdata; | ||
2059 | 2506 | ||
2060 | page = list_entry(page_list->prev, struct page, lru); | 2507 | page = list_entry(page_list->prev, struct page, lru); |
2508 | |||
2509 | /* | ||
2510 | * Lock the page and put it in the cache. Since no one else | ||
2511 | * should have access to this page, we're safe to simply set | ||
2512 | * PG_locked without checking it first. | ||
2513 | */ | ||
2514 | __set_page_locked(page); | ||
2515 | rc = add_to_page_cache_locked(page, mapping, | ||
2516 | page->index, GFP_KERNEL); | ||
2517 | |||
2518 | /* give up if we can't stick it in the cache */ | ||
2519 | if (rc) { | ||
2520 | __clear_page_locked(page); | ||
2521 | break; | ||
2522 | } | ||
2523 | |||
2524 | /* move first page to the tmplist */ | ||
2061 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 2525 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; |
2526 | list_move_tail(&page->lru, &tmplist); | ||
2062 | 2527 | ||
2063 | /* count adjacent pages that we will read into */ | 2528 | /* now try and add more pages onto the request */ |
2064 | contig_pages = 0; | 2529 | expected_index = page->index + 1; |
2065 | expected_index = | 2530 | list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { |
2066 | list_entry(page_list->prev, struct page, lru)->index; | 2531 | /* discontinuity ? */ |
2067 | list_for_each_entry_reverse(tmp_page, page_list, lru) { | 2532 | if (page->index != expected_index) |
2068 | if (tmp_page->index == expected_index) { | ||
2069 | contig_pages++; | ||
2070 | expected_index++; | ||
2071 | } else | ||
2072 | break; | 2533 | break; |
2534 | |||
2535 | /* would this page push the read over the rsize? */ | ||
2536 | if (bytes + PAGE_CACHE_SIZE > rsize) | ||
2537 | break; | ||
2538 | |||
2539 | __set_page_locked(page); | ||
2540 | if (add_to_page_cache_locked(page, mapping, | ||
2541 | page->index, GFP_KERNEL)) { | ||
2542 | __clear_page_locked(page); | ||
2543 | break; | ||
2544 | } | ||
2545 | list_move_tail(&page->lru, &tmplist); | ||
2546 | bytes += PAGE_CACHE_SIZE; | ||
2547 | expected_index++; | ||
2548 | nr_pages++; | ||
2073 | } | 2549 | } |
2074 | if (contig_pages + i > num_pages) | 2550 | |
2075 | contig_pages = num_pages - i; | 2551 | rdata = cifs_readdata_alloc(nr_pages); |
2076 | 2552 | if (!rdata) { | |
2077 | /* for reads over a certain size could initiate async | 2553 | /* best to give up if we're out of mem */ |
2078 | read ahead */ | 2554 | list_for_each_entry_safe(page, tpage, &tmplist, lru) { |
2079 | 2555 | list_del(&page->lru); | |
2080 | read_size = contig_pages * PAGE_CACHE_SIZE; | 2556 | lru_cache_add_file(page); |
2081 | /* Read size needs to be in multiples of one page */ | 2557 | unlock_page(page); |
2082 | read_size = min_t(const unsigned int, read_size, | 2558 | page_cache_release(page); |
2083 | cifs_sb->rsize & PAGE_CACHE_MASK); | 2559 | } |
2084 | cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d", | 2560 | rc = -ENOMEM; |
2085 | read_size, contig_pages); | 2561 | break; |
2086 | rc = -EAGAIN; | 2562 | } |
2087 | while (rc == -EAGAIN) { | 2563 | |
2564 | spin_lock(&cifs_file_list_lock); | ||
2565 | cifsFileInfo_get(open_file); | ||
2566 | spin_unlock(&cifs_file_list_lock); | ||
2567 | rdata->cfile = open_file; | ||
2568 | rdata->mapping = mapping; | ||
2569 | rdata->offset = offset; | ||
2570 | rdata->bytes = bytes; | ||
2571 | rdata->pid = pid; | ||
2572 | list_splice_init(&tmplist, &rdata->pages); | ||
2573 | |||
2574 | do { | ||
2088 | if (open_file->invalidHandle) { | 2575 | if (open_file->invalidHandle) { |
2089 | rc = cifs_reopen_file(open_file, true); | 2576 | rc = cifs_reopen_file(open_file, true); |
2090 | if (rc != 0) | 2577 | if (rc != 0) |
2091 | break; | 2578 | continue; |
2092 | } | 2579 | } |
2093 | io_parms.netfid = open_file->netfid; | 2580 | rc = cifs_async_readv(rdata); |
2094 | io_parms.pid = pid; | 2581 | } while (rc == -EAGAIN); |
2095 | io_parms.tcon = pTcon; | ||
2096 | io_parms.offset = offset; | ||
2097 | io_parms.length = read_size; | ||
2098 | rc = CIFSSMBRead(xid, &io_parms, &bytes_read, | ||
2099 | &smb_read_data, &buf_type); | ||
2100 | /* BB more RC checks ? */ | ||
2101 | if (rc == -EAGAIN) { | ||
2102 | if (smb_read_data) { | ||
2103 | if (buf_type == CIFS_SMALL_BUFFER) | ||
2104 | cifs_small_buf_release(smb_read_data); | ||
2105 | else if (buf_type == CIFS_LARGE_BUFFER) | ||
2106 | cifs_buf_release(smb_read_data); | ||
2107 | smb_read_data = NULL; | ||
2108 | } | ||
2109 | } | ||
2110 | } | ||
2111 | if ((rc < 0) || (smb_read_data == NULL)) { | ||
2112 | cFYI(1, "Read error in readpages: %d", rc); | ||
2113 | break; | ||
2114 | } else if (bytes_read > 0) { | ||
2115 | task_io_account_read(bytes_read); | ||
2116 | pSMBr = (struct smb_com_read_rsp *)smb_read_data; | ||
2117 | cifs_copy_cache_pages(mapping, page_list, bytes_read, | ||
2118 | smb_read_data + 4 /* RFC1001 hdr */ + | ||
2119 | le16_to_cpu(pSMBr->DataOffset)); | ||
2120 | |||
2121 | i += bytes_read >> PAGE_CACHE_SHIFT; | ||
2122 | cifs_stats_bytes_read(pTcon, bytes_read); | ||
2123 | if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) { | ||
2124 | i++; /* account for partial page */ | ||
2125 | |||
2126 | /* server copy of file can have smaller size | ||
2127 | than client */ | ||
2128 | /* BB do we need to verify this common case ? | ||
2129 | this case is ok - if we are at server EOF | ||
2130 | we will hit it on next read */ | ||
2131 | 2582 | ||
2132 | /* break; */ | 2583 | if (rc != 0) { |
2584 | list_for_each_entry_safe(page, tpage, &rdata->pages, | ||
2585 | lru) { | ||
2586 | list_del(&page->lru); | ||
2587 | lru_cache_add_file(page); | ||
2588 | unlock_page(page); | ||
2589 | page_cache_release(page); | ||
2133 | } | 2590 | } |
2134 | } else { | 2591 | cifs_readdata_free(rdata); |
2135 | cFYI(1, "No bytes read (%d) at offset %lld . " | ||
2136 | "Cleaning remaining pages from readahead list", | ||
2137 | bytes_read, offset); | ||
2138 | /* BB turn off caching and do new lookup on | ||
2139 | file size at server? */ | ||
2140 | break; | 2592 | break; |
2141 | } | 2593 | } |
2142 | if (smb_read_data) { | ||
2143 | if (buf_type == CIFS_SMALL_BUFFER) | ||
2144 | cifs_small_buf_release(smb_read_data); | ||
2145 | else if (buf_type == CIFS_LARGE_BUFFER) | ||
2146 | cifs_buf_release(smb_read_data); | ||
2147 | smb_read_data = NULL; | ||
2148 | } | ||
2149 | bytes_read = 0; | ||
2150 | } | 2594 | } |
2151 | 2595 | ||
2152 | /* need to free smb_read_data buf before exit */ | ||
2153 | if (smb_read_data) { | ||
2154 | if (buf_type == CIFS_SMALL_BUFFER) | ||
2155 | cifs_small_buf_release(smb_read_data); | ||
2156 | else if (buf_type == CIFS_LARGE_BUFFER) | ||
2157 | cifs_buf_release(smb_read_data); | ||
2158 | smb_read_data = NULL; | ||
2159 | } | ||
2160 | |||
2161 | read_complete: | ||
2162 | FreeXid(xid); | ||
2163 | return rc; | 2596 | return rc; |
2164 | } | 2597 | } |
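The rewritten cifs_readpages() batches contiguous pages until the next page would push the request past rsize, then issues one cifs_async_readv() per batch. The batching arithmetic on its own, with assumed values (4096-byte cache pages and an rsize of 16384; neither value comes from the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned int rsize = 16384, page_size = 4096;
            unsigned int bytes = page_size, nr_pages = 1;

            /* mirrors the "would this page push the read over the
             * rsize?" check in the loop above */
            while (bytes + page_size <= rsize) {
                    bytes += page_size;
                    nr_pages++;
            }
            printf("%u pages (%u bytes) per async read\n", nr_pages, bytes);
            return 0;       /* prints: 4 pages (16384 bytes) per async read */
    }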
2165 | 2598 | ||
@@ -2408,6 +2841,10 @@ void cifs_oplock_break(struct work_struct *work) | |||
2408 | cFYI(1, "Oplock flush inode %p rc %d", inode, rc); | 2841 | cFYI(1, "Oplock flush inode %p rc %d", inode, rc); |
2409 | } | 2842 | } |
2410 | 2843 | ||
2844 | rc = cifs_push_locks(cfile); | ||
2845 | if (rc) | ||
2846 | cERROR(1, "Push locks rc = %d", rc); | ||
2847 | |||
2411 | /* | 2848 | /* |
2412 | * releasing stale oplock after recent reconnect of smb session using | 2849 | * releasing stale oplock after recent reconnect of smb session using |
2413 | * a now incorrect file handle is not a data integrity issue but do | 2850 | * a now incorrect file handle is not a data integrity issue but do |
@@ -2415,8 +2852,9 @@ void cifs_oplock_break(struct work_struct *work) | |||
2415 | * disconnected since oplock already released by the server | 2852 | * disconnected since oplock already released by the server |
2416 | */ | 2853 | */ |
2417 | if (!cfile->oplock_break_cancelled) { | 2854 | if (!cfile->oplock_break_cancelled) { |
2418 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0, | 2855 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, |
2419 | 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, | 2856 | current->tgid, 0, 0, 0, 0, |
2857 | LOCKING_ANDX_OPLOCK_RELEASE, false, | ||
2420 | cinode->clientCanCacheRead ? 1 : 0); | 2858 | cinode->clientCanCacheRead ? 1 : 0); |
2421 | cFYI(1, "Oplock release rc = %d", rc); | 2859 | cFYI(1, "Oplock release rc = %d", rc); |
2422 | } | 2860 | } |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a7b2dcd4a53e..2c50bd2f65d1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -562,7 +562,16 @@ int cifs_get_file_info(struct file *filp) | |||
562 | 562 | ||
563 | xid = GetXid(); | 563 | xid = GetXid(); |
564 | rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); | 564 | rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); |
565 | if (rc == -EOPNOTSUPP || rc == -EINVAL) { | 565 | switch (rc) { |
566 | case 0: | ||
567 | cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); | ||
568 | break; | ||
569 | case -EREMOTE: | ||
570 | cifs_create_dfs_fattr(&fattr, inode->i_sb); | ||
571 | rc = 0; | ||
572 | break; | ||
573 | case -EOPNOTSUPP: | ||
574 | case -EINVAL: | ||
566 | /* | 575 | /* |
567 | * FIXME: legacy server -- fall back to path-based call? | 576 | * FIXME: legacy server -- fall back to path-based call? |
568 | * for now, just skip revalidating and mark inode for | 577 | * for now, just skip revalidating and mark inode for |
@@ -570,18 +579,14 @@ int cifs_get_file_info(struct file *filp) | |||
570 | */ | 579 | */ |
571 | rc = 0; | 580 | rc = 0; |
572 | CIFS_I(inode)->time = 0; | 581 | CIFS_I(inode)->time = 0; |
582 | default: | ||
573 | goto cgfi_exit; | 583 | goto cgfi_exit; |
574 | } else if (rc == -EREMOTE) { | 584 | } |
575 | cifs_create_dfs_fattr(&fattr, inode->i_sb); | ||
576 | rc = 0; | ||
577 | } else if (rc) | ||
578 | goto cgfi_exit; | ||
579 | 585 | ||
580 | /* | 586 | /* |
581 | * don't bother with SFU junk here -- just mark inode as needing | 587 | * don't bother with SFU junk here -- just mark inode as needing |
582 | * revalidation. | 588 | * revalidation. |
583 | */ | 589 | */ |
584 | cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); | ||
585 | fattr.cf_uniqueid = CIFS_I(inode)->uniqueid; | 590 | fattr.cf_uniqueid = CIFS_I(inode)->uniqueid; |
586 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; | 591 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; |
587 | cifs_fattr_to_inode(inode, &fattr); | 592 | cifs_fattr_to_inode(inode, &fattr); |
@@ -2096,6 +2101,8 @@ static int | |||
2096 | cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | 2101 | cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) |
2097 | { | 2102 | { |
2098 | int xid; | 2103 | int xid; |
2104 | uid_t uid = NO_CHANGE_32; | ||
2105 | gid_t gid = NO_CHANGE_32; | ||
2099 | struct inode *inode = direntry->d_inode; | 2106 | struct inode *inode = direntry->d_inode; |
2100 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 2107 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
2101 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 2108 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
@@ -2146,13 +2153,25 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
2146 | goto cifs_setattr_exit; | 2153 | goto cifs_setattr_exit; |
2147 | } | 2154 | } |
2148 | 2155 | ||
2149 | /* | 2156 | if (attrs->ia_valid & ATTR_UID) |
2150 | * Without unix extensions we can't send ownership changes to the | 2157 | uid = attrs->ia_uid; |
2151 | * server, so silently ignore them. This is consistent with how | 2158 | |
2152 | * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With | 2159 | if (attrs->ia_valid & ATTR_GID) |
2153 | * CIFSACL support + proper Windows to Unix idmapping, we may be | 2160 | gid = attrs->ia_gid; |
2154 | * able to support this in the future. | 2161 | |
2155 | */ | 2162 | #ifdef CONFIG_CIFS_ACL |
2163 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { | ||
2164 | if (uid != NO_CHANGE_32 || gid != NO_CHANGE_32) { | ||
2165 | rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, | ||
2166 | uid, gid); | ||
2167 | if (rc) { | ||
2168 | cFYI(1, "%s: Setting id failed with error: %d", | ||
2169 | __func__, rc); | ||
2170 | goto cifs_setattr_exit; | ||
2171 | } | ||
2172 | } | ||
2173 | } else | ||
2174 | #endif /* CONFIG_CIFS_ACL */ | ||
2156 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) | 2175 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) |
2157 | attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); | 2176 | attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); |
2158 | 2177 | ||
@@ -2161,15 +2180,12 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
2161 | attrs->ia_valid &= ~ATTR_MODE; | 2180 | attrs->ia_valid &= ~ATTR_MODE; |
2162 | 2181 | ||
2163 | if (attrs->ia_valid & ATTR_MODE) { | 2182 | if (attrs->ia_valid & ATTR_MODE) { |
2164 | cFYI(1, "Mode changed to 0%o", attrs->ia_mode); | ||
2165 | mode = attrs->ia_mode; | 2183 | mode = attrs->ia_mode; |
2166 | } | ||
2167 | |||
2168 | if (attrs->ia_valid & ATTR_MODE) { | ||
2169 | rc = 0; | 2184 | rc = 0; |
2170 | #ifdef CONFIG_CIFS_ACL | 2185 | #ifdef CONFIG_CIFS_ACL |
2171 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { | 2186 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { |
2172 | rc = mode_to_cifs_acl(inode, full_path, mode); | 2187 | rc = id_mode_to_cifs_acl(inode, full_path, mode, |
2188 | NO_CHANGE_32, NO_CHANGE_32); | ||
2173 | if (rc) { | 2189 | if (rc) { |
2174 | cFYI(1, "%s: Setting ACL failed with error: %d", | 2190 | cFYI(1, "%s: Setting ACL failed with error: %d", |
2175 | __func__, rc); | 2191 | __func__, rc); |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index db3f18cdf024..8693b5d0e180 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -183,14 +183,20 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) | |||
183 | static int | 183 | static int |
184 | CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon, | 184 | CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon, |
185 | const char *fromName, const char *toName, | 185 | const char *fromName, const char *toName, |
186 | const struct nls_table *nls_codepage, int remap) | 186 | struct cifs_sb_info *cifs_sb) |
187 | { | 187 | { |
188 | int rc; | 188 | int rc; |
189 | int oplock = 0; | 189 | int oplock = 0; |
190 | int remap; | ||
191 | int create_options = CREATE_NOT_DIR; | ||
190 | __u16 netfid = 0; | 192 | __u16 netfid = 0; |
191 | u8 *buf; | 193 | u8 *buf; |
192 | unsigned int bytes_written = 0; | 194 | unsigned int bytes_written = 0; |
193 | struct cifs_io_parms io_parms; | 195 | struct cifs_io_parms io_parms; |
196 | struct nls_table *nls_codepage; | ||
197 | |||
198 | nls_codepage = cifs_sb->local_nls; | ||
199 | remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; | ||
194 | 200 | ||
195 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | 201 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); |
196 | if (!buf) | 202 | if (!buf) |
@@ -202,8 +208,11 @@ CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon, | |||
202 | return rc; | 208 | return rc; |
203 | } | 209 | } |
204 | 210 | ||
211 | if (backup_cred(cifs_sb)) | ||
212 | create_options |= CREATE_OPEN_BACKUP_INTENT; | ||
213 | |||
205 | rc = CIFSSMBOpen(xid, tcon, fromName, FILE_CREATE, GENERIC_WRITE, | 214 | rc = CIFSSMBOpen(xid, tcon, fromName, FILE_CREATE, GENERIC_WRITE, |
206 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | 215 | create_options, &netfid, &oplock, NULL, |
207 | nls_codepage, remap); | 216 | nls_codepage, remap); |
208 | if (rc != 0) { | 217 | if (rc != 0) { |
209 | kfree(buf); | 218 | kfree(buf); |
@@ -559,9 +568,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) | |||
559 | /* BB what if DFS and this volume is on different share? BB */ | 568 | /* BB what if DFS and this volume is on different share? BB */ |
560 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) | 569 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) |
561 | rc = CIFSCreateMFSymLink(xid, pTcon, full_path, symname, | 570 | rc = CIFSCreateMFSymLink(xid, pTcon, full_path, symname, |
562 | cifs_sb->local_nls, | 571 | cifs_sb); |
563 | cifs_sb->mnt_cifs_flags & | ||
564 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
565 | else if (pTcon->unix_ext) | 572 | else if (pTcon->unix_ext) |
566 | rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, | 573 | rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, |
567 | cifs_sb->local_nls); | 574 | cifs_sb->local_nls); |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 7c1693392598..703ef5c6fdb1 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -420,19 +420,22 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid) | |||
420 | } | 420 | } |
421 | 421 | ||
422 | int | 422 | int |
423 | checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) | 423 | checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int total_read) |
424 | { | 424 | { |
425 | __u32 len = be32_to_cpu(smb->smb_buf_length); | 425 | __u32 rfclen = be32_to_cpu(smb->smb_buf_length); |
426 | __u32 clc_len; /* calculated length */ | 426 | __u32 clc_len; /* calculated length */ |
427 | cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len); | 427 | cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", |
428 | total_read, rfclen); | ||
428 | 429 | ||
429 | if (length < 2 + sizeof(struct smb_hdr)) { | 430 | /* is this frame too small to even get to a BCC? */ |
430 | if ((length >= sizeof(struct smb_hdr) - 1) | 431 | if (total_read < 2 + sizeof(struct smb_hdr)) { |
432 | if ((total_read >= sizeof(struct smb_hdr) - 1) | ||
431 | && (smb->Status.CifsError != 0)) { | 433 | && (smb->Status.CifsError != 0)) { |
434 | /* it's an error return */ | ||
432 | smb->WordCount = 0; | 435 | smb->WordCount = 0; |
433 | /* some error cases do not return wct and bcc */ | 436 | /* some error cases do not return wct and bcc */ |
434 | return 0; | 437 | return 0; |
435 | } else if ((length == sizeof(struct smb_hdr) + 1) && | 438 | } else if ((total_read == sizeof(struct smb_hdr) + 1) && |
436 | (smb->WordCount == 0)) { | 439 | (smb->WordCount == 0)) { |
437 | char *tmp = (char *)smb; | 440 | char *tmp = (char *)smb; |
438 | /* Need to work around a bug in two servers here */ | 441 | /* Need to work around a bug in two servers here */ |
@@ -452,39 +455,35 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) | |||
452 | } else { | 455 | } else { |
453 | cERROR(1, "Length less than smb header size"); | 456 | cERROR(1, "Length less than smb header size"); |
454 | } | 457 | } |
455 | return 1; | 458 | return -EIO; |
456 | } | ||
457 | if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { | ||
458 | cERROR(1, "smb length greater than MaxBufSize, mid=%d", | ||
459 | smb->Mid); | ||
460 | return 1; | ||
461 | } | 459 | } |
462 | 460 | ||
461 | /* otherwise, there is enough to get to the BCC */ | ||
463 | if (check_smb_hdr(smb, mid)) | 462 | if (check_smb_hdr(smb, mid)) |
464 | return 1; | 463 | return -EIO; |
465 | clc_len = smbCalcSize(smb); | 464 | clc_len = smbCalcSize(smb); |
466 | 465 | ||
467 | if (4 + len != length) { | 466 | if (4 + rfclen != total_read) { |
468 | cERROR(1, "Length read does not match RFC1001 length %d", | 467 | cERROR(1, "Length read does not match RFC1001 length %d", |
469 | len); | 468 | rfclen); |
470 | return 1; | 469 | return -EIO; |
471 | } | 470 | } |
472 | 471 | ||
473 | if (4 + len != clc_len) { | 472 | if (4 + rfclen != clc_len) { |
474 | /* check if bcc wrapped around for large read responses */ | 473 | /* check if bcc wrapped around for large read responses */ |
475 | if ((len > 64 * 1024) && (len > clc_len)) { | 474 | if ((rfclen > 64 * 1024) && (rfclen > clc_len)) { |
476 | /* check if lengths match mod 64K */ | 475 | /* check if lengths match mod 64K */ |
477 | if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) | 476 | if (((4 + rfclen) & 0xFFFF) == (clc_len & 0xFFFF)) |
478 | return 0; /* bcc wrapped */ | 477 | return 0; /* bcc wrapped */ |
479 | } | 478 | } |
480 | cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", | 479 | cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", |
481 | clc_len, 4 + len, smb->Mid); | 480 | clc_len, 4 + rfclen, smb->Mid); |
482 | 481 | ||
483 | if (4 + len < clc_len) { | 482 | if (4 + rfclen < clc_len) { |
484 | cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", | 483 | cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", |
485 | len, smb->Mid); | 484 | rfclen, smb->Mid); |
486 | return 1; | 485 | return -EIO; |
487 | } else if (len > clc_len + 512) { | 486 | } else if (rfclen > clc_len + 512) { |
488 | /* | 487 | /* |
489 | * Some servers (Windows XP in particular) send more | 488 | * Some servers (Windows XP in particular) send more |
490 | * data than the lengths in the SMB packet would | 489 | * data than the lengths in the SMB packet would |
@@ -495,8 +494,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) | |||
495 | * data to 512 bytes. | 494 | * data to 512 bytes. |
496 | */ | 495 | */ |
497 | cERROR(1, "RFC1001 size %u more than 512 bytes larger " | 496 | cERROR(1, "RFC1001 size %u more than 512 bytes larger " |
498 | "than SMB for mid=%u", len, smb->Mid); | 497 | "than SMB for mid=%u", rfclen, smb->Mid); |
499 | return 1; | 498 | return -EIO; |
500 | } | 499 | } |
501 | } | 500 | } |
502 | return 0; | 501 | return 0; |
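The mod-64K comparison above accepts large read responses whose 16-bit byte-count (BCC) field wrapped. A standalone check with assumed example values (an RFC1001 length just past 64KiB):

    #include <stdio.h>

    int main(void)
    {
            unsigned int rfclen = 65600, clc_len = 68;      /* assumed */

            /* (4 + 65600) mod 65536 == 68 == clc_len mod 65536 */
            if (rfclen > 64 * 1024 && rfclen > clc_len &&
                ((4 + rfclen) & 0xFFFF) == (clc_len & 0xFFFF))
                    printf("bcc wrapped, frame accepted\n");  /* taken */
            return 0;
    }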
@@ -676,3 +675,18 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | |||
676 | cinode->clientCanCacheRead = false; | 675 | cinode->clientCanCacheRead = false; |
677 | } | 676 | } |
678 | } | 677 | } |
678 | |||
679 | bool | ||
680 | backup_cred(struct cifs_sb_info *cifs_sb) | ||
681 | { | ||
682 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { | ||
683 | if (cifs_sb->mnt_backupuid == current_fsuid()) | ||
684 | return true; | ||
685 | } | ||
686 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { | ||
687 | if (in_group_p(cifs_sb->mnt_backupgid)) | ||
688 | return true; | ||
689 | } | ||
690 | |||
691 | return false; | ||
692 | } | ||
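backup_cred() gates CREATE_OPEN_BACKUP_INTENT on the backupuid/backupgid mount options: only the configured uid (or a member of the configured gid) gets the flag. The typical call site looks like the one added to CIFSCreateMFSymLink() in the link.c hunk above:

    int create_options = CREATE_NOT_DIR;

    if (backup_cred(cifs_sb))
            create_options |= CREATE_OPEN_BACKUP_INTENT;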
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d3e619692ee0..c7d80e24f24e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -124,7 +124,9 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) | |||
124 | /* that we use in next few lines */ | 124 | /* that we use in next few lines */ |
125 | /* Note that header is initialized to zero in header_assemble */ | 125 | /* Note that header is initialized to zero in header_assemble */ |
126 | pSMB->req.AndXCommand = 0xFF; | 126 | pSMB->req.AndXCommand = 0xFF; |
127 | pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); | 127 | pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, |
128 | CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, | ||
129 | USHRT_MAX)); | ||
128 | pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); | 130 | pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); |
129 | pSMB->req.VcNumber = get_next_vcnum(ses); | 131 | pSMB->req.VcNumber = get_next_vcnum(ses); |
130 | 132 | ||
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 42b9fff48751..ac1221d969d6 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c | |||
@@ -265,91 +265,6 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) | |||
265 | return rc; | 265 | return rc; |
266 | } | 266 | } |
267 | 267 | ||
268 | #if 0 /* currently unused */ | ||
269 | /* Does both the NT and LM owfs of a user's password */ | ||
270 | static void | ||
271 | nt_lm_owf_gen(char *pwd, unsigned char nt_p16[16], unsigned char p16[16]) | ||
272 | { | ||
273 | char passwd[514]; | ||
274 | |||
275 | memset(passwd, '\0', 514); | ||
276 | if (strlen(pwd) < 513) | ||
277 | strcpy(passwd, pwd); | ||
278 | else | ||
279 | memcpy(passwd, pwd, 512); | ||
280 | /* Calculate the MD4 hash (NT compatible) of the password */ | ||
281 | memset(nt_p16, '\0', 16); | ||
282 | E_md4hash(passwd, nt_p16); | ||
283 | |||
284 | /* Mangle the passwords into Lanman format */ | ||
285 | passwd[14] = '\0'; | ||
286 | /* strupper(passwd); */ | ||
287 | |||
288 | /* Calculate the SMB (lanman) hash functions of the password */ | ||
289 | |||
290 | memset(p16, '\0', 16); | ||
291 | E_P16((unsigned char *) passwd, (unsigned char *) p16); | ||
292 | |||
293 | /* clear out local copy of user's password (just being paranoid). */ | ||
294 | memset(passwd, '\0', sizeof(passwd)); | ||
295 | } | ||
296 | #endif | ||
297 | |||
298 | /* Does the NTLMv2 owfs of a user's password */ | ||
299 | #if 0 /* function not needed yet - but will be soon */ | ||
300 | static void | ||
301 | ntv2_owf_gen(const unsigned char owf[16], const char *user_n, | ||
302 | const char *domain_n, unsigned char kr_buf[16], | ||
303 | const struct nls_table *nls_codepage) | ||
304 | { | ||
305 | wchar_t *user_u; | ||
306 | wchar_t *dom_u; | ||
307 | int user_l, domain_l; | ||
308 | struct HMACMD5Context ctx; | ||
309 | |||
310 | /* might as well do one alloc to hold both (user_u and dom_u) */ | ||
311 | user_u = kmalloc(2048 * sizeof(wchar_t), GFP_KERNEL); | ||
312 | if (user_u == NULL) | ||
313 | return; | ||
314 | dom_u = user_u + 1024; | ||
315 | |||
316 | /* push_ucs2(NULL, user_u, user_n, (user_l+1)*2, | ||
317 | STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); | ||
318 | push_ucs2(NULL, dom_u, domain_n, (domain_l+1)*2, | ||
319 | STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); */ | ||
320 | |||
321 | /* BB user and domain may need to be uppercased */ | ||
322 | user_l = cifs_strtoUCS(user_u, user_n, 511, nls_codepage); | ||
323 | domain_l = cifs_strtoUCS(dom_u, domain_n, 511, nls_codepage); | ||
324 | |||
325 | user_l++; /* trailing null */ | ||
326 | domain_l++; | ||
327 | |||
328 | hmac_md5_init_limK_to_64(owf, 16, &ctx); | ||
329 | hmac_md5_update((const unsigned char *) user_u, user_l * 2, &ctx); | ||
330 | hmac_md5_update((const unsigned char *) dom_u, domain_l * 2, &ctx); | ||
331 | hmac_md5_final(kr_buf, &ctx); | ||
332 | |||
333 | kfree(user_u); | ||
334 | } | ||
335 | #endif | ||
336 | |||
337 | /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ | ||
338 | #if 0 /* currently unused */ | ||
339 | static void | ||
340 | NTLMSSPOWFencrypt(unsigned char passwd[8], | ||
341 | unsigned char *ntlmchalresp, unsigned char p24[24]) | ||
342 | { | ||
343 | unsigned char p21[21]; | ||
344 | |||
345 | memset(p21, '\0', 21); | ||
346 | memcpy(p21, passwd, 8); | ||
347 | memset(p21 + 8, 0xbd, 8); | ||
348 | |||
349 | E_P24(p21, ntlmchalresp, p24); | ||
350 | } | ||
351 | #endif | ||
352 | |||
353 | /* Does the NT MD4 hash then des encryption. */ | 268 | /* Does the NT MD4 hash then des encryption. */ |
354 | int | 269 | int |
355 | SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) | 270 | SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) |
@@ -369,39 +284,3 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) | |||
369 | rc = E_P24(p21, c8, p24); | 284 | rc = E_P24(p21, c8, p24); |
370 | return rc; | 285 | return rc; |
371 | } | 286 | } |
372 | |||
373 | |||
374 | /* Does the md5 encryption from the NT hash for NTLMv2. */ | ||
375 | /* These routines will be needed later */ | ||
376 | #if 0 | ||
377 | static void | ||
378 | SMBOWFencrypt_ntv2(const unsigned char kr[16], | ||
379 | const struct data_blob *srv_chal, | ||
380 | const struct data_blob *cli_chal, unsigned char resp_buf[16]) | ||
381 | { | ||
382 | struct HMACMD5Context ctx; | ||
383 | |||
384 | hmac_md5_init_limK_to_64(kr, 16, &ctx); | ||
385 | hmac_md5_update(srv_chal->data, srv_chal->length, &ctx); | ||
386 | hmac_md5_update(cli_chal->data, cli_chal->length, &ctx); | ||
387 | hmac_md5_final(resp_buf, &ctx); | ||
388 | } | ||
389 | |||
390 | static void | ||
391 | SMBsesskeygen_ntv2(const unsigned char kr[16], | ||
392 | const unsigned char *nt_resp, __u8 sess_key[16]) | ||
393 | { | ||
394 | struct HMACMD5Context ctx; | ||
395 | |||
396 | hmac_md5_init_limK_to_64(kr, 16, &ctx); | ||
397 | hmac_md5_update(nt_resp, 16, &ctx); | ||
398 | hmac_md5_final((unsigned char *) sess_key, &ctx); | ||
399 | } | ||
400 | |||
401 | static void | ||
402 | SMBsesskeygen_ntv1(const unsigned char kr[16], | ||
403 | const unsigned char *nt_resp, __u8 sess_key[16]) | ||
404 | { | ||
405 | mdfour((unsigned char *) sess_key, (unsigned char *) kr, 16); | ||
406 | } | ||
407 | #endif | ||
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1b9c4b10739..0cc9584f5889 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/wait.h> | 26 | #include <linux/wait.h> |
27 | #include <linux/net.h> | 27 | #include <linux/net.h> |
28 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
29 | #include <linux/freezer.h> | ||
29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
30 | #include <asm/processor.h> | 31 | #include <asm/processor.h> |
31 | #include <linux/mempool.h> | 32 | #include <linux/mempool.h> |
@@ -266,15 +267,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server, | |||
266 | while (1) { | 267 | while (1) { |
267 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { | 268 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { |
268 | spin_unlock(&GlobalMid_Lock); | 269 | spin_unlock(&GlobalMid_Lock); |
269 | #ifdef CONFIG_CIFS_STATS2 | 270 | cifs_num_waiters_inc(server); |
270 | atomic_inc(&server->num_waiters); | ||
271 | #endif | ||
272 | wait_event(server->request_q, | 271 | wait_event(server->request_q, |
273 | atomic_read(&server->inFlight) | 272 | atomic_read(&server->inFlight) |
274 | < cifs_max_pending); | 273 | < cifs_max_pending); |
275 | #ifdef CONFIG_CIFS_STATS2 | 274 | cifs_num_waiters_dec(server); |
276 | atomic_dec(&server->num_waiters); | ||
277 | #endif | ||
278 | spin_lock(&GlobalMid_Lock); | 275 | spin_lock(&GlobalMid_Lock); |
279 | } else { | 276 | } else { |
280 | if (server->tcpStatus == CifsExiting) { | 277 | if (server->tcpStatus == CifsExiting) { |
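The wait_for_free_request() hunk replaces open-coded #ifdef CONFIG_CIFS_STATS2 blocks with cifs_num_waiters_inc()/cifs_num_waiters_dec(), keeping the preprocessor noise out of the caller. The helpers themselves are defined elsewhere in the series; a plausible definition, matching how such wrappers are usually written so they compile away when stats are off:

/* Hypothetical placement in cifsglob.h; names as used above. */
#ifdef CONFIG_CIFS_STATS2
static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
{
	atomic_inc(&server->num_waiters);
}

static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
{
	atomic_dec(&server->num_waiters);
}
#else
static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) {}
static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) {}
#endif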
@@ -328,7 +325,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) | |||
328 | { | 325 | { |
329 | int error; | 326 | int error; |
330 | 327 | ||
331 | error = wait_event_killable(server->response_q, | 328 | error = wait_event_freezekillable(server->response_q, |
332 | midQ->midState != MID_REQUEST_SUBMITTED); | 329 | midQ->midState != MID_REQUEST_SUBMITTED); |
333 | if (error < 0) | 330 | if (error < 0) |
334 | return -ERESTARTSYS; | 331 | return -ERESTARTSYS; |
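Switching to wait_event_freezekillable() (hence the new linux/freezer.h include) lets a task blocked on an SMB response be frozen for suspend/hibernate instead of holding up the freezer. The macro is roughly equivalent to the following, sketched from its intent rather than quoted from freezer.h:

/* Approximation only: wake for the condition or a freeze request,
 * freeze if asked, then retry the wait afterwards. */
#define wait_event_freezekillable(wq, condition)			\
({									\
	int __ret;							\
	do {								\
		__ret = wait_event_killable(wq,				\
				(condition) || freezing(current));	\
		if (__ret || (condition))				\
			break;						\
	} while (try_to_freeze());					\
	__ret;								\
})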
@@ -343,8 +340,8 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) | |||
343 | */ | 340 | */ |
344 | int | 341 | int |
345 | cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | 342 | cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, |
346 | unsigned int nvec, mid_callback_t *callback, void *cbdata, | 343 | unsigned int nvec, mid_receive_t *receive, |
347 | bool ignore_pend) | 344 | mid_callback_t *callback, void *cbdata, bool ignore_pend) |
348 | { | 345 | { |
349 | int rc; | 346 | int rc; |
350 | struct mid_q_entry *mid; | 347 | struct mid_q_entry *mid; |
@@ -378,18 +375,17 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
378 | goto out_err; | 375 | goto out_err; |
379 | } | 376 | } |
380 | 377 | ||
378 | mid->receive = receive; | ||
381 | mid->callback = callback; | 379 | mid->callback = callback; |
382 | mid->callback_data = cbdata; | 380 | mid->callback_data = cbdata; |
383 | mid->midState = MID_REQUEST_SUBMITTED; | 381 | mid->midState = MID_REQUEST_SUBMITTED; |
384 | #ifdef CONFIG_CIFS_STATS2 | 382 | |
385 | atomic_inc(&server->inSend); | 383 | cifs_in_send_inc(server); |
386 | #endif | ||
387 | rc = smb_sendv(server, iov, nvec); | 384 | rc = smb_sendv(server, iov, nvec); |
388 | #ifdef CONFIG_CIFS_STATS2 | 385 | cifs_in_send_dec(server); |
389 | atomic_dec(&server->inSend); | 386 | cifs_save_when_sent(mid); |
390 | mid->when_sent = jiffies; | ||
391 | #endif | ||
392 | mutex_unlock(&server->srv_mutex); | 387 | mutex_unlock(&server->srv_mutex); |
388 | |||
393 | if (rc) | 389 | if (rc) |
394 | goto out_err; | 390 | goto out_err; |
395 | 391 | ||
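cifs_call_async() now takes a mid_receive_t alongside the completion callback, so a caller can supply its own routine for pulling a large response off the socket (for example, reading file data straight into the caller's pages) before the generic completion runs. The callback type is assumed to have this shape:

/* Assumed form of the receive hook; invoked from the demultiplex
 * thread once the header for this mid has been read. */
typedef int (mid_receive_t)(struct TCP_Server_Info *server,
			    struct mid_q_entry *mid);

A caller with no special receive path (the periodic echo, say) would simply pass NULL:

rc = cifs_call_async(server, iov, 1, NULL, cifs_echo_callback,
		     server, true);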
@@ -502,13 +498,18 @@ int | |||
502 | cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | 498 | cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, |
503 | bool log_error) | 499 | bool log_error) |
504 | { | 500 | { |
505 | dump_smb(mid->resp_buf, | 501 | unsigned int len = be32_to_cpu(mid->resp_buf->smb_buf_length) + 4; |
506 | min_t(u32, 92, be32_to_cpu(mid->resp_buf->smb_buf_length))); | 502 | |
503 | dump_smb(mid->resp_buf, min_t(u32, 92, len)); | ||
507 | 504 | ||
508 | /* convert the length into a more usable form */ | 505 | /* convert the length into a more usable form */ |
509 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | 506 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { |
507 | struct kvec iov; | ||
508 | |||
509 | iov.iov_base = mid->resp_buf; | ||
510 | iov.iov_len = len; | ||
510 | /* FIXME: add code to kill session */ | 511 | /* FIXME: add code to kill session */ |
511 | if (cifs_verify_signature(mid->resp_buf, server, | 512 | if (cifs_verify_signature(&iov, 1, server, |
512 | mid->sequence_number + 1) != 0) | 513 | mid->sequence_number + 1) != 0) |
513 | cERROR(1, "Unexpected SMB signature"); | 514 | cERROR(1, "Unexpected SMB signature"); |
514 | } | 515 | } |
@@ -575,14 +576,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
575 | } | 576 | } |
576 | 577 | ||
577 | midQ->midState = MID_REQUEST_SUBMITTED; | 578 | midQ->midState = MID_REQUEST_SUBMITTED; |
578 | #ifdef CONFIG_CIFS_STATS2 | 579 | cifs_in_send_inc(ses->server); |
579 | atomic_inc(&ses->server->inSend); | ||
580 | #endif | ||
581 | rc = smb_sendv(ses->server, iov, n_vec); | 580 | rc = smb_sendv(ses->server, iov, n_vec); |
582 | #ifdef CONFIG_CIFS_STATS2 | 581 | cifs_in_send_dec(ses->server); |
583 | atomic_dec(&ses->server->inSend); | 582 | cifs_save_when_sent(midQ); |
584 | midQ->when_sent = jiffies; | ||
585 | #endif | ||
586 | 583 | ||
587 | mutex_unlock(&ses->server->srv_mutex); | 584 | mutex_unlock(&ses->server->srv_mutex); |
588 | 585 | ||
@@ -703,14 +700,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
703 | } | 700 | } |
704 | 701 | ||
705 | midQ->midState = MID_REQUEST_SUBMITTED; | 702 | midQ->midState = MID_REQUEST_SUBMITTED; |
706 | #ifdef CONFIG_CIFS_STATS2 | 703 | |
707 | atomic_inc(&ses->server->inSend); | 704 | cifs_in_send_inc(ses->server); |
708 | #endif | ||
709 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 705 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
710 | #ifdef CONFIG_CIFS_STATS2 | 706 | cifs_in_send_dec(ses->server); |
711 | atomic_dec(&ses->server->inSend); | 707 | cifs_save_when_sent(midQ); |
712 | midQ->when_sent = jiffies; | ||
713 | #endif | ||
714 | mutex_unlock(&ses->server->srv_mutex); | 708 | mutex_unlock(&ses->server->srv_mutex); |
715 | 709 | ||
716 | if (rc < 0) | 710 | if (rc < 0) |
@@ -843,14 +837,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
843 | } | 837 | } |
844 | 838 | ||
845 | midQ->midState = MID_REQUEST_SUBMITTED; | 839 | midQ->midState = MID_REQUEST_SUBMITTED; |
846 | #ifdef CONFIG_CIFS_STATS2 | 840 | cifs_in_send_inc(ses->server); |
847 | atomic_inc(&ses->server->inSend); | ||
848 | #endif | ||
849 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 841 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
850 | #ifdef CONFIG_CIFS_STATS2 | 842 | cifs_in_send_dec(ses->server); |
851 | atomic_dec(&ses->server->inSend); | 843 | cifs_save_when_sent(midQ); |
852 | midQ->when_sent = jiffies; | ||
853 | #endif | ||
854 | mutex_unlock(&ses->server->srv_mutex); | 844 | mutex_unlock(&ses->server->srv_mutex); |
855 | 845 | ||
856 | if (rc < 0) { | 846 | if (rc < 0) { |
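The same wrapper pattern tidies the send-side accounting repeated in SendReceive2(), SendReceive() and SendReceiveBlockingLock(): bump an in-flight-send counter around smb_sendv()/smb_send() and stamp the mid with the send time. Plausible definitions, in the spirit of the cifs_num_waiters_*() helpers sketched earlier:

#ifdef CONFIG_CIFS_STATS2
static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
{
	atomic_inc(&server->inSend);
}

static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
{
	atomic_dec(&server->inSend);
}

static inline void cifs_save_when_sent(struct mid_q_entry *mid)
{
	mid->when_sent = jiffies;
}
#else
static inline void cifs_in_send_inc(struct TCP_Server_Info *server) {}
static inline void cifs_in_send_dec(struct TCP_Server_Info *server) {}
static inline void cifs_save_when_sent(struct mid_q_entry *mid) {}
#endif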
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 2a22fb2989e4..45f07c46f3ed 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/posix_acl_xattr.h> | 23 | #include <linux/posix_acl_xattr.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/xattr.h> | ||
25 | #include "cifsfs.h" | 26 | #include "cifsfs.h" |
26 | #include "cifspdu.h" | 27 | #include "cifspdu.h" |
27 | #include "cifsglob.h" | 28 | #include "cifsglob.h" |
@@ -31,16 +32,8 @@ | |||
31 | #define MAX_EA_VALUE_SIZE 65535 | 32 | #define MAX_EA_VALUE_SIZE 65535 |
32 | #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" | 33 | #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" |
33 | #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" | 34 | #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" |
34 | #define CIFS_XATTR_USER_PREFIX "user." | ||
35 | #define CIFS_XATTR_SYSTEM_PREFIX "system." | ||
36 | #define CIFS_XATTR_OS2_PREFIX "os2." | ||
37 | #define CIFS_XATTR_SECURITY_PREFIX "security." | ||
38 | #define CIFS_XATTR_TRUSTED_PREFIX "trusted." | ||
39 | #define XATTR_TRUSTED_PREFIX_LEN 8 | ||
40 | #define XATTR_SECURITY_PREFIX_LEN 9 | ||
41 | /* BB need to add server (Samba e.g) support for security and trusted prefix */ | ||
42 | |||
43 | 35 | ||
36 | /* BB need to add server (Samba e.g) support for security and trusted prefix */ | ||
44 | 37 | ||
45 | int cifs_removexattr(struct dentry *direntry, const char *ea_name) | 38 | int cifs_removexattr(struct dentry *direntry, const char *ea_name) |
46 | { | 39 | { |
@@ -76,8 +69,8 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) | |||
76 | } | 69 | } |
77 | if (ea_name == NULL) { | 70 | if (ea_name == NULL) { |
78 | cFYI(1, "Null xattr names not supported"); | 71 | cFYI(1, "Null xattr names not supported"); |
79 | } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) | 72 | } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) |
80 | && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) { | 73 | && (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))) { |
81 | cFYI(1, | 74 | cFYI(1, |
82 | "illegal xattr request %s (only user namespace supported)", | 75 | "illegal xattr request %s (only user namespace supported)", |
83 | ea_name); | 76 | ea_name); |
@@ -88,7 +81,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) | |||
88 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 81 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
89 | goto remove_ea_exit; | 82 | goto remove_ea_exit; |
90 | 83 | ||
91 | ea_name += 5; /* skip past user. prefix */ | 84 | ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ |
92 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, | 85 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, |
93 | (__u16)0, cifs_sb->local_nls, | 86 | (__u16)0, cifs_sb->local_nls, |
94 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 87 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
@@ -149,21 +142,23 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
149 | 142 | ||
150 | if (ea_name == NULL) { | 143 | if (ea_name == NULL) { |
151 | cFYI(1, "Null xattr names not supported"); | 144 | cFYI(1, "Null xattr names not supported"); |
152 | } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { | 145 | } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) |
146 | == 0) { | ||
153 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 147 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
154 | goto set_ea_exit; | 148 | goto set_ea_exit; |
155 | if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) | 149 | if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) |
156 | cFYI(1, "attempt to set cifs inode metadata"); | 150 | cFYI(1, "attempt to set cifs inode metadata"); |
157 | 151 | ||
158 | ea_name += 5; /* skip past user. prefix */ | 152 | ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ |
159 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, | 153 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, |
160 | (__u16)value_size, cifs_sb->local_nls, | 154 | (__u16)value_size, cifs_sb->local_nls, |
161 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 155 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
162 | } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { | 156 | } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) |
157 | == 0) { | ||
163 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 158 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
164 | goto set_ea_exit; | 159 | goto set_ea_exit; |
165 | 160 | ||
166 | ea_name += 4; /* skip past os2. prefix */ | 161 | ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ |
167 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, | 162 | rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, |
168 | (__u16)value_size, cifs_sb->local_nls, | 163 | (__u16)value_size, cifs_sb->local_nls, |
169 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 164 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
@@ -178,7 +173,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
178 | #ifdef CONFIG_CIFS_ACL | 173 | #ifdef CONFIG_CIFS_ACL |
179 | memcpy(pacl, ea_value, value_size); | 174 | memcpy(pacl, ea_value, value_size); |
180 | rc = set_cifs_acl(pacl, value_size, | 175 | rc = set_cifs_acl(pacl, value_size, |
181 | direntry->d_inode, full_path); | 176 | direntry->d_inode, full_path, CIFS_ACL_DACL); |
182 | if (rc == 0) /* force revalidate of the inode */ | 177 | if (rc == 0) /* force revalidate of the inode */ |
183 | CIFS_I(direntry->d_inode)->time = 0; | 178 | CIFS_I(direntry->d_inode)->time = 0; |
184 | kfree(pacl); | 179 | kfree(pacl); |
@@ -269,7 +264,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
269 | /* return alt name if available as pseudo attr */ | 264 | /* return alt name if available as pseudo attr */ |
270 | if (ea_name == NULL) { | 265 | if (ea_name == NULL) { |
271 | cFYI(1, "Null xattr names not supported"); | 266 | cFYI(1, "Null xattr names not supported"); |
272 | } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { | 267 | } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) |
268 | == 0) { | ||
273 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 269 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
274 | goto get_ea_exit; | 270 | goto get_ea_exit; |
275 | 271 | ||
@@ -277,15 +273,15 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
277 | cFYI(1, "attempt to query cifs inode metadata"); | 273 | cFYI(1, "attempt to query cifs inode metadata"); |
278 | /* revalidate/getattr then populate from inode */ | 274 | /* revalidate/getattr then populate from inode */ |
279 | } /* BB add else when above is implemented */ | 275 | } /* BB add else when above is implemented */ |
280 | ea_name += 5; /* skip past user. prefix */ | 276 | ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ |
281 | rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, | 277 | rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, |
282 | buf_size, cifs_sb->local_nls, | 278 | buf_size, cifs_sb->local_nls, |
283 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 279 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
284 | } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { | 280 | } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { |
285 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 281 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
286 | goto get_ea_exit; | 282 | goto get_ea_exit; |
287 | 283 | ||
288 | ea_name += 4; /* skip past os2. prefix */ | 284 | ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ |
289 | rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, | 285 | rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, |
290 | buf_size, cifs_sb->local_nls, | 286 | buf_size, cifs_sb->local_nls, |
291 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 287 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
@@ -339,10 +335,10 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
339 | cFYI(1, "Query CIFS ACL not supported yet"); | 335 | cFYI(1, "Query CIFS ACL not supported yet"); |
340 | #endif /* CONFIG_CIFS_ACL */ | 336 | #endif /* CONFIG_CIFS_ACL */ |
341 | } else if (strncmp(ea_name, | 337 | } else if (strncmp(ea_name, |
342 | CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { | 338 | XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { |
343 | cFYI(1, "Trusted xattr namespace not supported yet"); | 339 | cFYI(1, "Trusted xattr namespace not supported yet"); |
344 | } else if (strncmp(ea_name, | 340 | } else if (strncmp(ea_name, |
345 | CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { | 341 | XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { |
346 | cFYI(1, "Security xattr namespace not supported yet"); | 342 | cFYI(1, "Security xattr namespace not supported yet"); |
347 | } else | 343 | } else |
348 | cFYI(1, | 344 | cFYI(1, |
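The xattr.c changes drop the private CIFS_XATTR_*_PREFIX copies, and their hand-counted lengths like the literal 5 for "user.", in favour of the shared XATTR_* constants from linux/xattr.h, which keeps each prefix string and its length defined in one place. The resulting dispatch pattern, condensed into a sketch (error handling and the actual CIFS calls elided):

#include <linux/xattr.h>	/* XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN, ... */

static int ea_dispatch(const char *ea_name)
{
	if (!strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
		ea_name += XATTR_USER_PREFIX_LEN;  /* skip past "user." */
		/* ... CIFSSMBSetEA/CIFSSMBQAllEAs with the bare name ... */
	} else if (!strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) {
		ea_name += XATTR_OS2_PREFIX_LEN;   /* skip past "os2." */
		/* ... */
	}
	return 0;
}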
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 44e17e9c21ae..cc0ea9fe5ecf 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -59,12 +59,11 @@ void coda_sysctl_clean(void); | |||
59 | 59 | ||
60 | #define CODA_ALLOC(ptr, cast, size) do { \ | 60 | #define CODA_ALLOC(ptr, cast, size) do { \ |
61 | if (size < PAGE_SIZE) \ | 61 | if (size < PAGE_SIZE) \ |
62 | ptr = kmalloc((unsigned long) size, GFP_KERNEL); \ | 62 | ptr = kzalloc((unsigned long) size, GFP_KERNEL); \ |
63 | else \ | 63 | else \ |
64 | ptr = (cast)vmalloc((unsigned long) size); \ | 64 | ptr = (cast)vzalloc((unsigned long) size); \ |
65 | if (!ptr) \ | 65 | if (!ptr) \ |
66 | printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ | 66 | printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ |
67 | else memset( ptr, 0, size ); \ | ||
68 | } while (0) | 67 | } while (0) |
69 | 68 | ||
70 | 69 | ||
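The CODA_ALLOC cleanup swaps kmalloc()/vmalloc() plus a trailing memset() for kzalloc()/vzalloc(), which hand back pre-zeroed memory and so eliminate both the memset() and the else branch guarding it. The same idiom written out as an ordinary allocation (size and GFP flags illustrative):

void *buf;

if (size < PAGE_SIZE)
	buf = kzalloc(size, GFP_KERNEL);	/* zeroed kmalloc */
else
	buf = vzalloc(size);			/* zeroed vmalloc */
if (!buf)
	return -ENOMEM;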
diff --git a/fs/compat.c b/fs/compat.c
index 0b48d018e38a..302e761bd0aa 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/dirent.h> | 37 | #include <linux/dirent.h> |
38 | #include <linux/fsnotify.h> | 38 | #include <linux/fsnotify.h> |
39 | #include <linux/highuid.h> | 39 | #include <linux/highuid.h> |
40 | #include <linux/nfsd/syscall.h> | ||
41 | #include <linux/personality.h> | 40 | #include <linux/personality.h> |
42 | #include <linux/rwsem.h> | 41 | #include <linux/rwsem.h> |
43 | #include <linux/tsacct_kern.h> | 42 | #include <linux/tsacct_kern.h> |
@@ -247,11 +246,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * | |||
247 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || | 246 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || |
248 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || | 247 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || |
249 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || | 248 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || |
250 | __put_user(0, &ubuf->f_spare[0]) || | 249 | __put_user(kbuf->f_flags, &ubuf->f_flags) || |
251 | __put_user(0, &ubuf->f_spare[1]) || | 250 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) |
252 | __put_user(0, &ubuf->f_spare[2]) || | ||
253 | __put_user(0, &ubuf->f_spare[3]) || | ||
254 | __put_user(0, &ubuf->f_spare[4])) | ||
255 | return -EFAULT; | 251 | return -EFAULT; |
256 | return 0; | 252 | return 0; |
257 | } | 253 | } |
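put_compat_statfs() now copies the real f_flags out to userspace (the field previously sat among the zeroed spares) and clears the remaining f_spare array with a single __clear_user() instead of five individual __put_user(0, ...) calls. The idiom in isolation:

/* Zero a user-space array member in one call; __clear_user() returns
 * the number of bytes NOT cleared, so nonzero means -EFAULT. */
if (__clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
	return -EFAULT;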
@@ -1675,11 +1671,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1675 | } | 1671 | } |
1676 | #endif /* HAVE_SET_RESTORE_SIGMASK */ | 1672 | #endif /* HAVE_SET_RESTORE_SIGMASK */ |
1677 | 1673 | ||
1678 | long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) | ||
1679 | { | ||
1680 | return sys_ni_syscall(); | ||
1681 | } | ||
1682 | |||
1683 | #ifdef CONFIG_EPOLL | 1674 | #ifdef CONFIG_EPOLL |
1684 | 1675 | ||
1685 | #ifdef HAVE_SET_RESTORE_SIGMASK | 1676 | #ifdef HAVE_SET_RESTORE_SIGMASK |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8be086e9abe4..51352de88ef1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1003,6 +1003,7 @@ COMPATIBLE_IOCTL(PPPIOCCONNECT) | |||
1003 | COMPATIBLE_IOCTL(PPPIOCDISCONN) | 1003 | COMPATIBLE_IOCTL(PPPIOCDISCONN) |
1004 | COMPATIBLE_IOCTL(PPPIOCATTCHAN) | 1004 | COMPATIBLE_IOCTL(PPPIOCATTCHAN) |
1005 | COMPATIBLE_IOCTL(PPPIOCGCHAN) | 1005 | COMPATIBLE_IOCTL(PPPIOCGCHAN) |
1006 | COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) | ||
1006 | /* PPPOX */ | 1007 | /* PPPOX */ |
1007 | COMPATIBLE_IOCTL(PPPOEIOCSFWD) | 1008 | COMPATIBLE_IOCTL(PPPOEIOCSFWD) |
1008 | COMPATIBLE_IOCTL(PPPOEIOCDFWD) | 1009 | COMPATIBLE_IOCTL(PPPOEIOCDFWD) |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c83f4768eeaa..ca418aaf6352 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -23,7 +23,8 @@ | |||
23 | * | 23 | * |
24 | * configfs Copyright (C) 2005 Oracle. All rights reserved. | 24 | * configfs Copyright (C) 2005 Oracle. All rights reserved. |
25 | * | 25 | * |
26 | * Please see Documentation/filesystems/configfs.txt for more information. | 26 | * Please see Documentation/filesystems/configfs/configfs.txt for more |
27 | * information. | ||
27 | */ | 28 | */ |
28 | 29 | ||
29 | #undef DEBUG | 30 | #undef DEBUG |
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 76dc4c3e5d51..50cee7f9110b 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -23,7 +23,7 @@ | |||
23 | * | 23 | * |
24 | * configfs Copyright (C) 2005 Oracle. All rights reserved. | 24 | * configfs Copyright (C) 2005 Oracle. All rights reserved. |
25 | * | 25 | * |
26 | * Please see the file Documentation/filesystems/configfs.txt for | 26 | * Please see the file Documentation/filesystems/configfs/configfs.txt for |
27 | * critical information about using the config_item interface. | 27 | * critical information about using the config_item interface. |
28 | */ | 28 | */ |
29 | 29 | ||
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e7a7a2f07324..f3a257d7a985 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * file.c - part of debugfs, a tiny little debug file system | 2 | * inode.c - part of debugfs, a tiny little debug file system |
3 | * | 3 | * |
4 | * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> | 4 | * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> |
5 | * Copyright (C) 2004 IBM Inc. | 5 | * Copyright (C) 2004 IBM Inc. |
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 44a360ca8046..d740ab67ff6e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -39,7 +39,7 @@ | |||
39 | 39 | ||
40 | /* | 40 | /* |
41 | * How many user pages to map in one call to get_user_pages(). This determines | 41 | * How many user pages to map in one call to get_user_pages(). This determines |
42 | * the size of a structure on the stack. | 42 | * the size of a structure in the slab cache |
43 | */ | 43 | */ |
44 | #define DIO_PAGES 64 | 44 | #define DIO_PAGES 64 |
45 | 45 | ||
@@ -55,13 +55,10 @@ | |||
55 | * blocksize. | 55 | * blocksize. |
56 | */ | 56 | */ |
57 | 57 | ||
58 | struct dio { | 58 | /* dio_state only used in the submission path */ |
59 | /* BIO submission state */ | 59 | |
60 | struct dio_submit { | ||
60 | struct bio *bio; /* bio under assembly */ | 61 | struct bio *bio; /* bio under assembly */ |
61 | struct inode *inode; | ||
62 | int rw; | ||
63 | loff_t i_size; /* i_size when submitted */ | ||
64 | int flags; /* doesn't change */ | ||
65 | unsigned blkbits; /* doesn't change */ | 62 | unsigned blkbits; /* doesn't change */ |
66 | unsigned blkfactor; /* When we're using an alignment which | 63 | unsigned blkfactor; /* When we're using an alignment which |
67 | is finer than the filesystem's soft | 64 | is finer than the filesystem's soft |
@@ -76,18 +73,17 @@ struct dio { | |||
76 | sector_t block_in_file; /* Current offset into the underlying | 73 | sector_t block_in_file; /* Current offset into the underlying |
77 | file in dio_block units. */ | 74 | file in dio_block units. */ |
78 | unsigned blocks_available; /* At block_in_file. changes */ | 75 | unsigned blocks_available; /* At block_in_file. changes */ |
76 | int reap_counter; /* rate limit reaping */ | ||
79 | sector_t final_block_in_request;/* doesn't change */ | 77 | sector_t final_block_in_request;/* doesn't change */ |
80 | unsigned first_block_in_page; /* doesn't change, Used only once */ | 78 | unsigned first_block_in_page; /* doesn't change, Used only once */ |
81 | int boundary; /* prev block is at a boundary */ | 79 | int boundary; /* prev block is at a boundary */ |
82 | int reap_counter; /* rate limit reaping */ | ||
83 | get_block_t *get_block; /* block mapping function */ | 80 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | ||
85 | dio_submit_t *submit_io; /* IO submition function */ | 81 | dio_submit_t *submit_io; /* IO submition function */ |
82 | |||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | 83 | loff_t logical_offset_in_bio; /* current first logical block in bio */ |
87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 84 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
88 | sector_t next_block_for_io; /* next block to be put under IO, | 85 | sector_t next_block_for_io; /* next block to be put under IO, |
89 | in dio_blocks units */ | 86 | in dio_blocks units */ |
90 | struct buffer_head map_bh; /* last get_block() result */ | ||
91 | 87 | ||
92 | /* | 88 | /* |
93 | * Deferred addition of a page to the dio. These variables are | 89 | * Deferred addition of a page to the dio. These variables are |
@@ -100,18 +96,6 @@ struct dio { | |||
100 | sector_t cur_page_block; /* Where it starts */ | 96 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | 97 | loff_t cur_page_fs_offset; /* Offset in file */ |
102 | 98 | ||
103 | /* BIO completion state */ | ||
104 | spinlock_t bio_lock; /* protects BIO fields below */ | ||
105 | unsigned long refcount; /* direct_io_worker() and bios */ | ||
106 | struct bio *bio_list; /* singly linked via bi_private */ | ||
107 | struct task_struct *waiter; /* waiting task (NULL if none) */ | ||
108 | |||
109 | /* AIO related stuff */ | ||
110 | struct kiocb *iocb; /* kiocb */ | ||
111 | int is_async; /* is IO async ? */ | ||
112 | int io_error; /* IO error in completion path */ | ||
113 | ssize_t result; /* IO result */ | ||
114 | |||
115 | /* | 99 | /* |
116 | * Page fetching state. These variables belong to dio_refill_pages(). | 100 | * Page fetching state. These variables belong to dio_refill_pages(). |
117 | */ | 101 | */ |
@@ -125,7 +109,30 @@ struct dio { | |||
125 | */ | 109 | */ |
126 | unsigned head; /* next page to process */ | 110 | unsigned head; /* next page to process */ |
127 | unsigned tail; /* last valid page + 1 */ | 111 | unsigned tail; /* last valid page + 1 */ |
112 | }; | ||
113 | |||
114 | /* dio_state communicated between submission path and end_io */ | ||
115 | struct dio { | ||
116 | int flags; /* doesn't change */ | ||
117 | int rw; | ||
118 | struct inode *inode; | ||
119 | loff_t i_size; /* i_size when submitted */ | ||
120 | dio_iodone_t *end_io; /* IO completion function */ | ||
121 | |||
122 | void *private; /* copy from map_bh.b_private */ | ||
123 | |||
124 | /* BIO completion state */ | ||
125 | spinlock_t bio_lock; /* protects BIO fields below */ | ||
128 | int page_errors; /* errno from get_user_pages() */ | 126 | int page_errors; /* errno from get_user_pages() */ |
127 | int is_async; /* is IO async ? */ | ||
128 | int io_error; /* IO error in completion path */ | ||
129 | unsigned long refcount; /* direct_io_worker() and bios */ | ||
130 | struct bio *bio_list; /* singly linked via bi_private */ | ||
131 | struct task_struct *waiter; /* waiting task (NULL if none) */ | ||
132 | |||
133 | /* AIO related stuff */ | ||
134 | struct kiocb *iocb; /* kiocb */ | ||
135 | ssize_t result; /* IO result */ | ||
129 | 136 | ||
130 | /* | 137 | /* |
131 | * pages[] (and any fields placed after it) are not zeroed out at | 138 | * pages[] (and any fields placed after it) are not zeroed out at |
@@ -133,7 +140,9 @@ struct dio { | |||
133 | * wish that they not be zeroed. | 140 | * wish that they not be zeroed. |
134 | */ | 141 | */ |
135 | struct page *pages[DIO_PAGES]; /* page buffer */ | 142 | struct page *pages[DIO_PAGES]; /* page buffer */ |
136 | }; | 143 | } ____cacheline_aligned_in_smp; |
144 | |||
145 | static struct kmem_cache *dio_cache __read_mostly; | ||
137 | 146 | ||
138 | static void __inode_dio_wait(struct inode *inode) | 147 | static void __inode_dio_wait(struct inode *inode) |
139 | { | 148 | { |
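The hunks above split the old monolithic struct dio in two: struct dio_submit holds state only the submission path touches and can live on the stack, while the surviving struct dio carries what completion needs, is ____cacheline_aligned_in_smp so the completion-side fields stay off the submitter's hot cache lines, and is now allocated from a dedicated slab rather than kmalloc(). The dio_cache declared above would be registered once at init, presumably along these lines:

/* Assumed init path for the new slab cache; KMEM_CACHE() derives the
 * cache name, size and alignment from the struct definition itself. */
static int __init dio_init(void)
{
	dio_cache = KMEM_CACHE(dio, SLAB_PANIC);
	return 0;
}
module_init(dio_init);

Allocation sites would then pair kmem_cache_alloc(dio_cache, GFP_KERNEL) with the kmem_cache_free() visible in dio_bio_end_aio() below.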
@@ -182,27 +191,27 @@ EXPORT_SYMBOL_GPL(inode_dio_done); | |||
182 | /* | 191 | /* |
183 | * How many pages are in the queue? | 192 | * How many pages are in the queue? |
184 | */ | 193 | */ |
185 | static inline unsigned dio_pages_present(struct dio *dio) | 194 | static inline unsigned dio_pages_present(struct dio_submit *sdio) |
186 | { | 195 | { |
187 | return dio->tail - dio->head; | 196 | return sdio->tail - sdio->head; |
188 | } | 197 | } |
189 | 198 | ||
190 | /* | 199 | /* |
191 | * Go grab and pin some userspace pages. Typically we'll get 64 at a time. | 200 | * Go grab and pin some userspace pages. Typically we'll get 64 at a time. |
192 | */ | 201 | */ |
193 | static int dio_refill_pages(struct dio *dio) | 202 | static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) |
194 | { | 203 | { |
195 | int ret; | 204 | int ret; |
196 | int nr_pages; | 205 | int nr_pages; |
197 | 206 | ||
198 | nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); | 207 | nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES); |
199 | ret = get_user_pages_fast( | 208 | ret = get_user_pages_fast( |
200 | dio->curr_user_address, /* Where from? */ | 209 | sdio->curr_user_address, /* Where from? */ |
201 | nr_pages, /* How many pages? */ | 210 | nr_pages, /* How many pages? */ |
202 | dio->rw == READ, /* Write to memory? */ | 211 | dio->rw == READ, /* Write to memory? */ |
203 | &dio->pages[0]); /* Put results here */ | 212 | &dio->pages[0]); /* Put results here */ |
204 | 213 | ||
205 | if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { | 214 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { |
206 | struct page *page = ZERO_PAGE(0); | 215 | struct page *page = ZERO_PAGE(0); |
207 | /* | 216 | /* |
208 | * A memory fault, but the filesystem has some outstanding | 217 | * A memory fault, but the filesystem has some outstanding |
@@ -213,17 +222,17 @@ static int dio_refill_pages(struct dio *dio) | |||
213 | dio->page_errors = ret; | 222 | dio->page_errors = ret; |
214 | page_cache_get(page); | 223 | page_cache_get(page); |
215 | dio->pages[0] = page; | 224 | dio->pages[0] = page; |
216 | dio->head = 0; | 225 | sdio->head = 0; |
217 | dio->tail = 1; | 226 | sdio->tail = 1; |
218 | ret = 0; | 227 | ret = 0; |
219 | goto out; | 228 | goto out; |
220 | } | 229 | } |
221 | 230 | ||
222 | if (ret >= 0) { | 231 | if (ret >= 0) { |
223 | dio->curr_user_address += ret * PAGE_SIZE; | 232 | sdio->curr_user_address += ret * PAGE_SIZE; |
224 | dio->curr_page += ret; | 233 | sdio->curr_page += ret; |
225 | dio->head = 0; | 234 | sdio->head = 0; |
226 | dio->tail = ret; | 235 | sdio->tail = ret; |
227 | ret = 0; | 236 | ret = 0; |
228 | } | 237 | } |
229 | out: | 238 | out: |
@@ -236,17 +245,18 @@ out: | |||
236 | * decent number of pages, less frequently. To provide nicer use of the | 245 | * decent number of pages, less frequently. To provide nicer use of the |
237 | * L1 cache. | 246 | * L1 cache. |
238 | */ | 247 | */ |
239 | static struct page *dio_get_page(struct dio *dio) | 248 | static inline struct page *dio_get_page(struct dio *dio, |
249 | struct dio_submit *sdio) | ||
240 | { | 250 | { |
241 | if (dio_pages_present(dio) == 0) { | 251 | if (dio_pages_present(sdio) == 0) { |
242 | int ret; | 252 | int ret; |
243 | 253 | ||
244 | ret = dio_refill_pages(dio); | 254 | ret = dio_refill_pages(dio, sdio); |
245 | if (ret) | 255 | if (ret) |
246 | return ERR_PTR(ret); | 256 | return ERR_PTR(ret); |
247 | BUG_ON(dio_pages_present(dio) == 0); | 257 | BUG_ON(dio_pages_present(sdio) == 0); |
248 | } | 258 | } |
249 | return dio->pages[dio->head++]; | 259 | return dio->pages[sdio->head++]; |
250 | } | 260 | } |
251 | 261 | ||
252 | /** | 262 | /** |
@@ -292,7 +302,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is | |||
292 | 302 | ||
293 | if (dio->end_io && dio->result) { | 303 | if (dio->end_io && dio->result) { |
294 | dio->end_io(dio->iocb, offset, transferred, | 304 | dio->end_io(dio->iocb, offset, transferred, |
295 | dio->map_bh.b_private, ret, is_async); | 305 | dio->private, ret, is_async); |
296 | } else { | 306 | } else { |
297 | if (is_async) | 307 | if (is_async) |
298 | aio_complete(dio->iocb, ret, 0); | 308 | aio_complete(dio->iocb, ret, 0); |
@@ -323,7 +333,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) | |||
323 | 333 | ||
324 | if (remaining == 0) { | 334 | if (remaining == 0) { |
325 | dio_complete(dio, dio->iocb->ki_pos, 0, true); | 335 | dio_complete(dio, dio->iocb->ki_pos, 0, true); |
326 | kfree(dio); | 336 | kmem_cache_free(dio_cache, dio); |
327 | } | 337 | } |
328 | } | 338 | } |
329 | 339 | ||
@@ -367,9 +377,10 @@ void dio_end_io(struct bio *bio, int error) | |||
367 | } | 377 | } |
368 | EXPORT_SYMBOL_GPL(dio_end_io); | 378 | EXPORT_SYMBOL_GPL(dio_end_io); |
369 | 379 | ||
370 | static void | 380 | static inline void |
371 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 381 | dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, |
372 | sector_t first_sector, int nr_vecs) | 382 | struct block_device *bdev, |
383 | sector_t first_sector, int nr_vecs) | ||
373 | { | 384 | { |
374 | struct bio *bio; | 385 | struct bio *bio; |
375 | 386 | ||
@@ -386,8 +397,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
386 | else | 397 | else |
387 | bio->bi_end_io = dio_bio_end_io; | 398 | bio->bi_end_io = dio_bio_end_io; |
388 | 399 | ||
389 | dio->bio = bio; | 400 | sdio->bio = bio; |
390 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | 401 | sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; |
391 | } | 402 | } |
392 | 403 | ||
393 | /* | 404 | /* |
@@ -397,9 +408,9 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
397 | * | 408 | * |
398 | * bios hold a dio reference between submit_bio and ->end_io. | 409 | * bios hold a dio reference between submit_bio and ->end_io. |
399 | */ | 410 | */ |
400 | static void dio_bio_submit(struct dio *dio) | 411 | static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) |
401 | { | 412 | { |
402 | struct bio *bio = dio->bio; | 413 | struct bio *bio = sdio->bio; |
403 | unsigned long flags; | 414 | unsigned long flags; |
404 | 415 | ||
405 | bio->bi_private = dio; | 416 | bio->bi_private = dio; |
@@ -411,24 +422,24 @@ static void dio_bio_submit(struct dio *dio) | |||
411 | if (dio->is_async && dio->rw == READ) | 422 | if (dio->is_async && dio->rw == READ) |
412 | bio_set_pages_dirty(bio); | 423 | bio_set_pages_dirty(bio); |
413 | 424 | ||
414 | if (dio->submit_io) | 425 | if (sdio->submit_io) |
415 | dio->submit_io(dio->rw, bio, dio->inode, | 426 | sdio->submit_io(dio->rw, bio, dio->inode, |
416 | dio->logical_offset_in_bio); | 427 | sdio->logical_offset_in_bio); |
417 | else | 428 | else |
418 | submit_bio(dio->rw, bio); | 429 | submit_bio(dio->rw, bio); |
419 | 430 | ||
420 | dio->bio = NULL; | 431 | sdio->bio = NULL; |
421 | dio->boundary = 0; | 432 | sdio->boundary = 0; |
422 | dio->logical_offset_in_bio = 0; | 433 | sdio->logical_offset_in_bio = 0; |
423 | } | 434 | } |
424 | 435 | ||
425 | /* | 436 | /* |
426 | * Release any resources in case of a failure | 437 | * Release any resources in case of a failure |
427 | */ | 438 | */ |
428 | static void dio_cleanup(struct dio *dio) | 439 | static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) |
429 | { | 440 | { |
430 | while (dio_pages_present(dio)) | 441 | while (dio_pages_present(sdio)) |
431 | page_cache_release(dio_get_page(dio)); | 442 | page_cache_release(dio_get_page(dio, sdio)); |
432 | } | 443 | } |
433 | 444 | ||
434 | /* | 445 | /* |
@@ -518,11 +529,11 @@ static void dio_await_completion(struct dio *dio) | |||
518 | * | 529 | * |
519 | * This also helps to limit the peak amount of pinned userspace memory. | 530 | * This also helps to limit the peak amount of pinned userspace memory. |
520 | */ | 531 | */ |
521 | static int dio_bio_reap(struct dio *dio) | 532 | static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) |
522 | { | 533 | { |
523 | int ret = 0; | 534 | int ret = 0; |
524 | 535 | ||
525 | if (dio->reap_counter++ >= 64) { | 536 | if (sdio->reap_counter++ >= 64) { |
526 | while (dio->bio_list) { | 537 | while (dio->bio_list) { |
527 | unsigned long flags; | 538 | unsigned long flags; |
528 | struct bio *bio; | 539 | struct bio *bio; |
@@ -536,14 +547,14 @@ static int dio_bio_reap(struct dio *dio) | |||
536 | if (ret == 0) | 547 | if (ret == 0) |
537 | ret = ret2; | 548 | ret = ret2; |
538 | } | 549 | } |
539 | dio->reap_counter = 0; | 550 | sdio->reap_counter = 0; |
540 | } | 551 | } |
541 | return ret; | 552 | return ret; |
542 | } | 553 | } |
543 | 554 | ||
544 | /* | 555 | /* |
545 | * Call into the fs to map some more disk blocks. We record the current number | 556 | * Call into the fs to map some more disk blocks. We record the current number |
546 | * of available blocks at dio->blocks_available. These are in units of the | 557 | * of available blocks at sdio->blocks_available. These are in units of the |
547 | * fs blocksize, (1 << inode->i_blkbits). | 558 | * fs blocksize, (1 << inode->i_blkbits). |
548 | * | 559 | * |
549 | * The fs is allowed to map lots of blocks at once. If it wants to do that, | 560 | * The fs is allowed to map lots of blocks at once. If it wants to do that, |
@@ -564,10 +575,10 @@ static int dio_bio_reap(struct dio *dio) | |||
564 | * buffer_mapped(). However the direct-io code will only process holes one | 575 | * buffer_mapped(). However the direct-io code will only process holes one |
565 | * block at a time - it will repeatedly call get_block() as it walks the hole. | 576 | * block at a time - it will repeatedly call get_block() as it walks the hole. |
566 | */ | 577 | */ |
567 | static int get_more_blocks(struct dio *dio) | 578 | static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, |
579 | struct buffer_head *map_bh) | ||
568 | { | 580 | { |
569 | int ret; | 581 | int ret; |
570 | struct buffer_head *map_bh = &dio->map_bh; | ||
571 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ | 582 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ |
572 | unsigned long fs_count; /* Number of filesystem-sized blocks */ | 583 | unsigned long fs_count; /* Number of filesystem-sized blocks */ |
573 | unsigned long dio_count;/* Number of dio_block-sized blocks */ | 584 | unsigned long dio_count;/* Number of dio_block-sized blocks */ |
@@ -580,11 +591,11 @@ static int get_more_blocks(struct dio *dio) | |||
580 | */ | 591 | */ |
581 | ret = dio->page_errors; | 592 | ret = dio->page_errors; |
582 | if (ret == 0) { | 593 | if (ret == 0) { |
583 | BUG_ON(dio->block_in_file >= dio->final_block_in_request); | 594 | BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); |
584 | fs_startblk = dio->block_in_file >> dio->blkfactor; | 595 | fs_startblk = sdio->block_in_file >> sdio->blkfactor; |
585 | dio_count = dio->final_block_in_request - dio->block_in_file; | 596 | dio_count = sdio->final_block_in_request - sdio->block_in_file; |
586 | fs_count = dio_count >> dio->blkfactor; | 597 | fs_count = dio_count >> sdio->blkfactor; |
587 | blkmask = (1 << dio->blkfactor) - 1; | 598 | blkmask = (1 << sdio->blkfactor) - 1; |
588 | if (dio_count & blkmask) | 599 | if (dio_count & blkmask) |
589 | fs_count++; | 600 | fs_count++; |
590 | 601 | ||
@@ -604,13 +615,16 @@ static int get_more_blocks(struct dio *dio) | |||
604 | */ | 615 | */ |
605 | create = dio->rw & WRITE; | 616 | create = dio->rw & WRITE; |
606 | if (dio->flags & DIO_SKIP_HOLES) { | 617 | if (dio->flags & DIO_SKIP_HOLES) { |
607 | if (dio->block_in_file < (i_size_read(dio->inode) >> | 618 | if (sdio->block_in_file < (i_size_read(dio->inode) >> |
608 | dio->blkbits)) | 619 | sdio->blkbits)) |
609 | create = 0; | 620 | create = 0; |
610 | } | 621 | } |
611 | 622 | ||
612 | ret = (*dio->get_block)(dio->inode, fs_startblk, | 623 | ret = (*sdio->get_block)(dio->inode, fs_startblk, |
613 | map_bh, create); | 624 | map_bh, create); |
625 | |||
626 | /* Store for completion */ | ||
627 | dio->private = map_bh->b_private; | ||
614 | } | 628 | } |
615 | return ret; | 629 | return ret; |
616 | } | 630 | } |
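Because the completion side no longer sees the submission-only map_bh, get_more_blocks() snapshots map_bh->b_private into dio->private immediately after the get_block() call, and dio_complete() hands that pointer to the filesystem's end_io hook. For reference, the hook's assumed shape, consistent with the dio_complete() call shown earlier:

/* dio_iodone_t as dio_complete() invokes it: 'private' is the value
 * saved from map_bh->b_private at get_block() time. */
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
			    ssize_t bytes, void *private, int ret,
			    bool is_async);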
@@ -618,20 +632,21 @@ static int get_more_blocks(struct dio *dio) | |||
618 | /* | 632 | /* |
619 | * There is no bio. Make one now. | 633 | * There is no bio. Make one now. |
620 | */ | 634 | */ |
621 | static int dio_new_bio(struct dio *dio, sector_t start_sector) | 635 | static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, |
636 | sector_t start_sector, struct buffer_head *map_bh) | ||
622 | { | 637 | { |
623 | sector_t sector; | 638 | sector_t sector; |
624 | int ret, nr_pages; | 639 | int ret, nr_pages; |
625 | 640 | ||
626 | ret = dio_bio_reap(dio); | 641 | ret = dio_bio_reap(dio, sdio); |
627 | if (ret) | 642 | if (ret) |
628 | goto out; | 643 | goto out; |
629 | sector = start_sector << (dio->blkbits - 9); | 644 | sector = start_sector << (sdio->blkbits - 9); |
630 | nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); | 645 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); |
631 | nr_pages = min(nr_pages, BIO_MAX_PAGES); | 646 | nr_pages = min(nr_pages, BIO_MAX_PAGES); |
632 | BUG_ON(nr_pages <= 0); | 647 | BUG_ON(nr_pages <= 0); |
633 | dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); | 648 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); |
634 | dio->boundary = 0; | 649 | sdio->boundary = 0; |
635 | out: | 650 | out: |
636 | return ret; | 651 | return ret; |
637 | } | 652 | } |
@@ -643,21 +658,21 @@ out: | |||
643 | * | 658 | * |
644 | * Return zero on success. Non-zero means the caller needs to start a new BIO. | 659 | * Return zero on success. Non-zero means the caller needs to start a new BIO. |
645 | */ | 660 | */ |
646 | static int dio_bio_add_page(struct dio *dio) | 661 | static inline int dio_bio_add_page(struct dio_submit *sdio) |
647 | { | 662 | { |
648 | int ret; | 663 | int ret; |
649 | 664 | ||
650 | ret = bio_add_page(dio->bio, dio->cur_page, | 665 | ret = bio_add_page(sdio->bio, sdio->cur_page, |
651 | dio->cur_page_len, dio->cur_page_offset); | 666 | sdio->cur_page_len, sdio->cur_page_offset); |
652 | if (ret == dio->cur_page_len) { | 667 | if (ret == sdio->cur_page_len) { |
653 | /* | 668 | /* |
654 | * Decrement count only, if we are done with this page | 669 | * Decrement count only, if we are done with this page |
655 | */ | 670 | */ |
656 | if ((dio->cur_page_len + dio->cur_page_offset) == PAGE_SIZE) | 671 | if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE) |
657 | dio->pages_in_io--; | 672 | sdio->pages_in_io--; |
658 | page_cache_get(dio->cur_page); | 673 | page_cache_get(sdio->cur_page); |
659 | dio->final_block_in_bio = dio->cur_page_block + | 674 | sdio->final_block_in_bio = sdio->cur_page_block + |
660 | (dio->cur_page_len >> dio->blkbits); | 675 | (sdio->cur_page_len >> sdio->blkbits); |
661 | ret = 0; | 676 | ret = 0; |
662 | } else { | 677 | } else { |
663 | ret = 1; | 678 | ret = 1; |
@@ -675,14 +690,15 @@ static int dio_bio_add_page(struct dio *dio) | |||
675 | * The caller of this function is responsible for removing cur_page from the | 690 | * The caller of this function is responsible for removing cur_page from the |
676 | * dio, and for dropping the refcount which came from that presence. | 691 | * dio, and for dropping the refcount which came from that presence. |
677 | */ | 692 | */ |
678 | static int dio_send_cur_page(struct dio *dio) | 693 | static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, |
694 | struct buffer_head *map_bh) | ||
679 | { | 695 | { |
680 | int ret = 0; | 696 | int ret = 0; |
681 | 697 | ||
682 | if (dio->bio) { | 698 | if (sdio->bio) { |
683 | loff_t cur_offset = dio->cur_page_fs_offset; | 699 | loff_t cur_offset = sdio->cur_page_fs_offset; |
684 | loff_t bio_next_offset = dio->logical_offset_in_bio + | 700 | loff_t bio_next_offset = sdio->logical_offset_in_bio + |
685 | dio->bio->bi_size; | 701 | sdio->bio->bi_size; |
686 | 702 | ||
687 | /* | 703 | /* |
688 | * See whether this new request is contiguous with the old. | 704 | * See whether this new request is contiguous with the old. |
@@ -698,28 +714,28 @@ static int dio_send_cur_page(struct dio *dio) | |||
698 | * be the next logical offset in the bio, submit the bio we | 714 | * be the next logical offset in the bio, submit the bio we |
699 | * have. | 715 | * have. |
700 | */ | 716 | */ |
701 | if (dio->final_block_in_bio != dio->cur_page_block || | 717 | if (sdio->final_block_in_bio != sdio->cur_page_block || |
702 | cur_offset != bio_next_offset) | 718 | cur_offset != bio_next_offset) |
703 | dio_bio_submit(dio); | 719 | dio_bio_submit(dio, sdio); |
704 | /* | 720 | /* |
705 | * Submit now if the underlying fs is about to perform a | 721 | * Submit now if the underlying fs is about to perform a |
706 | * metadata read | 722 | * metadata read |
707 | */ | 723 | */ |
708 | else if (dio->boundary) | 724 | else if (sdio->boundary) |
709 | dio_bio_submit(dio); | 725 | dio_bio_submit(dio, sdio); |
710 | } | 726 | } |
711 | 727 | ||
712 | if (dio->bio == NULL) { | 728 | if (sdio->bio == NULL) { |
713 | ret = dio_new_bio(dio, dio->cur_page_block); | 729 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); |
714 | if (ret) | 730 | if (ret) |
715 | goto out; | 731 | goto out; |
716 | } | 732 | } |
717 | 733 | ||
718 | if (dio_bio_add_page(dio) != 0) { | 734 | if (dio_bio_add_page(sdio) != 0) { |
719 | dio_bio_submit(dio); | 735 | dio_bio_submit(dio, sdio); |
720 | ret = dio_new_bio(dio, dio->cur_page_block); | 736 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); |
721 | if (ret == 0) { | 737 | if (ret == 0) { |
722 | ret = dio_bio_add_page(dio); | 738 | ret = dio_bio_add_page(sdio); |
723 | BUG_ON(ret != 0); | 739 | BUG_ON(ret != 0); |
724 | } | 740 | } |
725 | } | 741 | } |
@@ -744,9 +760,10 @@ out: | |||
744 | * If that doesn't work out then we put the old page into the bio and add this | 760 | * If that doesn't work out then we put the old page into the bio and add this |
745 | * page to the dio instead. | 761 | * page to the dio instead. |
746 | */ | 762 | */ |
747 | static int | 763 | static inline int |
748 | submit_page_section(struct dio *dio, struct page *page, | 764 | submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, |
749 | unsigned offset, unsigned len, sector_t blocknr) | 765 | unsigned offset, unsigned len, sector_t blocknr, |
766 | struct buffer_head *map_bh) | ||
750 | { | 767 | { |
751 | int ret = 0; | 768 | int ret = 0; |
752 | 769 | ||
@@ -760,20 +777,20 @@ submit_page_section(struct dio *dio, struct page *page, | |||
760 | /* | 777 | /* |
761 | * Can we just grow the current page's presence in the dio? | 778 | * Can we just grow the current page's presence in the dio? |
762 | */ | 779 | */ |
763 | if ( (dio->cur_page == page) && | 780 | if (sdio->cur_page == page && |
764 | (dio->cur_page_offset + dio->cur_page_len == offset) && | 781 | sdio->cur_page_offset + sdio->cur_page_len == offset && |
765 | (dio->cur_page_block + | 782 | sdio->cur_page_block + |
766 | (dio->cur_page_len >> dio->blkbits) == blocknr)) { | 783 | (sdio->cur_page_len >> sdio->blkbits) == blocknr) { |
767 | dio->cur_page_len += len; | 784 | sdio->cur_page_len += len; |
768 | 785 | ||
769 | /* | 786 | /* |
770 | * If dio->boundary then we want to schedule the IO now to | 787 | * If sdio->boundary then we want to schedule the IO now to |
771 | * avoid metadata seeks. | 788 | * avoid metadata seeks. |
772 | */ | 789 | */ |
773 | if (dio->boundary) { | 790 | if (sdio->boundary) { |
774 | ret = dio_send_cur_page(dio); | 791 | ret = dio_send_cur_page(dio, sdio, map_bh); |
775 | page_cache_release(dio->cur_page); | 792 | page_cache_release(sdio->cur_page); |
776 | dio->cur_page = NULL; | 793 | sdio->cur_page = NULL; |
777 | } | 794 | } |
778 | goto out; | 795 | goto out; |
779 | } | 796 | } |
@@ -781,20 +798,20 @@ submit_page_section(struct dio *dio, struct page *page, | |||
781 | /* | 798 | /* |
782 | * If there's a deferred page already there then send it. | 799 | * If there's a deferred page already there then send it. |
783 | */ | 800 | */ |
784 | if (dio->cur_page) { | 801 | if (sdio->cur_page) { |
785 | ret = dio_send_cur_page(dio); | 802 | ret = dio_send_cur_page(dio, sdio, map_bh); |
786 | page_cache_release(dio->cur_page); | 803 | page_cache_release(sdio->cur_page); |
787 | dio->cur_page = NULL; | 804 | sdio->cur_page = NULL; |
788 | if (ret) | 805 | if (ret) |
789 | goto out; | 806 | goto out; |
790 | } | 807 | } |
791 | 808 | ||
792 | page_cache_get(page); /* It is in dio */ | 809 | page_cache_get(page); /* It is in dio */ |
793 | dio->cur_page = page; | 810 | sdio->cur_page = page; |
794 | dio->cur_page_offset = offset; | 811 | sdio->cur_page_offset = offset; |
795 | dio->cur_page_len = len; | 812 | sdio->cur_page_len = len; |
796 | dio->cur_page_block = blocknr; | 813 | sdio->cur_page_block = blocknr; |
797 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | 814 | sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits; |
798 | out: | 815 | out: |
799 | return ret; | 816 | return ret; |
800 | } | 817 | } |
@@ -804,16 +821,16 @@ out: | |||
804 | * file blocks. Only called for S_ISREG files - blockdevs do not set | 821 | * file blocks. Only called for S_ISREG files - blockdevs do not set |
805 | * buffer_new | 822 | * buffer_new |
806 | */ | 823 | */ |
807 | static void clean_blockdev_aliases(struct dio *dio) | 824 | static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh) |
808 | { | 825 | { |
809 | unsigned i; | 826 | unsigned i; |
810 | unsigned nblocks; | 827 | unsigned nblocks; |
811 | 828 | ||
812 | nblocks = dio->map_bh.b_size >> dio->inode->i_blkbits; | 829 | nblocks = map_bh->b_size >> dio->inode->i_blkbits; |
813 | 830 | ||
814 | for (i = 0; i < nblocks; i++) { | 831 | for (i = 0; i < nblocks; i++) { |
815 | unmap_underlying_metadata(dio->map_bh.b_bdev, | 832 | unmap_underlying_metadata(map_bh->b_bdev, |
816 | dio->map_bh.b_blocknr + i); | 833 | map_bh->b_blocknr + i); |
817 | } | 834 | } |
818 | } | 835 | } |
819 | 836 | ||
@@ -826,19 +843,20 @@ static void clean_blockdev_aliases(struct dio *dio) | |||
826 | * `end' is zero if we're doing the start of the IO, 1 at the end of the | 843 | * `end' is zero if we're doing the start of the IO, 1 at the end of the |
827 | * IO. | 844 | * IO. |
828 | */ | 845 | */ |
829 | static void dio_zero_block(struct dio *dio, int end) | 846 | static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio, |
847 | int end, struct buffer_head *map_bh) | ||
830 | { | 848 | { |
831 | unsigned dio_blocks_per_fs_block; | 849 | unsigned dio_blocks_per_fs_block; |
832 | unsigned this_chunk_blocks; /* In dio_blocks */ | 850 | unsigned this_chunk_blocks; /* In dio_blocks */ |
833 | unsigned this_chunk_bytes; | 851 | unsigned this_chunk_bytes; |
834 | struct page *page; | 852 | struct page *page; |
835 | 853 | ||
836 | dio->start_zero_done = 1; | 854 | sdio->start_zero_done = 1; |
837 | if (!dio->blkfactor || !buffer_new(&dio->map_bh)) | 855 | if (!sdio->blkfactor || !buffer_new(map_bh)) |
838 | return; | 856 | return; |
839 | 857 | ||
840 | dio_blocks_per_fs_block = 1 << dio->blkfactor; | 858 | dio_blocks_per_fs_block = 1 << sdio->blkfactor; |
841 | this_chunk_blocks = dio->block_in_file & (dio_blocks_per_fs_block - 1); | 859 | this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1); |
842 | 860 | ||
843 | if (!this_chunk_blocks) | 861 | if (!this_chunk_blocks) |
844 | return; | 862 | return; |
@@ -850,14 +868,14 @@ static void dio_zero_block(struct dio *dio, int end) | |||
850 | if (end) | 868 | if (end) |
851 | this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks; | 869 | this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks; |
852 | 870 | ||
853 | this_chunk_bytes = this_chunk_blocks << dio->blkbits; | 871 | this_chunk_bytes = this_chunk_blocks << sdio->blkbits; |
854 | 872 | ||
855 | page = ZERO_PAGE(0); | 873 | page = ZERO_PAGE(0); |
856 | if (submit_page_section(dio, page, 0, this_chunk_bytes, | 874 | if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes, |
857 | dio->next_block_for_io)) | 875 | sdio->next_block_for_io, map_bh)) |
858 | return; | 876 | return; |
859 | 877 | ||
860 | dio->next_block_for_io += this_chunk_blocks; | 878 | sdio->next_block_for_io += this_chunk_blocks; |
861 | } | 879 | } |
862 | 880 | ||
863 | /* | 881 | /* |
@@ -876,20 +894,20 @@ static void dio_zero_block(struct dio *dio, int end) | |||
876 | * it should set b_size to PAGE_SIZE or more inside get_block(). This gives | 894 | * it should set b_size to PAGE_SIZE or more inside get_block(). This gives |
877 | * fine alignment but still allows this function to work in PAGE_SIZE units. | 895 | * fine alignment but still allows this function to work in PAGE_SIZE units. |
878 | */ | 896 | */ |
879 | static int do_direct_IO(struct dio *dio) | 897 | static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, |
898 | struct buffer_head *map_bh) | ||
880 | { | 899 | { |
881 | const unsigned blkbits = dio->blkbits; | 900 | const unsigned blkbits = sdio->blkbits; |
882 | const unsigned blocks_per_page = PAGE_SIZE >> blkbits; | 901 | const unsigned blocks_per_page = PAGE_SIZE >> blkbits; |
883 | struct page *page; | 902 | struct page *page; |
884 | unsigned block_in_page; | 903 | unsigned block_in_page; |
885 | struct buffer_head *map_bh = &dio->map_bh; | ||
886 | int ret = 0; | 904 | int ret = 0; |
887 | 905 | ||
888 | /* The I/O can start at any block offset within the first page */ | 906 | /* The I/O can start at any block offset within the first page */ |
889 | block_in_page = dio->first_block_in_page; | 907 | block_in_page = sdio->first_block_in_page; |
890 | 908 | ||
891 | while (dio->block_in_file < dio->final_block_in_request) { | 909 | while (sdio->block_in_file < sdio->final_block_in_request) { |
892 | page = dio_get_page(dio); | 910 | page = dio_get_page(dio, sdio); |
893 | if (IS_ERR(page)) { | 911 | if (IS_ERR(page)) { |
894 | ret = PTR_ERR(page); | 912 | ret = PTR_ERR(page); |
895 | goto out; | 913 | goto out; |
@@ -901,14 +919,14 @@ static int do_direct_IO(struct dio *dio) | |||
901 | unsigned this_chunk_blocks; /* # of blocks */ | 919 | unsigned this_chunk_blocks; /* # of blocks */ |
902 | unsigned u; | 920 | unsigned u; |
903 | 921 | ||
904 | if (dio->blocks_available == 0) { | 922 | if (sdio->blocks_available == 0) { |
905 | /* | 923 | /* |
906 | * Need to go and map some more disk | 924 | * Need to go and map some more disk |
907 | */ | 925 | */ |
908 | unsigned long blkmask; | 926 | unsigned long blkmask; |
909 | unsigned long dio_remainder; | 927 | unsigned long dio_remainder; |
910 | 928 | ||
911 | ret = get_more_blocks(dio); | 929 | ret = get_more_blocks(dio, sdio, map_bh); |
912 | if (ret) { | 930 | if (ret) { |
913 | page_cache_release(page); | 931 | page_cache_release(page); |
914 | goto out; | 932 | goto out; |
@@ -916,18 +934,18 @@ static int do_direct_IO(struct dio *dio) | |||
916 | if (!buffer_mapped(map_bh)) | 934 | if (!buffer_mapped(map_bh)) |
917 | goto do_holes; | 935 | goto do_holes; |
918 | 936 | ||
919 | dio->blocks_available = | 937 | sdio->blocks_available = |
920 | map_bh->b_size >> dio->blkbits; | 938 | map_bh->b_size >> sdio->blkbits; |
921 | dio->next_block_for_io = | 939 | sdio->next_block_for_io = |
922 | map_bh->b_blocknr << dio->blkfactor; | 940 | map_bh->b_blocknr << sdio->blkfactor; |
923 | if (buffer_new(map_bh)) | 941 | if (buffer_new(map_bh)) |
924 | clean_blockdev_aliases(dio); | 942 | clean_blockdev_aliases(dio, map_bh); |
925 | 943 | ||
926 | if (!dio->blkfactor) | 944 | if (!sdio->blkfactor) |
927 | goto do_holes; | 945 | goto do_holes; |
928 | 946 | ||
929 | blkmask = (1 << dio->blkfactor) - 1; | 947 | blkmask = (1 << sdio->blkfactor) - 1; |
930 | dio_remainder = (dio->block_in_file & blkmask); | 948 | dio_remainder = (sdio->block_in_file & blkmask); |
931 | 949 | ||
932 | /* | 950 | /* |
933 | * If we are at the start of IO and that IO | 951 | * If we are at the start of IO and that IO |
@@ -941,8 +959,8 @@ static int do_direct_IO(struct dio *dio) | |||
941 | * on-disk | 959 | * on-disk |
942 | */ | 960 | */ |
943 | if (!buffer_new(map_bh)) | 961 | if (!buffer_new(map_bh)) |
944 | dio->next_block_for_io += dio_remainder; | 962 | sdio->next_block_for_io += dio_remainder; |
945 | dio->blocks_available -= dio_remainder; | 963 | sdio->blocks_available -= dio_remainder; |
946 | } | 964 | } |
947 | do_holes: | 965 | do_holes: |
948 | /* Handle holes */ | 966 | /* Handle holes */ |
@@ -961,7 +979,7 @@ do_holes: | |||
961 | */ | 979 | */ |
962 | i_size_aligned = ALIGN(i_size_read(dio->inode), | 980 | i_size_aligned = ALIGN(i_size_read(dio->inode), |
963 | 1 << blkbits); | 981 | 1 << blkbits); |
964 | if (dio->block_in_file >= | 982 | if (sdio->block_in_file >= |
965 | i_size_aligned >> blkbits) { | 983 | i_size_aligned >> blkbits) { |
966 | /* We hit eof */ | 984 | /* We hit eof */ |
967 | page_cache_release(page); | 985 | page_cache_release(page); |
@@ -969,7 +987,7 @@ do_holes: | |||
969 | } | 987 | } |
970 | zero_user(page, block_in_page << blkbits, | 988 | zero_user(page, block_in_page << blkbits, |
971 | 1 << blkbits); | 989 | 1 << blkbits); |
972 | dio->block_in_file++; | 990 | sdio->block_in_file++; |
973 | block_in_page++; | 991 | block_in_page++; |
974 | goto next_block; | 992 | goto next_block; |
975 | } | 993 | } |
@@ -979,38 +997,41 @@ do_holes: | |||
979 | * is finer than the underlying fs, go check to see if | 997 | * is finer than the underlying fs, go check to see if |
980 | * we must zero out the start of this block. | 998 | * we must zero out the start of this block. |
981 | */ | 999 | */ |
982 | if (unlikely(dio->blkfactor && !dio->start_zero_done)) | 1000 | if (unlikely(sdio->blkfactor && !sdio->start_zero_done)) |
983 | dio_zero_block(dio, 0); | 1001 | dio_zero_block(dio, sdio, 0, map_bh); |
984 | 1002 | ||
985 | /* | 1003 | /* |
986 | * Work out, in this_chunk_blocks, how much disk we | 1004 | * Work out, in this_chunk_blocks, how much disk we |
987 | * can add to this page | 1005 | * can add to this page |
988 | */ | 1006 | */ |
989 | this_chunk_blocks = dio->blocks_available; | 1007 | this_chunk_blocks = sdio->blocks_available; |
990 | u = (PAGE_SIZE - offset_in_page) >> blkbits; | 1008 | u = (PAGE_SIZE - offset_in_page) >> blkbits; |
991 | if (this_chunk_blocks > u) | 1009 | if (this_chunk_blocks > u) |
992 | this_chunk_blocks = u; | 1010 | this_chunk_blocks = u; |
993 | u = dio->final_block_in_request - dio->block_in_file; | 1011 | u = sdio->final_block_in_request - sdio->block_in_file; |
994 | if (this_chunk_blocks > u) | 1012 | if (this_chunk_blocks > u) |
995 | this_chunk_blocks = u; | 1013 | this_chunk_blocks = u; |
996 | this_chunk_bytes = this_chunk_blocks << blkbits; | 1014 | this_chunk_bytes = this_chunk_blocks << blkbits; |
997 | BUG_ON(this_chunk_bytes == 0); | 1015 | BUG_ON(this_chunk_bytes == 0); |
998 | 1016 | ||
999 | dio->boundary = buffer_boundary(map_bh); | 1017 | sdio->boundary = buffer_boundary(map_bh); |
1000 | ret = submit_page_section(dio, page, offset_in_page, | 1018 | ret = submit_page_section(dio, sdio, page, |
1001 | this_chunk_bytes, dio->next_block_for_io); | 1019 | offset_in_page, |
1020 | this_chunk_bytes, | ||
1021 | sdio->next_block_for_io, | ||
1022 | map_bh); | ||
1002 | if (ret) { | 1023 | if (ret) { |
1003 | page_cache_release(page); | 1024 | page_cache_release(page); |
1004 | goto out; | 1025 | goto out; |
1005 | } | 1026 | } |
1006 | dio->next_block_for_io += this_chunk_blocks; | 1027 | sdio->next_block_for_io += this_chunk_blocks; |
1007 | 1028 | ||
1008 | dio->block_in_file += this_chunk_blocks; | 1029 | sdio->block_in_file += this_chunk_blocks; |
1009 | block_in_page += this_chunk_blocks; | 1030 | block_in_page += this_chunk_blocks; |
1010 | dio->blocks_available -= this_chunk_blocks; | 1031 | sdio->blocks_available -= this_chunk_blocks; |
1011 | next_block: | 1032 | next_block: |
1012 | BUG_ON(dio->block_in_file > dio->final_block_in_request); | 1033 | BUG_ON(sdio->block_in_file > sdio->final_block_in_request); |
1013 | if (dio->block_in_file == dio->final_block_in_request) | 1034 | if (sdio->block_in_file == sdio->final_block_in_request) |
1014 | break; | 1035 | break; |
1015 | } | 1036 | } |
1016 | 1037 | ||
@@ -1022,135 +1043,10 @@ out: | |||
1022 | return ret; | 1043 | return ret; |
1023 | } | 1044 | } |
1024 | 1045 | ||
1025 | static ssize_t | 1046 | static inline int drop_refcount(struct dio *dio) |
1026 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | ||
1027 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | ||
1028 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | ||
1029 | dio_submit_t submit_io, struct dio *dio) | ||
1030 | { | 1047 | { |
1031 | unsigned long user_addr; | 1048 | int ret2; |
1032 | unsigned long flags; | 1049 | unsigned long flags; |
1033 | int seg; | ||
1034 | ssize_t ret = 0; | ||
1035 | ssize_t ret2; | ||
1036 | size_t bytes; | ||
1037 | |||
1038 | dio->inode = inode; | ||
1039 | dio->rw = rw; | ||
1040 | dio->blkbits = blkbits; | ||
1041 | dio->blkfactor = inode->i_blkbits - blkbits; | ||
1042 | dio->block_in_file = offset >> blkbits; | ||
1043 | |||
1044 | dio->get_block = get_block; | ||
1045 | dio->end_io = end_io; | ||
1046 | dio->submit_io = submit_io; | ||
1047 | dio->final_block_in_bio = -1; | ||
1048 | dio->next_block_for_io = -1; | ||
1049 | |||
1050 | dio->iocb = iocb; | ||
1051 | dio->i_size = i_size_read(inode); | ||
1052 | |||
1053 | spin_lock_init(&dio->bio_lock); | ||
1054 | dio->refcount = 1; | ||
1055 | |||
1056 | /* | ||
1057 | * In case of non-aligned buffers, we may need 2 more | ||
1058 | * pages since we need to zero out first and last block. | ||
1059 | */ | ||
1060 | if (unlikely(dio->blkfactor)) | ||
1061 | dio->pages_in_io = 2; | ||
1062 | |||
1063 | for (seg = 0; seg < nr_segs; seg++) { | ||
1064 | user_addr = (unsigned long)iov[seg].iov_base; | ||
1065 | dio->pages_in_io += | ||
1066 | ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE | ||
1067 | - user_addr/PAGE_SIZE); | ||
1068 | } | ||
1069 | |||
1070 | for (seg = 0; seg < nr_segs; seg++) { | ||
1071 | user_addr = (unsigned long)iov[seg].iov_base; | ||
1072 | dio->size += bytes = iov[seg].iov_len; | ||
1073 | |||
1074 | /* Index into the first page of the first block */ | ||
1075 | dio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; | ||
1076 | dio->final_block_in_request = dio->block_in_file + | ||
1077 | (bytes >> blkbits); | ||
1078 | /* Page fetching state */ | ||
1079 | dio->head = 0; | ||
1080 | dio->tail = 0; | ||
1081 | dio->curr_page = 0; | ||
1082 | |||
1083 | dio->total_pages = 0; | ||
1084 | if (user_addr & (PAGE_SIZE-1)) { | ||
1085 | dio->total_pages++; | ||
1086 | bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); | ||
1087 | } | ||
1088 | dio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
1089 | dio->curr_user_address = user_addr; | ||
1090 | |||
1091 | ret = do_direct_IO(dio); | ||
1092 | |||
1093 | dio->result += iov[seg].iov_len - | ||
1094 | ((dio->final_block_in_request - dio->block_in_file) << | ||
1095 | blkbits); | ||
1096 | |||
1097 | if (ret) { | ||
1098 | dio_cleanup(dio); | ||
1099 | break; | ||
1100 | } | ||
1101 | } /* end iovec loop */ | ||
1102 | |||
1103 | if (ret == -ENOTBLK) { | ||
1104 | /* | ||
1105 | * The remaining part of the request will be | ||
1106 | * handled by buffered I/O when we return | ||
1107 | */ | ||
1108 | ret = 0; | ||
1109 | } | ||
1110 | /* | ||
1111 | * There may be some unwritten disk at the end of a part-written | ||
1112 | * fs-block-sized block. Go zero that now. | ||
1113 | */ | ||
1114 | dio_zero_block(dio, 1); | ||
1115 | |||
1116 | if (dio->cur_page) { | ||
1117 | ret2 = dio_send_cur_page(dio); | ||
1118 | if (ret == 0) | ||
1119 | ret = ret2; | ||
1120 | page_cache_release(dio->cur_page); | ||
1121 | dio->cur_page = NULL; | ||
1122 | } | ||
1123 | if (dio->bio) | ||
1124 | dio_bio_submit(dio); | ||
1125 | |||
1126 | /* | ||
1127 | * It is possible that we return short IO due to end of file. | ||
1128 | * In that case, we need to release all the pages we got hold on. | ||
1129 | */ | ||
1130 | dio_cleanup(dio); | ||
1131 | |||
1132 | /* | ||
1133 | * All block lookups have been performed. For READ requests | ||
1134 | * we can let i_mutex go now that it's achieved its purpose | ||
1135 | * of protecting us from looking up uninitialized blocks. | ||
1136 | */ | ||
1137 | if (rw == READ && (dio->flags & DIO_LOCKING)) | ||
1138 | mutex_unlock(&dio->inode->i_mutex); | ||
1139 | |||
1140 | /* | ||
1141 | * The only time we want to leave bios in flight is when a successful | ||
1142 | * partial aio read or full aio write has been set up. In that case | ||
1143 | * bio completion will call aio_complete. The only time it's safe to | ||
1144 | * call aio_complete is when we return -EIOCBQUEUED, so we key on that. | ||
1145 | * This had *better* be the only place that raises -EIOCBQUEUED. | ||
1146 | */ | ||
1147 | BUG_ON(ret == -EIOCBQUEUED); | ||
1148 | if (dio->is_async && ret == 0 && dio->result && | ||
1149 | ((rw & READ) || (dio->result == dio->size))) | ||
1150 | ret = -EIOCBQUEUED; | ||
1151 | |||
1152 | if (ret != -EIOCBQUEUED) | ||
1153 | dio_await_completion(dio); | ||
1154 | 1050 | ||
1155 | /* | 1051 | /* |
1156 | * Sync will always be dropping the final ref and completing the | 1052 | * Sync will always be dropping the final ref and completing the |
@@ -1166,14 +1062,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1166 | spin_lock_irqsave(&dio->bio_lock, flags); | 1062 | spin_lock_irqsave(&dio->bio_lock, flags); |
1167 | ret2 = --dio->refcount; | 1063 | ret2 = --dio->refcount; |
1168 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 1064 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
1169 | 1065 | return ret2; | |
1170 | if (ret2 == 0) { | ||
1171 | ret = dio_complete(dio, offset, ret, false); | ||
1172 | kfree(dio); | ||
1173 | } else | ||
1174 | BUG_ON(ret != -EIOCBQUEUED); | ||
1175 | |||
1176 | return ret; | ||
1177 | } | 1066 | } |
1178 | 1067 | ||
1179 | /* | 1068 | /* |
@@ -1195,6 +1084,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1195 | * expected that filesystems provide exclusion between new direct I/O | 1084 | * expected that filesystems provide exclusion between new direct I/O |
1196 | * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, | 1085 | * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, |
1197 | * but other filesystems need to take care of this on their own. | 1086 | * but other filesystems need to take care of this on their own. |
1087 | * | ||
1088 | * NOTE: if you pass "sdio" to anything by pointer make sure that function | ||
1089 | * is always inlined. Otherwise gcc is unable to split the structure into | ||
1090 | * individual fields and will generate much worse code. This is important | ||
1091 | * for the whole file. | ||
1198 | */ | 1092 | */ |
1199 | ssize_t | 1093 | ssize_t |
1200 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1094 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
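The NOTE added above is the load-bearing constraint of this refactor: every helper that takes the on-stack struct dio_submit by pointer is made static inline, so gcc can scalarize the structure into registers instead of reloading fields from memory at each call. A minimal sketch of the pattern, with a hypothetical helper name (not from the patch):

        /* Hypothetical helper, for illustration only: because it is inlined
         * into its caller, gcc can keep sdio's fields in registers. */
        static inline void sdio_advance(struct dio_submit *sdio, unsigned blocks)
        {
                sdio->block_in_file += blocks;
                sdio->blocks_available -= blocks;
        }

A plain out-of-line version of the same helper would force sdio back to the stack at every call site, which is exactly the code-quality regression the NOTE warns about.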
@@ -1211,6 +1105,10 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1211 | ssize_t retval = -EINVAL; | 1105 | ssize_t retval = -EINVAL; |
1212 | loff_t end = offset; | 1106 | loff_t end = offset; |
1213 | struct dio *dio; | 1107 | struct dio *dio; |
1108 | struct dio_submit sdio = { 0, }; | ||
1109 | unsigned long user_addr; | ||
1110 | size_t bytes; | ||
1111 | struct buffer_head map_bh = { 0, }; | ||
1214 | 1112 | ||
1215 | if (rw & WRITE) | 1113 | if (rw & WRITE) |
1216 | rw = WRITE_ODIRECT; | 1114 | rw = WRITE_ODIRECT; |
@@ -1244,7 +1142,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1244 | if (rw == READ && end == offset) | 1142 | if (rw == READ && end == offset) |
1245 | return 0; | 1143 | return 0; |
1246 | 1144 | ||
1247 | dio = kmalloc(sizeof(*dio), GFP_KERNEL); | 1145 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); |
1248 | retval = -ENOMEM; | 1146 | retval = -ENOMEM; |
1249 | if (!dio) | 1147 | if (!dio) |
1250 | goto out; | 1148 | goto out; |
@@ -1268,7 +1166,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1268 | end - 1); | 1166 | end - 1); |
1269 | if (retval) { | 1167 | if (retval) { |
1270 | mutex_unlock(&inode->i_mutex); | 1168 | mutex_unlock(&inode->i_mutex); |
1271 | kfree(dio); | 1169 | kmem_cache_free(dio_cache, dio); |
1272 | goto out; | 1170 | goto out; |
1273 | } | 1171 | } |
1274 | } | 1172 | } |
@@ -1288,11 +1186,141 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1288 | dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && | 1186 | dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && |
1289 | (end > i_size_read(inode))); | 1187 | (end > i_size_read(inode))); |
1290 | 1188 | ||
1291 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1189 | retval = 0; |
1292 | nr_segs, blkbits, get_block, end_io, | 1190 | |
1293 | submit_io, dio); | 1191 | dio->inode = inode; |
1192 | dio->rw = rw; | ||
1193 | sdio.blkbits = blkbits; | ||
1194 | sdio.blkfactor = inode->i_blkbits - blkbits; | ||
1195 | sdio.block_in_file = offset >> blkbits; | ||
1196 | |||
1197 | sdio.get_block = get_block; | ||
1198 | dio->end_io = end_io; | ||
1199 | sdio.submit_io = submit_io; | ||
1200 | sdio.final_block_in_bio = -1; | ||
1201 | sdio.next_block_for_io = -1; | ||
1202 | |||
1203 | dio->iocb = iocb; | ||
1204 | dio->i_size = i_size_read(inode); | ||
1205 | |||
1206 | spin_lock_init(&dio->bio_lock); | ||
1207 | dio->refcount = 1; | ||
1208 | |||
1209 | /* | ||
1210 | * In case of non-aligned buffers, we may need 2 more | ||
1211 | * pages since we need to zero out first and last block. | ||
1212 | */ | ||
1213 | if (unlikely(sdio.blkfactor)) | ||
1214 | sdio.pages_in_io = 2; | ||
1215 | |||
1216 | for (seg = 0; seg < nr_segs; seg++) { | ||
1217 | user_addr = (unsigned long)iov[seg].iov_base; | ||
1218 | sdio.pages_in_io += | ||
1219 | ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / | ||
1220 | PAGE_SIZE - user_addr / PAGE_SIZE); | ||
1221 | } | ||
1222 | |||
1223 | for (seg = 0; seg < nr_segs; seg++) { | ||
1224 | user_addr = (unsigned long)iov[seg].iov_base; | ||
1225 | sdio.size += bytes = iov[seg].iov_len; | ||
1226 | |||
1227 | /* Index into the first page of the first block */ | ||
1228 | sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; | ||
1229 | sdio.final_block_in_request = sdio.block_in_file + | ||
1230 | (bytes >> blkbits); | ||
1231 | /* Page fetching state */ | ||
1232 | sdio.head = 0; | ||
1233 | sdio.tail = 0; | ||
1234 | sdio.curr_page = 0; | ||
1235 | |||
1236 | sdio.total_pages = 0; | ||
1237 | if (user_addr & (PAGE_SIZE-1)) { | ||
1238 | sdio.total_pages++; | ||
1239 | bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); | ||
1240 | } | ||
1241 | sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
1242 | sdio.curr_user_address = user_addr; | ||
1243 | |||
1244 | retval = do_direct_IO(dio, &sdio, &map_bh); | ||
1245 | |||
1246 | dio->result += iov[seg].iov_len - | ||
1247 | ((sdio.final_block_in_request - sdio.block_in_file) << | ||
1248 | blkbits); | ||
1249 | |||
1250 | if (retval) { | ||
1251 | dio_cleanup(dio, &sdio); | ||
1252 | break; | ||
1253 | } | ||
1254 | } /* end iovec loop */ | ||
1255 | |||
1256 | if (retval == -ENOTBLK) { | ||
1257 | /* | ||
1258 | * The remaining part of the request will be | ||
1259 | * be handled by buffered I/O when we return | ||
1260 | */ | ||
1261 | retval = 0; | ||
1262 | } | ||
1263 | /* | ||
1264 | * There may be some unwritten disk at the end of a part-written | ||
1265 | * fs-block-sized block. Go zero that now. | ||
1266 | */ | ||
1267 | dio_zero_block(dio, &sdio, 1, &map_bh); | ||
1268 | |||
1269 | if (sdio.cur_page) { | ||
1270 | ssize_t ret2; | ||
1271 | |||
1272 | ret2 = dio_send_cur_page(dio, &sdio, &map_bh); | ||
1273 | if (retval == 0) | ||
1274 | retval = ret2; | ||
1275 | page_cache_release(sdio.cur_page); | ||
1276 | sdio.cur_page = NULL; | ||
1277 | } | ||
1278 | if (sdio.bio) | ||
1279 | dio_bio_submit(dio, &sdio); | ||
1280 | |||
1281 | /* | ||
1282 | * It is possible that we return short IO due to end of file. | ||
1283 | * In that case, we need to release all the pages we got hold on. | ||
1284 | */ | ||
1285 | dio_cleanup(dio, &sdio); | ||
1286 | |||
1287 | /* | ||
1288 | * All block lookups have been performed. For READ requests | ||
1289 | * we can let i_mutex go now that it's achieved its purpose | ||
1290 | * of protecting us from looking up uninitialized blocks. | ||
1291 | */ | ||
1292 | if (rw == READ && (dio->flags & DIO_LOCKING)) | ||
1293 | mutex_unlock(&dio->inode->i_mutex); | ||
1294 | |||
1295 | /* | ||
1296 | * The only time we want to leave bios in flight is when a successful | ||
1297 | * partial aio read or full aio write has been set up. In that case | ||
1298 | * bio completion will call aio_complete. The only time it's safe to | ||
1299 | * call aio_complete is when we return -EIOCBQUEUED, so we key on that. | ||
1300 | * This had *better* be the only place that raises -EIOCBQUEUED. | ||
1301 | */ | ||
1302 | BUG_ON(retval == -EIOCBQUEUED); | ||
1303 | if (dio->is_async && retval == 0 && dio->result && | ||
1304 | ((rw & READ) || (dio->result == sdio.size))) | ||
1305 | retval = -EIOCBQUEUED; | ||
1306 | |||
1307 | if (retval != -EIOCBQUEUED) | ||
1308 | dio_await_completion(dio); | ||
1309 | |||
1310 | if (drop_refcount(dio) == 0) { | ||
1311 | retval = dio_complete(dio, offset, retval, false); | ||
1312 | kmem_cache_free(dio_cache, dio); | ||
1313 | } else | ||
1314 | BUG_ON(retval != -EIOCBQUEUED); | ||
1294 | 1315 | ||
1295 | out: | 1316 | out: |
1296 | return retval; | 1317 | return retval; |
1297 | } | 1318 | } |
1298 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1319 | EXPORT_SYMBOL(__blockdev_direct_IO); |
1320 | |||
1321 | static __init int dio_init(void) | ||
1322 | { | ||
1323 | dio_cache = KMEM_CACHE(dio, SLAB_PANIC); | ||
1324 | return 0; | ||
1325 | } | ||
1326 | module_init(dio_init) | ||
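To make the allocation change above concrete: struct dio moves from kmalloc()/kfree() to a dedicated slab cache, the usual kernel pattern for a hot, fixed-size object. A minimal sketch of the same pattern (helper names are illustrative, not from the patch):

        #include <linux/slab.h>

        static struct kmem_cache *dio_cache;    /* created once, as in dio_init() above */

        static struct dio *dio_alloc(void)
        {
                /* a cache sized exactly for struct dio is cheaper and less
                 * fragmenting than kmalloc() for a frequent allocation;
                 * may still return NULL under memory pressure */
                return kmem_cache_alloc(dio_cache, GFP_KERNEL);
        }

        static void dio_free(struct dio *dio)
        {
                kmem_cache_free(dio_cache, dio);
        }

KMEM_CACHE(dio, SLAB_PANIC) in dio_init() derives the cache name and object size from the struct definition and panics at boot if the cache cannot be created.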
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index 1cd6d9d3e29a..cc16562654de 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@ | |||
1 | config ECRYPT_FS | 1 | config ECRYPT_FS |
2 | tristate "eCrypt filesystem layer support (EXPERIMENTAL)" | 2 | tristate "eCrypt filesystem layer support (EXPERIMENTAL)" |
3 | depends on EXPERIMENTAL && KEYS && CRYPTO | 3 | depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) |
4 | select CRYPTO_ECB | 4 | select CRYPTO_ECB |
5 | select CRYPTO_CBC | 5 | select CRYPTO_CBC |
6 | select CRYPTO_MD5 | 6 | select CRYPTO_MD5 |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 08a2b52bf565..ac1ad48c2376 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1973,7 +1973,7 @@ pki_encrypt_session_key(struct key *auth_tok_key, | |||
1973 | { | 1973 | { |
1974 | struct ecryptfs_msg_ctx *msg_ctx = NULL; | 1974 | struct ecryptfs_msg_ctx *msg_ctx = NULL; |
1975 | char *payload = NULL; | 1975 | char *payload = NULL; |
1976 | size_t payload_len; | 1976 | size_t payload_len = 0; |
1977 | struct ecryptfs_message *msg; | 1977 | struct ecryptfs_message *msg; |
1978 | int rc; | 1978 | int rc; |
1979 | 1979 | ||
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9f1bb747d77d..b4a6befb1216 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
175 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, | 175 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, |
176 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, | 176 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, |
177 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, | 177 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, |
178 | ecryptfs_opt_check_dev_ruid, | ||
178 | ecryptfs_opt_err }; | 179 | ecryptfs_opt_err }; |
179 | 180 | ||
180 | static const match_table_t tokens = { | 181 | static const match_table_t tokens = { |
@@ -191,6 +192,7 @@ static const match_table_t tokens = { | |||
191 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, | 192 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, |
192 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, | 193 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, |
193 | {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, | 194 | {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, |
195 | {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"}, | ||
194 | {ecryptfs_opt_err, NULL} | 196 | {ecryptfs_opt_err, NULL} |
195 | }; | 197 | }; |
196 | 198 | ||
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat( | |||
236 | * ecryptfs_parse_options | 238 | * ecryptfs_parse_options |
237 | * @sb: The ecryptfs super block | 239 | * @sb: The ecryptfs super block |
238 | * @options: The options passed to the kernel | 240 | * @options: The options passed to the kernel |
241 | * @check_ruid: set to 1 if device uid should be checked against the ruid | ||
239 | * | 242 | * |
240 | * Parse mount options: | 243 | * Parse mount options: |
241 | * debug=N - ecryptfs_verbosity level for debug output | 244 | * debug=N - ecryptfs_verbosity level for debug output |
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat( | |||
251 | * | 254 | * |
252 | * Returns zero on success; non-zero on error | 255 | * Returns zero on success; non-zero on error |
253 | */ | 256 | */ |
254 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | 257 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, |
258 | uid_t *check_ruid) | ||
255 | { | 259 | { |
256 | char *p; | 260 | char *p; |
257 | int rc = 0; | 261 | int rc = 0; |
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | |||
276 | char *cipher_key_bytes_src; | 280 | char *cipher_key_bytes_src; |
277 | char *fn_cipher_key_bytes_src; | 281 | char *fn_cipher_key_bytes_src; |
278 | 282 | ||
283 | *check_ruid = 0; | ||
284 | |||
279 | if (!options) { | 285 | if (!options) { |
280 | rc = -EINVAL; | 286 | rc = -EINVAL; |
281 | goto out; | 287 | goto out; |
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | |||
380 | mount_crypt_stat->flags |= | 386 | mount_crypt_stat->flags |= |
381 | ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; | 387 | ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; |
382 | break; | 388 | break; |
389 | case ecryptfs_opt_check_dev_ruid: | ||
390 | *check_ruid = 1; | ||
391 | break; | ||
383 | case ecryptfs_opt_err: | 392 | case ecryptfs_opt_err: |
384 | default: | 393 | default: |
385 | printk(KERN_WARNING | 394 | printk(KERN_WARNING |
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
475 | const char *err = "Getting sb failed"; | 484 | const char *err = "Getting sb failed"; |
476 | struct inode *inode; | 485 | struct inode *inode; |
477 | struct path path; | 486 | struct path path; |
487 | uid_t check_ruid; | ||
478 | int rc; | 488 | int rc; |
479 | 489 | ||
480 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); | 490 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); |
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
483 | goto out; | 493 | goto out; |
484 | } | 494 | } |
485 | 495 | ||
486 | rc = ecryptfs_parse_options(sbi, raw_data); | 496 | rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid); |
487 | if (rc) { | 497 | if (rc) { |
488 | err = "Error parsing options"; | 498 | err = "Error parsing options"; |
489 | goto out; | 499 | goto out; |
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
521 | "known incompatibilities\n"); | 531 | "known incompatibilities\n"); |
522 | goto out_free; | 532 | goto out_free; |
523 | } | 533 | } |
534 | |||
535 | if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { | ||
536 | rc = -EPERM; | ||
537 | printk(KERN_ERR "Mount of device (uid: %d) not owned by " | ||
538 | "requested user (uid: %d)\n", | ||
539 | path.dentry->d_inode->i_uid, current_uid()); | ||
540 | goto out_free; | ||
541 | } | ||
542 | |||
524 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); | 543 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); |
525 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 544 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
526 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 545 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
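The new ecryptfs_check_dev_ruid option plugs into the standard match_table_t mount-option machinery visible in the hunks above. A condensed sketch of how such a flag is parsed; the loop skeleton follows the usual kernel idiom and omits ecryptfs' other tokens:

        #include <linux/parser.h>

        /* condensed: the real ecryptfs_parse_options() handles many more
         * tokens and argument conversions; "tokens" is the match_table_t
         * extended in the hunk above */
        static void parse_check_ruid(char *options, uid_t *check_ruid)
        {
                substring_t args[MAX_OPT_ARGS];
                char *p;

                *check_ruid = 0;
                while ((p = strsep(&options, ",")) != NULL) {
                        if (!*p)
                                continue;
                        if (match_token(p, tokens, args) ==
                            ecryptfs_opt_check_dev_ruid)
                                *check_ruid = 1;
                }
        }

At mount time the flag then gates the i_uid vs. current_uid() comparison added to ecryptfs_mount() above.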
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 85d430963116..3745f7c2b9c2 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -39,15 +39,16 @@ | |||
39 | int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, | 39 | int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, |
40 | loff_t offset, size_t size) | 40 | loff_t offset, size_t size) |
41 | { | 41 | { |
42 | struct ecryptfs_inode_info *inode_info; | 42 | struct file *lower_file; |
43 | mm_segment_t fs_save; | 43 | mm_segment_t fs_save; |
44 | ssize_t rc; | 44 | ssize_t rc; |
45 | 45 | ||
46 | inode_info = ecryptfs_inode_to_private(ecryptfs_inode); | 46 | lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; |
47 | BUG_ON(!inode_info->lower_file); | 47 | if (!lower_file) |
48 | return -EIO; | ||
48 | fs_save = get_fs(); | 49 | fs_save = get_fs(); |
49 | set_fs(get_ds()); | 50 | set_fs(get_ds()); |
50 | rc = vfs_write(inode_info->lower_file, data, size, &offset); | 51 | rc = vfs_write(lower_file, data, size, &offset); |
51 | set_fs(fs_save); | 52 | set_fs(fs_save); |
52 | mark_inode_dirty_sync(ecryptfs_inode); | 53 | mark_inode_dirty_sync(ecryptfs_inode); |
53 | return rc; | 54 | return rc; |
@@ -225,15 +226,16 @@ out: | |||
225 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, | 226 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, |
226 | struct inode *ecryptfs_inode) | 227 | struct inode *ecryptfs_inode) |
227 | { | 228 | { |
228 | struct ecryptfs_inode_info *inode_info = | 229 | struct file *lower_file; |
229 | ecryptfs_inode_to_private(ecryptfs_inode); | ||
230 | mm_segment_t fs_save; | 230 | mm_segment_t fs_save; |
231 | ssize_t rc; | 231 | ssize_t rc; |
232 | 232 | ||
233 | BUG_ON(!inode_info->lower_file); | 233 | lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; |
234 | if (!lower_file) | ||
235 | return -EIO; | ||
234 | fs_save = get_fs(); | 236 | fs_save = get_fs(); |
235 | set_fs(get_ds()); | 237 | set_fs(get_ds()); |
236 | rc = vfs_read(inode_info->lower_file, data, size, &offset); | 238 | rc = vfs_read(lower_file, data, size, &offset); |
237 | set_fs(fs_save); | 239 | set_fs(fs_save); |
238 | return rc; | 240 | return rc; |
239 | } | 241 | } |
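Both hunks replace a BUG_ON() with a graceful -EIO when the lower file has gone away, and hoist the lookup into a local. The surrounding get_fs()/set_fs() pair is what makes it legal to hand a kernel buffer to vfs_read()/vfs_write(), whose prototypes expect user pointers. A minimal sketch of that idiom under the 3.x-era API:

        #include <linux/fs.h>
        #include <linux/uaccess.h>

        /* read into a kernel buffer through the VFS; KERNEL_DS (via get_ds())
         * widens the address-limit check so a kernel pointer passes */
        static ssize_t read_lower_sketch(struct file *lower_file, char *buf,
                                         size_t size, loff_t offset)
        {
                mm_segment_t fs_save;
                ssize_t rc;

                if (!lower_file)
                        return -EIO;    /* mirrors the hunks above */
                fs_save = get_fs();
                set_fs(get_ds());
                rc = vfs_read(lower_file, buf, size, &offset);
                set_fs(fs_save);
                return rc;
        }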
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index fe047d966dc5..9026fc91fe3b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -700,7 +700,7 @@ static const struct file_operations eventpoll_fops = { | |||
700 | .llseek = noop_llseek, | 700 | .llseek = noop_llseek, |
701 | }; | 701 | }; |
702 | 702 | ||
703 | /* Fast test to see if the file is an evenpoll file */ | 703 | /* Fast test to see if the file is an eventpoll file */ |
704 | static inline int is_file_epoll(struct file *f) | 704 | static inline int is_file_epoll(struct file *f) |
705 | { | 705 | { |
706 | return f->f_op == &eventpoll_fops; | 706 | return f->f_op == &eventpoll_fops; |
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1459,6 +1459,23 @@ static int do_execve_common(const char *filename, | |||
1459 | struct files_struct *displaced; | 1459 | struct files_struct *displaced; |
1460 | bool clear_in_exec; | 1460 | bool clear_in_exec; |
1461 | int retval; | 1461 | int retval; |
1462 | const struct cred *cred = current_cred(); | ||
1463 | |||
1464 | /* | ||
1465 | * We move the actual failure in case of RLIMIT_NPROC excess from | ||
1466 | * set*uid() to execve() because too many poorly written programs | ||
1467 | * don't check setuid() return code. Here we additionally recheck | ||
1468 | * whether NPROC limit is still exceeded. | ||
1469 | */ | ||
1470 | if ((current->flags & PF_NPROC_EXCEEDED) && | ||
1471 | atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { | ||
1472 | retval = -EAGAIN; | ||
1473 | goto out_ret; | ||
1474 | } | ||
1475 | |||
1476 | /* We're below the limit (still or again), so we don't want to make | ||
1477 | * further execve() calls fail. */ | ||
1478 | current->flags &= ~PF_NPROC_EXCEEDED; | ||
1462 | 1479 | ||
1463 | retval = unshare_files(&displaced); | 1480 | retval = unshare_files(&displaced); |
1464 | if (retval) | 1481 | if (retval) |
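The rationale in the new comment is easiest to see from userspace. A hypothetical, deliberately buggy privilege-dropping sequence — the bug class this hunk defends against:

        #include <unistd.h>

        /* BUG (illustrative, not from the kernel tree): if setuid() fails with
         * EAGAIN because the target uid is over RLIMIT_NPROC and the return
         * value is ignored, execve() would otherwise run the new program with
         * the old, privileged uid. */
        static void drop_and_exec(uid_t uid, char *const argv[], char *const envp[])
        {
                setuid(uid);                    /* return value ignored */
                execve(argv[0], argv, envp);    /* kernel now rechecks NPROC here */
        }

As the comment above describes, the set*uid() path only marks the task with PF_NPROC_EXCEEDED, and execve() itself fails with -EAGAIN while the limit is still exceeded.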
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index c5a5855a6c44..352ba149d23e 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -13,7 +13,8 @@ | |||
13 | # | 13 | # |
14 | 14 | ||
15 | # ore module library | 15 | # ore module library |
16 | obj-$(CONFIG_ORE) += ore.o | 16 | libore-y := ore.o ore_raid.o |
17 | obj-$(CONFIG_ORE) += libore.o | ||
17 | 18 | ||
18 | exofs-y := inode.o file.o symlink.o namei.o dir.o super.o | 19 | exofs-y := inode.o file.o symlink.o namei.o dir.o super.o |
19 | obj-$(CONFIG_EXOFS_FS) += exofs.o | 20 | obj-$(CONFIG_EXOFS_FS) += exofs.o |
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index 70bae4149291..fa9a286c8771 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,10 +1,17 @@ | |||
1 | # Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects | ||
2 | # for every ORE user we do it like this. Any user should add itself here | ||
3 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
4 | # selected here, and we default to "ON". So in effect it is like being | ||
5 | # selected by any of the users. | ||
1 | config ORE | 6 | config ORE |
2 | tristate | 7 | tristate |
8 | depends on EXOFS_FS | ||
9 | select ASYNC_XOR | ||
10 | default SCSI_OSD_ULD | ||
3 | 11 | ||
4 | config EXOFS_FS | 12 | config EXOFS_FS |
5 | tristate "exofs: OSD based file system support" | 13 | tristate "exofs: OSD based file system support" |
6 | depends on SCSI_OSD_ULD | 14 | depends on SCSI_OSD_ULD |
7 | select ORE | ||
8 | help | 15 | help |
9 | EXOFS is a file system that uses an OSD storage device, | 16 | EXOFS is a file system that uses an OSD storage device, |
10 | as its backing storage. | 17 | as its backing storage. |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index f4e442ec7445..51f4b4c40f09 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -53,6 +53,10 @@ | |||
53 | /* u64 has problems with printk this will cast it to unsigned long long */ | 53 | /* u64 has problems with printk this will cast it to unsigned long long */ |
54 | #define _LLU(x) (unsigned long long)(x) | 54 | #define _LLU(x) (unsigned long long)(x) |
55 | 55 | ||
56 | struct exofs_dev { | ||
57 | struct ore_dev ored; | ||
58 | unsigned did; | ||
59 | }; | ||
56 | /* | 60 | /* |
57 | * our extension to the in-memory superblock | 61 | * our extension to the in-memory superblock |
58 | */ | 62 | */ |
@@ -66,13 +70,9 @@ struct exofs_sb_info { | |||
66 | u32 s_next_generation; /* next gen # to use */ | 70 | u32 s_next_generation; /* next gen # to use */ |
67 | atomic_t s_curr_pending; /* number of pending commands */ | 71 | atomic_t s_curr_pending; /* number of pending commands */ |
68 | 72 | ||
69 | struct pnfs_osd_data_map data_map; /* Default raid to use | ||
70 | * FIXME: Needed ? | ||
71 | */ | ||
72 | struct ore_layout layout; /* Default files layout */ | 73 | struct ore_layout layout; /* Default files layout */ |
73 | struct ore_comp one_comp; /* id & cred of partition id=0*/ | 74 | struct ore_comp one_comp; /* id & cred of partition id=0*/ |
74 | struct ore_components comps; /* comps for the partition */ | 75 | struct ore_components oc; /* comps for the partition */ |
75 | struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ | ||
76 | }; | 76 | }; |
77 | 77 | ||
78 | /* | 78 | /* |
@@ -86,7 +86,7 @@ struct exofs_i_info { | |||
86 | uint32_t i_dir_start_lookup; /* which page to start lookup */ | 86 | uint32_t i_dir_start_lookup; /* which page to start lookup */ |
87 | uint64_t i_commit_size; /* the object's written length */ | 87 | uint64_t i_commit_size; /* the object's written length */ |
88 | struct ore_comp one_comp; /* same component for all devices */ | 88 | struct ore_comp one_comp; /* same component for all devices */ |
89 | struct ore_components comps; /* inode view of the device table */ | 89 | struct ore_components oc; /* inode view of the device table */ |
90 | }; | 90 | }; |
91 | 91 | ||
92 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | 92 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) |
@@ -207,7 +207,7 @@ extern const struct inode_operations exofs_fast_symlink_inode_operations; | |||
207 | * bigger and that the device table repeats twice. | 207 | * bigger and that the device table repeats twice. |
208 | * See: exofs_read_lookup_dev_table() | 208 | * See: exofs_read_lookup_dev_table() |
209 | */ | 209 | */ |
210 | static inline void exofs_init_comps(struct ore_components *comps, | 210 | static inline void exofs_init_comps(struct ore_components *oc, |
211 | struct ore_comp *one_comp, | 211 | struct ore_comp *one_comp, |
212 | struct exofs_sb_info *sbi, osd_id oid) | 212 | struct exofs_sb_info *sbi, osd_id oid) |
213 | { | 213 | { |
@@ -217,13 +217,15 @@ static inline void exofs_init_comps(struct ore_components *comps, | |||
217 | one_comp->obj.id = oid; | 217 | one_comp->obj.id = oid; |
218 | exofs_make_credential(one_comp->cred, &one_comp->obj); | 218 | exofs_make_credential(one_comp->cred, &one_comp->obj); |
219 | 219 | ||
220 | comps->numdevs = sbi->comps.numdevs; | 220 | oc->first_dev = 0; |
221 | comps->single_comp = EC_SINGLE_COMP; | 221 | oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 * |
222 | comps->comps = one_comp; | 222 | sbi->layout.group_count; |
223 | oc->single_comp = EC_SINGLE_COMP; | ||
224 | oc->comps = one_comp; | ||
223 | 225 | ||
224 | /* Round robin device view of the table */ | 226 | /* Round robin device view of the table */ |
225 | first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs; | 227 | first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs; |
226 | comps->ods = sbi->comps.ods + first_dev; | 228 | oc->ods = &sbi->oc.ods[first_dev]; |
227 | } | 229 | } |
228 | 230 | ||
229 | #endif | 231 | #endif |
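The round-robin device view computed at the end of exofs_init_comps() is easier to see with numbers. A worked userspace example under assumed values (6 devices, 2-way mirroring):

        #include <stdio.h>

        int main(void)
        {
                unsigned mirrors_p1 = 2, numdevs = 6, dev_mod;

                /* dev_mod 0,1,2 start at devices 0,2,4; dev_mod 3 wraps to 0 */
                for (dev_mod = 0; dev_mod < 4; dev_mod++)
                        printf("dev_mod=%u -> first_dev=%u\n", dev_mod,
                               (dev_mod * mirrors_p1) % numdevs);
                return 0;
        }

Each object's component table thus starts at a different mirror pair, spreading objects evenly across the device table.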
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f39a38fc2349..3e5f3a6be90a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,11 +37,7 @@ | |||
37 | 37 | ||
38 | #define EXOFS_DBGMSG2(M...) do {} while (0) | 38 | #define EXOFS_DBGMSG2(M...) do {} while (0) |
39 | 39 | ||
40 | enum { BIO_MAX_PAGES_KMALLOC = | 40 | enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), }; |
41 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
42 | MAX_PAGES_KMALLOC = | ||
43 | PAGE_SIZE / sizeof(struct page *), | ||
44 | }; | ||
45 | 41 | ||
46 | unsigned exofs_max_io_pages(struct ore_layout *layout, | 42 | unsigned exofs_max_io_pages(struct ore_layout *layout, |
47 | unsigned expected_pages) | 43 | unsigned expected_pages) |
@@ -49,8 +45,7 @@ unsigned exofs_max_io_pages(struct ore_layout *layout, | |||
49 | unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); | 45 | unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); |
50 | 46 | ||
51 | /* TODO: easily support bio chaining */ | 47 | /* TODO: easily support bio chaining */ |
52 | pages = min_t(unsigned, pages, | 48 | pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE); |
53 | layout->group_width * BIO_MAX_PAGES_KMALLOC); | ||
54 | return pages; | 49 | return pages; |
55 | } | 50 | } |
56 | 51 | ||
@@ -68,6 +63,7 @@ struct page_collect { | |||
68 | bool read_4_write; /* This means two things: that the read is sync | 63 | bool read_4_write; /* This means two things: that the read is sync |
69 | * And the pages should not be unlocked. | 64 | * And the pages should not be unlocked. |
70 | */ | 65 | */ |
66 | struct page *that_locked_page; | ||
71 | }; | 67 | }; |
72 | 68 | ||
73 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 69 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
@@ -86,6 +82,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
86 | pcol->length = 0; | 82 | pcol->length = 0; |
87 | pcol->pg_first = -1; | 83 | pcol->pg_first = -1; |
88 | pcol->read_4_write = false; | 84 | pcol->read_4_write = false; |
85 | pcol->that_locked_page = NULL; | ||
89 | } | 86 | } |
90 | 87 | ||
91 | static void _pcol_reset(struct page_collect *pcol) | 88 | static void _pcol_reset(struct page_collect *pcol) |
@@ -98,6 +95,7 @@ static void _pcol_reset(struct page_collect *pcol) | |||
98 | pcol->length = 0; | 95 | pcol->length = 0; |
99 | pcol->pg_first = -1; | 96 | pcol->pg_first = -1; |
100 | pcol->ios = NULL; | 97 | pcol->ios = NULL; |
98 | pcol->that_locked_page = NULL; | ||
101 | 99 | ||
102 | /* this is probably the end of the loop but in writes | 100 | /* this is probably the end of the loop but in writes |
103 | * it might not end here. don't be left with nothing | 101 | * it might not end here. don't be left with nothing |
@@ -149,14 +147,17 @@ static int pcol_add_page(struct page_collect *pcol, struct page *page, | |||
149 | return 0; | 147 | return 0; |
150 | } | 148 | } |
151 | 149 | ||
150 | enum {PAGE_WAS_NOT_IN_IO = 17}; | ||
152 | static int update_read_page(struct page *page, int ret) | 151 | static int update_read_page(struct page *page, int ret) |
153 | { | 152 | { |
154 | if (ret == 0) { | 153 | switch (ret) { |
154 | case 0: | ||
155 | /* Everything is OK */ | 155 | /* Everything is OK */ |
156 | SetPageUptodate(page); | 156 | SetPageUptodate(page); |
157 | if (PageError(page)) | 157 | if (PageError(page)) |
158 | ClearPageError(page); | 158 | ClearPageError(page); |
159 | } else if (ret == -EFAULT) { | 159 | break; |
160 | case -EFAULT: | ||
160 | /* In this case we were trying to read something that wasn't on | 161 | /* In this case we were trying to read something that wasn't on |
161 | * disk yet - return a page full of zeroes. This should be OK, | 162 | * disk yet - return a page full of zeroes. This should be OK, |
162 | * because the object should be empty (if there was a write | 163 | * because the object should be empty (if there was a write |
@@ -167,16 +168,22 @@ static int update_read_page(struct page *page, int ret) | |||
167 | SetPageUptodate(page); | 168 | SetPageUptodate(page); |
168 | if (PageError(page)) | 169 | if (PageError(page)) |
169 | ClearPageError(page); | 170 | ClearPageError(page); |
170 | ret = 0; /* recovered error */ | ||
171 | EXOFS_DBGMSG("recovered read error\n"); | 171 | EXOFS_DBGMSG("recovered read error\n"); |
172 | } else /* Error */ | 172 | /* fall through */ |
173 | case PAGE_WAS_NOT_IN_IO: | ||
174 | ret = 0; /* recovered error */ | ||
175 | break; | ||
176 | default: | ||
173 | SetPageError(page); | 177 | SetPageError(page); |
174 | 178 | } | |
175 | return ret; | 179 | return ret; |
176 | } | 180 | } |
177 | 181 | ||
178 | static void update_write_page(struct page *page, int ret) | 182 | static void update_write_page(struct page *page, int ret) |
179 | { | 183 | { |
184 | if (unlikely(ret == PAGE_WAS_NOT_IN_IO)) | ||
185 | return; /* don't pass start don't collect $200 */ | ||
186 | |||
180 | if (ret) { | 187 | if (ret) { |
181 | mapping_set_error(page->mapping, ret); | 188 | mapping_set_error(page->mapping, ret); |
182 | SetPageError(page); | 189 | SetPageError(page); |
@@ -190,15 +197,16 @@ static void update_write_page(struct page *page, int ret) | |||
190 | static int __readpages_done(struct page_collect *pcol) | 197 | static int __readpages_done(struct page_collect *pcol) |
191 | { | 198 | { |
192 | int i; | 199 | int i; |
193 | u64 resid; | ||
194 | u64 good_bytes; | 200 | u64 good_bytes; |
195 | u64 length = 0; | 201 | u64 length = 0; |
196 | int ret = ore_check_io(pcol->ios, &resid); | 202 | int ret = ore_check_io(pcol->ios, NULL); |
197 | 203 | ||
198 | if (likely(!ret)) | 204 | if (likely(!ret)) { |
199 | good_bytes = pcol->length; | 205 | good_bytes = pcol->length; |
200 | else | 206 | ret = PAGE_WAS_NOT_IN_IO; |
201 | good_bytes = pcol->length - resid; | 207 | } else { |
208 | good_bytes = 0; | ||
209 | } | ||
202 | 210 | ||
203 | EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" | 211 | EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" |
204 | " length=0x%lx nr_pages=%u\n", | 212 | " length=0x%lx nr_pages=%u\n", |
@@ -259,6 +267,46 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
259 | } | 267 | } |
260 | } | 268 | } |
261 | 269 | ||
270 | static int _maybe_not_all_in_one_io(struct ore_io_state *ios, | ||
271 | struct page_collect *pcol_src, struct page_collect *pcol) | ||
272 | { | ||
273 | /* length was wrong or offset was not page aligned */ | ||
274 | BUG_ON(pcol_src->nr_pages < ios->nr_pages); | ||
275 | |||
276 | if (pcol_src->nr_pages > ios->nr_pages) { | ||
277 | struct page **src_page; | ||
278 | unsigned pages_less = pcol_src->nr_pages - ios->nr_pages; | ||
279 | unsigned long len_less = pcol_src->length - ios->length; | ||
280 | unsigned i; | ||
281 | int ret; | ||
282 | |||
283 | /* This IO was trimmed */ | ||
284 | pcol_src->nr_pages = ios->nr_pages; | ||
285 | pcol_src->length = ios->length; | ||
286 | |||
287 | /* Left over pages are passed to the next io */ | ||
288 | pcol->expected_pages += pages_less; | ||
289 | pcol->nr_pages = pages_less; | ||
290 | pcol->length = len_less; | ||
291 | src_page = pcol_src->pages + pcol_src->nr_pages; | ||
292 | pcol->pg_first = (*src_page)->index; | ||
293 | |||
294 | ret = pcol_try_alloc(pcol); | ||
295 | if (unlikely(ret)) | ||
296 | return ret; | ||
297 | |||
298 | for (i = 0; i < pages_less; ++i) | ||
299 | pcol->pages[i] = *src_page++; | ||
300 | |||
301 | EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x " | ||
302 | "pages_less=0x%x expected_pages=0x%x " | ||
303 | "next_offset=0x%llx next_len=0x%lx\n", | ||
304 | pcol_src->nr_pages, pages_less, pcol->expected_pages, | ||
305 | pcol->pg_first * PAGE_SIZE, pcol->length); | ||
306 | } | ||
307 | return 0; | ||
308 | } | ||
309 | |||
262 | static int read_exec(struct page_collect *pcol) | 310 | static int read_exec(struct page_collect *pcol) |
263 | { | 311 | { |
264 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 312 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
@@ -270,7 +318,7 @@ static int read_exec(struct page_collect *pcol) | |||
270 | return 0; | 318 | return 0; |
271 | 319 | ||
272 | if (!pcol->ios) { | 320 | if (!pcol->ios) { |
273 | int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true, | 321 | int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true, |
274 | pcol->pg_first << PAGE_CACHE_SHIFT, | 322 | pcol->pg_first << PAGE_CACHE_SHIFT, |
275 | pcol->length, &pcol->ios); | 323 | pcol->length, &pcol->ios); |
276 | 324 | ||
@@ -280,7 +328,6 @@ static int read_exec(struct page_collect *pcol) | |||
280 | 328 | ||
281 | ios = pcol->ios; | 329 | ios = pcol->ios; |
282 | ios->pages = pcol->pages; | 330 | ios->pages = pcol->pages; |
283 | ios->nr_pages = pcol->nr_pages; | ||
284 | 331 | ||
285 | if (pcol->read_4_write) { | 332 | if (pcol->read_4_write) { |
286 | ore_read(pcol->ios); | 333 | ore_read(pcol->ios); |
@@ -296,17 +343,23 @@ static int read_exec(struct page_collect *pcol) | |||
296 | *pcol_copy = *pcol; | 343 | *pcol_copy = *pcol; |
297 | ios->done = readpages_done; | 344 | ios->done = readpages_done; |
298 | ios->private = pcol_copy; | 345 | ios->private = pcol_copy; |
346 | |||
347 | /* pages ownership was passed to pcol_copy */ | ||
348 | _pcol_reset(pcol); | ||
349 | |||
350 | ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); | ||
351 | if (unlikely(ret)) | ||
352 | goto err; | ||
353 | |||
354 | EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n", | ||
355 | pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); | ||
356 | |||
299 | ret = ore_read(ios); | 357 | ret = ore_read(ios); |
300 | if (unlikely(ret)) | 358 | if (unlikely(ret)) |
301 | goto err; | 359 | goto err; |
302 | 360 | ||
303 | atomic_inc(&pcol->sbi->s_curr_pending); | 361 | atomic_inc(&pcol->sbi->s_curr_pending); |
304 | 362 | ||
305 | EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | ||
306 | oi->one_comp.obj.id, _LLU(ios->offset), pcol->length); | ||
307 | |||
308 | /* pages ownership was passed to pcol_copy */ | ||
309 | _pcol_reset(pcol); | ||
310 | return 0; | 363 | return 0; |
311 | 364 | ||
312 | err: | 365 | err: |
@@ -341,6 +394,8 @@ static int readpage_strip(void *data, struct page *page) | |||
341 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | 394 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, |
342 | page->index); | 395 | page->index); |
343 | 396 | ||
397 | pcol->that_locked_page = page; | ||
398 | |||
344 | if (page->index < end_index) | 399 | if (page->index < end_index) |
345 | len = PAGE_CACHE_SIZE; | 400 | len = PAGE_CACHE_SIZE; |
346 | else if (page->index == end_index) | 401 | else if (page->index == end_index) |
@@ -429,6 +484,10 @@ static int exofs_readpages(struct file *file, struct address_space *mapping, | |||
429 | return ret; | 484 | return ret; |
430 | } | 485 | } |
431 | 486 | ||
487 | ret = read_exec(&pcol); | ||
488 | if (unlikely(ret)) | ||
489 | return ret; | ||
490 | |||
432 | return read_exec(&pcol); | 491 | return read_exec(&pcol); |
433 | } | 492 | } |
434 | 493 | ||
@@ -462,17 +521,18 @@ static void writepages_done(struct ore_io_state *ios, void *p) | |||
462 | { | 521 | { |
463 | struct page_collect *pcol = p; | 522 | struct page_collect *pcol = p; |
464 | int i; | 523 | int i; |
465 | u64 resid; | ||
466 | u64 good_bytes; | 524 | u64 good_bytes; |
467 | u64 length = 0; | 525 | u64 length = 0; |
468 | int ret = ore_check_io(ios, &resid); | 526 | int ret = ore_check_io(ios, NULL); |
469 | 527 | ||
470 | atomic_dec(&pcol->sbi->s_curr_pending); | 528 | atomic_dec(&pcol->sbi->s_curr_pending); |
471 | 529 | ||
472 | if (likely(!ret)) | 530 | if (likely(!ret)) { |
473 | good_bytes = pcol->length; | 531 | good_bytes = pcol->length; |
474 | else | 532 | ret = PAGE_WAS_NOT_IN_IO; |
475 | good_bytes = pcol->length - resid; | 533 | } else { |
534 | good_bytes = 0; | ||
535 | } | ||
476 | 536 | ||
477 | EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" | 537 | EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" |
478 | " length=0x%lx nr_pages=%u\n", | 538 | " length=0x%lx nr_pages=%u\n", |
@@ -505,6 +565,56 @@ static void writepages_done(struct ore_io_state *ios, void *p) | |||
505 | EXOFS_DBGMSG2("writepages_done END\n"); | 565 | EXOFS_DBGMSG2("writepages_done END\n"); |
506 | } | 566 | } |
507 | 567 | ||
568 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) | ||
569 | { | ||
570 | struct page_collect *pcol = priv; | ||
571 | pgoff_t index = offset / PAGE_SIZE; | ||
572 | |||
573 | if (!pcol->that_locked_page || | ||
574 | (pcol->that_locked_page->index != index)) { | ||
575 | struct page *page = find_get_page(pcol->inode->i_mapping, index); | ||
576 | |||
577 | if (!page) { | ||
578 | page = find_or_create_page(pcol->inode->i_mapping, | ||
579 | index, GFP_NOFS); | ||
580 | if (unlikely(!page)) { | ||
581 | EXOFS_DBGMSG("grab_cache_page Failed " | ||
582 | "index=0x%llx\n", _LLU(index)); | ||
583 | return NULL; | ||
584 | } | ||
585 | unlock_page(page); | ||
586 | } | ||
587 | if (PageDirty(page) || PageWriteback(page)) | ||
588 | *uptodate = true; | ||
589 | else | ||
590 | *uptodate = PageUptodate(page); | ||
591 | EXOFS_DBGMSG("index=0x%lx uptodate=%d\n", index, *uptodate); | ||
592 | return page; | ||
593 | } else { | ||
594 | EXOFS_DBGMSG("YES that_locked_page index=0x%lx\n", | ||
595 | pcol->that_locked_page->index); | ||
596 | *uptodate = true; | ||
597 | return pcol->that_locked_page; | ||
598 | } | ||
599 | } | ||
600 | |||
601 | static void __r4w_put_page(void *priv, struct page *page) | ||
602 | { | ||
603 | struct page_collect *pcol = priv; | ||
604 | |||
605 | if (pcol->that_locked_page != page) { | ||
606 | EXOFS_DBGMSG("index=0x%lx\n", page->index); | ||
607 | page_cache_release(page); | ||
608 | return; | ||
609 | } | ||
610 | EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index); | ||
611 | } | ||
612 | |||
613 | static const struct _ore_r4w_op _r4w_op = { | ||
614 | .get_page = &__r4w_get_page, | ||
615 | .put_page = &__r4w_put_page, | ||
616 | }; | ||
617 | |||
508 | static int write_exec(struct page_collect *pcol) | 618 | static int write_exec(struct page_collect *pcol) |
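The _r4w_op pair above lets ORE borrow page-cache pages while doing read-modify-write of partial stripes. How ORE invokes the pair is not part of this diff; a hedged sketch of the expected calling convention, derived only from the signatures above and the ios->r4w/ios->private assignments in write_exec():

        /* hypothetical consumer-side use; ORE internals are not shown here */
        static void r4w_peek(struct ore_io_state *ios, u64 offset)
        {
                bool uptodate;
                struct page *page;

                page = ios->r4w->get_page(ios->private, offset, &uptodate);
                if (!page)
                        return;
                /* if (!uptodate), the page must first be read from disk so the
                 * partial-stripe write does not clobber valid data */
                ios->r4w->put_page(ios->private, page);
        }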
509 | { | 619 | { |
510 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 620 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
@@ -516,10 +626,9 @@ static int write_exec(struct page_collect *pcol) | |||
516 | return 0; | 626 | return 0; |
517 | 627 | ||
518 | BUG_ON(pcol->ios); | 628 | BUG_ON(pcol->ios); |
519 | ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false, | 629 | ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false, |
520 | pcol->pg_first << PAGE_CACHE_SHIFT, | 630 | pcol->pg_first << PAGE_CACHE_SHIFT, |
521 | pcol->length, &pcol->ios); | 631 | pcol->length, &pcol->ios); |
522 | |||
523 | if (unlikely(ret)) | 632 | if (unlikely(ret)) |
524 | goto err; | 633 | goto err; |
525 | 634 | ||
@@ -534,10 +643,20 @@ static int write_exec(struct page_collect *pcol) | |||
534 | 643 | ||
535 | ios = pcol->ios; | 644 | ios = pcol->ios; |
536 | ios->pages = pcol_copy->pages; | 645 | ios->pages = pcol_copy->pages; |
537 | ios->nr_pages = pcol_copy->nr_pages; | ||
538 | ios->done = writepages_done; | 646 | ios->done = writepages_done; |
647 | ios->r4w = &_r4w_op; | ||
539 | ios->private = pcol_copy; | 648 | ios->private = pcol_copy; |
540 | 649 | ||
650 | /* pages ownership was passed to pcol_copy */ | ||
651 | _pcol_reset(pcol); | ||
652 | |||
653 | ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol); | ||
654 | if (unlikely(ret)) | ||
655 | goto err; | ||
656 | |||
657 | EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n", | ||
658 | pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length)); | ||
659 | |||
541 | ret = ore_write(ios); | 660 | ret = ore_write(ios); |
542 | if (unlikely(ret)) { | 661 | if (unlikely(ret)) { |
543 | EXOFS_ERR("write_exec: ore_write() Failed\n"); | 662 | EXOFS_ERR("write_exec: ore_write() Failed\n"); |
@@ -545,11 +664,6 @@ static int write_exec(struct page_collect *pcol) | |||
545 | } | 664 | } |
546 | 665 | ||
547 | atomic_inc(&pcol->sbi->s_curr_pending); | 666 | atomic_inc(&pcol->sbi->s_curr_pending); |
548 | EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | ||
549 | pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), | ||
550 | pcol->length); | ||
551 | /* pages ownership was passed to pcol_copy */ | ||
552 | _pcol_reset(pcol); | ||
553 | return 0; | 667 | return 0; |
554 | 668 | ||
555 | err: | 669 | err: |
@@ -689,14 +803,33 @@ static int exofs_writepages(struct address_space *mapping, | |||
689 | _pcol_init(&pcol, expected_pages, mapping->host); | 803 | _pcol_init(&pcol, expected_pages, mapping->host); |
690 | 804 | ||
691 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | 805 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); |
692 | if (ret) { | 806 | if (unlikely(ret)) { |
693 | EXOFS_ERR("write_cache_pages => %d\n", ret); | 807 | EXOFS_ERR("write_cache_pages => %d\n", ret); |
694 | return ret; | 808 | return ret; |
695 | } | 809 | } |
696 | 810 | ||
697 | return write_exec(&pcol); | 811 | ret = write_exec(&pcol); |
812 | if (unlikely(ret)) | ||
813 | return ret; | ||
814 | |||
815 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
816 | return write_exec(&pcol); /* pump the last remainder */ | ||
817 | } else if (pcol.nr_pages) { | ||
818 | /* not SYNC; let the remainder join the next writeout */ | ||
819 | unsigned i; | ||
820 | |||
821 | for (i = 0; i < pcol.nr_pages; i++) { | ||
822 | struct page *page = pcol.pages[i]; | ||
823 | |||
824 | end_page_writeback(page); | ||
825 | set_page_dirty(page); | ||
826 | unlock_page(page); | ||
827 | } | ||
828 | } | ||
829 | return 0; | ||
698 | } | 830 | } |
699 | 831 | ||
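A note on the hunks below (the rationale is inferred, not stated in this diff): with RAID5 parity a single dirty page can no longer be written out in isolation, because the rest of its stripe would have to be read and the parity re-XORed, so exofs_writepage is commented out and the aops table routes all writeback through exofs_writepages:

	.writepage  = NULL,              /* single-page writeout unsupported */
	.writepages = exofs_writepages,  /* all writeback funnels here */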
832 | /* | ||
700 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | 833 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) |
701 | { | 834 | { |
702 | struct page_collect pcol; | 835 | struct page_collect pcol; |
@@ -712,7 +845,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc) | |||
712 | 845 | ||
713 | return write_exec(&pcol); | 846 | return write_exec(&pcol); |
714 | } | 847 | } |
715 | 848 | */ | |
716 | /* i_mutex held using inode->i_size directly */ | 849 | /* i_mutex held using inode->i_size directly */ |
717 | static void _write_failed(struct inode *inode, loff_t to) | 850 | static void _write_failed(struct inode *inode, loff_t to) |
718 | { | 851 | { |
@@ -818,7 +951,7 @@ static void exofs_invalidatepage(struct page *page, unsigned long offset) | |||
818 | const struct address_space_operations exofs_aops = { | 951 | const struct address_space_operations exofs_aops = { |
819 | .readpage = exofs_readpage, | 952 | .readpage = exofs_readpage, |
820 | .readpages = exofs_readpages, | 953 | .readpages = exofs_readpages, |
821 | .writepage = exofs_writepage, | 954 | .writepage = NULL, |
822 | .writepages = exofs_writepages, | 955 | .writepages = exofs_writepages, |
823 | .write_begin = exofs_write_begin_export, | 956 | .write_begin = exofs_write_begin_export, |
824 | .write_end = exofs_write_end, | 957 | .write_end = exofs_write_end, |
@@ -860,7 +993,7 @@ static int _do_truncate(struct inode *inode, loff_t newsize) | |||
860 | 993 | ||
861 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 994 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
862 | 995 | ||
863 | ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize); | 996 | ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize); |
864 | if (likely(!ret)) | 997 | if (likely(!ret)) |
865 | truncate_setsize(inode, newsize); | 998 | truncate_setsize(inode, newsize); |
866 | 999 | ||
@@ -927,14 +1060,14 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | |||
927 | struct exofs_on_disk_inode_layout *layout; | 1060 | struct exofs_on_disk_inode_layout *layout; |
928 | int ret; | 1061 | int ret; |
929 | 1062 | ||
930 | ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); | 1063 | ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); |
931 | if (unlikely(ret)) { | 1064 | if (unlikely(ret)) { |
932 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); | 1065 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
933 | return ret; | 1066 | return ret; |
934 | } | 1067 | } |
935 | 1068 | ||
936 | attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); | 1069 | attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); |
937 | attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); | 1070 | attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs); |
938 | 1071 | ||
939 | ios->in_attr = attrs; | 1072 | ios->in_attr = attrs; |
940 | ios->in_attr_len = ARRAY_SIZE(attrs); | 1073 | ios->in_attr_len = ARRAY_SIZE(attrs); |
@@ -1018,7 +1151,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1018 | return inode; | 1151 | return inode; |
1019 | oi = exofs_i(inode); | 1152 | oi = exofs_i(inode); |
1020 | __oi_init(oi); | 1153 | __oi_init(oi); |
1021 | exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, | 1154 | exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, |
1022 | exofs_oi_objno(oi)); | 1155 | exofs_oi_objno(oi)); |
1023 | 1156 | ||
1024 | /* read the inode from the osd */ | 1157 | /* read the inode from the osd */ |
@@ -1172,13 +1305,13 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1172 | spin_unlock(&sbi->s_next_gen_lock); | 1305 | spin_unlock(&sbi->s_next_gen_lock); |
1173 | insert_inode_hash(inode); | 1306 | insert_inode_hash(inode); |
1174 | 1307 | ||
1175 | exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, | 1308 | exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info, |
1176 | exofs_oi_objno(oi)); | 1309 | exofs_oi_objno(oi)); |
1177 | exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ | 1310 | exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ |
1178 | 1311 | ||
1179 | mark_inode_dirty(inode); | 1312 | mark_inode_dirty(inode); |
1180 | 1313 | ||
1181 | ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); | 1314 | ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); |
1182 | if (unlikely(ret)) { | 1315 | if (unlikely(ret)) { |
1183 | EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); | 1316 | EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); |
1184 | return ERR_PTR(ret); | 1317 | return ERR_PTR(ret); |
@@ -1267,7 +1400,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1267 | } else | 1400 | } else |
1268 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | 1401 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
1269 | 1402 | ||
1270 | ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); | 1403 | ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); |
1271 | if (unlikely(ret)) { | 1404 | if (unlikely(ret)) { |
1272 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); | 1405 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
1273 | goto free_args; | 1406 | goto free_args; |
@@ -1350,7 +1483,7 @@ void exofs_evict_inode(struct inode *inode) | |||
1350 | /* ignore the error, attempt a remove anyway */ | 1483 | /* ignore the error, attempt a remove anyway */ |
1351 | 1484 | ||
1352 | /* Now Remove the OSD objects */ | 1485 | /* Now Remove the OSD objects */ |
1353 | ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); | 1486 | ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios); |
1354 | if (unlikely(ret)) { | 1487 | if (unlikely(ret)) { |
1355 | EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); | 1488 | EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); |
1356 | return; | 1489 | return; |
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 25305af88198..fcfa86ae6faf 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -24,76 +24,287 @@ | |||
24 | 24 | ||
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
27 | #include <linux/lcm.h> | ||
27 | 28 | ||
28 | #include <scsi/osd_ore.h> | 29 | #include "ore_raid.h" |
29 | 30 | ||
30 | #define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a) | 31 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); |
32 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | ||
33 | MODULE_LICENSE("GPL"); | ||
34 | |||
35 | /* ore_verify_layout does a couple of things: | ||
36 | * 1. Given a minimum number of needed parameters, fixes up the rest of the | ||
37 | * members to be operational for the ore. The needed parameters are those | ||
38 | * that are defined by the pnfs-objects layout STD. | ||
39 | * 2. Checks that the current ore code actually supports these parameters, | ||
40 | * for example stripe_unit must be a multiple of the system PAGE_SIZE, | ||
41 | * and so on. | ||
42 | * 3. Caches some heavily used calculations that will be needed by users. | ||
43 | */ | ||
44 | |||
45 | enum { BIO_MAX_PAGES_KMALLOC = | ||
46 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),}; | ||
31 | 47 | ||
32 | #ifdef CONFIG_EXOFS_DEBUG | 48 | int ore_verify_layout(unsigned total_comps, struct ore_layout *layout) |
33 | #define ORE_DBGMSG(fmt, a...) \ | 49 | { |
34 | printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a) | 50 | u64 stripe_length; |
35 | #else | 51 | |
36 | #define ORE_DBGMSG(fmt, a...) \ | 52 | switch (layout->raid_algorithm) { |
37 | do { if (0) printk(fmt, ##a); } while (0) | 53 | case PNFS_OSD_RAID_0: |
38 | #endif | 54 | layout->parity = 0; |
55 | break; | ||
56 | case PNFS_OSD_RAID_5: | ||
57 | layout->parity = 1; | ||
58 | break; | ||
59 | case PNFS_OSD_RAID_PQ: | ||
60 | case PNFS_OSD_RAID_4: | ||
61 | default: | ||
62 | ORE_ERR("Only RAID_0/5 for now\n"); | ||
63 | return -EINVAL; | ||
64 | } | ||
65 | if (0 != (layout->stripe_unit & ~PAGE_MASK)) { | ||
66 | ORE_ERR("Stripe Unit(0x%llx)" | ||
67 | " must be Multples of PAGE_SIZE(0x%lx)\n", | ||
68 | _LLU(layout->stripe_unit), PAGE_SIZE); | ||
69 | return -EINVAL; | ||
70 | } | ||
71 | if (layout->group_width) { | ||
72 | if (!layout->group_depth) { | ||
73 | ORE_ERR("group_depth == 0 && group_width != 0\n"); | ||
74 | return -EINVAL; | ||
75 | } | ||
76 | if (total_comps < (layout->group_width * layout->mirrors_p1)) { | ||
77 | ORE_ERR("Data Map wrong, " | ||
78 | "numdevs=%d < group_width=%d * mirrors=%d\n", | ||
79 | total_comps, layout->group_width, | ||
80 | layout->mirrors_p1); | ||
81 | return -EINVAL; | ||
82 | } | ||
83 | layout->group_count = total_comps / layout->mirrors_p1 / | ||
84 | layout->group_width; | ||
85 | } else { | ||
86 | if (layout->group_depth) { | ||
87 | printk(KERN_NOTICE "Warning: group_depth ignored " | ||
88 | "group_width == 0 && group_depth == %lld\n", | ||
89 | _LLU(layout->group_depth)); | ||
90 | } | ||
91 | layout->group_width = total_comps / layout->mirrors_p1; | ||
92 | layout->group_depth = -1; | ||
93 | layout->group_count = 1; | ||
94 | } | ||
39 | 95 | ||
40 | /* u64 has problems with printk this will cast it to unsigned long long */ | 96 | stripe_length = (u64)layout->group_width * layout->stripe_unit; |
41 | #define _LLU(x) (unsigned long long)(x) | 97 | if (stripe_length >= (1ULL << 32)) { |
98 | ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported\n", | ||
99 | _LLU(stripe_length)); | ||
100 | return -EINVAL; | ||
101 | } | ||
42 | 102 | ||
43 | #define ORE_DBGMSG2(M...) do {} while (0) | 103 | layout->max_io_length = |
44 | /* #define ORE_DBGMSG2 ORE_DBGMSG */ | 104 | (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) * |
105 | layout->group_width; | ||
106 | if (layout->parity) { | ||
107 | unsigned stripe_length = | ||
108 | (layout->group_width - layout->parity) * | ||
109 | layout->stripe_unit; | ||
45 | 110 | ||
46 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); | 111 | layout->max_io_length /= stripe_length; |
47 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | 112 | layout->max_io_length *= stripe_length; |
48 | MODULE_LICENSE("GPL"); | 113 | } |
114 | return 0; | ||
115 | } | ||
116 | EXPORT_SYMBOL(ore_verify_layout); | ||
49 | 117 | ||
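To make the fix-ups concrete, a worked example with illustrative numbers (not taken from this patch): 8 components, mirrors_p1 = 1, RAID5, stripe_unit = 64K, group_width = 0 on input. ore_verify_layout() would derive:

	parity        = 1                      /* PNFS_OSD_RAID_5 */
	group_width   = 8 / 1 = 8              /* all comps, one group */
	group_depth   = -1, group_count = 1
	stripe_length = 8 * 64K = 512K         /* < 2^32, so accepted */

max_io_length is first sized from BIO_MAX_PAGES_KMALLOC and then, because parity is set, rounded down to a whole multiple of the 448K data stripe (7 data devices * 64K), so that RAID5 writes always end on a stripe boundary.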
50 | static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) | 118 | static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) |
51 | { | 119 | { |
52 | return ios->comps->comps[index & ios->comps->single_comp].cred; | 120 | return ios->oc->comps[index & ios->oc->single_comp].cred; |
53 | } | 121 | } |
54 | 122 | ||
55 | static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) | 123 | static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) |
56 | { | 124 | { |
57 | return &ios->comps->comps[index & ios->comps->single_comp].obj; | 125 | return &ios->oc->comps[index & ios->oc->single_comp].obj; |
58 | } | 126 | } |
59 | 127 | ||
60 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) | 128 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) |
61 | { | 129 | { |
62 | return ios->comps->ods[index]; | 130 | ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p\n", |
131 | ios->oc->first_dev, ios->oc->numdevs, index, | ||
132 | ios->oc->ods); | ||
133 | |||
134 | return ore_comp_dev(ios->oc, index); | ||
63 | } | 135 | } |
64 | 136 | ||
65 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, | 137 | int _ore_get_io_state(struct ore_layout *layout, |
138 | struct ore_components *oc, unsigned numdevs, | ||
139 | unsigned sgs_per_dev, unsigned num_par_pages, | ||
140 | struct ore_io_state **pios) | ||
141 | { | ||
142 | struct ore_io_state *ios; | ||
143 | struct page **pages; | ||
144 | struct osd_sg_entry *sgilist; | ||
145 | struct __alloc_all_io_state { | ||
146 | struct ore_io_state ios; | ||
147 | struct ore_per_dev_state per_dev[numdevs]; | ||
148 | union { | ||
149 | struct osd_sg_entry sglist[sgs_per_dev * numdevs]; | ||
150 | struct page *pages[num_par_pages]; | ||
151 | }; | ||
152 | } *_aios; | ||
153 | |||
154 | if (likely(sizeof(*_aios) <= PAGE_SIZE)) { | ||
155 | _aios = kzalloc(sizeof(*_aios), GFP_KERNEL); | ||
156 | if (unlikely(!_aios)) { | ||
157 | ORE_DBGMSG("Failed kzalloc bytes=%zd\n", | ||
158 | sizeof(*_aios)); | ||
159 | *pios = NULL; | ||
160 | return -ENOMEM; | ||
161 | } | ||
162 | pages = num_par_pages ? _aios->pages : NULL; | ||
163 | sgilist = sgs_per_dev ? _aios->sglist : NULL; | ||
164 | ios = &_aios->ios; | ||
165 | } else { | ||
166 | struct __alloc_small_io_state { | ||
167 | struct ore_io_state ios; | ||
168 | struct ore_per_dev_state per_dev[numdevs]; | ||
169 | } *_aio_small; | ||
170 | union __extra_part { | ||
171 | struct osd_sg_entry sglist[sgs_per_dev * numdevs]; | ||
172 | struct page *pages[num_par_pages]; | ||
173 | } *extra_part; | ||
174 | |||
175 | _aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL); | ||
176 | if (unlikely(!_aio_small)) { | ||
177 | ORE_DBGMSG("Failed alloc first part bytes=%zd\n", | ||
178 | sizeof(*_aio_small)); | ||
179 | *pios = NULL; | ||
180 | return -ENOMEM; | ||
181 | } | ||
182 | extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL); | ||
183 | if (unlikely(!extra_part)) { | ||
184 | ORE_DBGMSG("Failed alloc second part bytes=%zd\n", | ||
185 | sizeof(*extra_part)); | ||
186 | kfree(_aio_small); | ||
187 | *pios = NULL; | ||
188 | return -ENOMEM; | ||
189 | } | ||
190 | |||
191 | pages = num_par_pages ? extra_part->pages : NULL; | ||
192 | sgilist = sgs_per_dev ? extra_part->sglist : NULL; | ||
193 | /* In this case the per_dev[0].sglist holds the pointer to | ||
194 | * be freed | ||
195 | */ | ||
196 | ios = &_aio_small->ios; | ||
197 | ios->extra_part_alloc = true; | ||
198 | } | ||
199 | |||
200 | if (pages) { | ||
201 | ios->parity_pages = pages; | ||
202 | ios->max_par_pages = num_par_pages; | ||
203 | } | ||
204 | if (sgilist) { | ||
205 | unsigned d; | ||
206 | |||
207 | for (d = 0; d < numdevs; ++d) { | ||
208 | ios->per_dev[d].sglist = sgilist; | ||
209 | sgilist += sgs_per_dev; | ||
210 | } | ||
211 | ios->sgs_per_dev = sgs_per_dev; | ||
212 | } | ||
213 | |||
214 | ios->layout = layout; | ||
215 | ios->oc = oc; | ||
216 | *pios = ios; | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | /* Allocate an io_state for only a single group of devices | ||
221 | * | ||
222 | * If a user needs to call ore_read/write() this version must be used because it | ||
223 | * allocates extra stuff for striping and raid. | ||
224 | * The ore might decide to do IO on less than @length bytes due to alignment | ||
225 | * and constraints as follows: | ||
226 | * - The IO cannot cross a group boundary. | ||
227 | * - In raid5/6 the end of the IO must align at the end of a stripe, e.g. | ||
228 | * (@offset + @length) % stripe_size == 0, or the complete range is within a | ||
229 | * single stripe. | ||
230 | * - Memory conditions may only permit a shorter IO. (A user can use @length=~0 | ||
231 | * and check the returned ios->length for max_io_size.) | ||
232 | * | ||
233 | * The caller must check the returned ios->length (and/or ios->nr_pages) and | ||
234 | * re-issue those pages that fall outside of ios->length. | ||
235 | */ | ||
236 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | ||
66 | bool is_reading, u64 offset, u64 length, | 237 | bool is_reading, u64 offset, u64 length, |
67 | struct ore_io_state **pios) | 238 | struct ore_io_state **pios) |
68 | { | 239 | { |
69 | struct ore_io_state *ios; | 240 | struct ore_io_state *ios; |
241 | unsigned numdevs = layout->group_width * layout->mirrors_p1; | ||
242 | unsigned sgs_per_dev = 0, max_par_pages = 0; | ||
243 | int ret; | ||
70 | 244 | ||
71 | /*TODO: Maybe use kmem_cach per sbi of size | 245 | if (layout->parity && length) { |
72 | * exofs_io_state_size(layout->s_numdevs) | 246 | unsigned data_devs = layout->group_width - layout->parity; |
73 | */ | 247 | unsigned stripe_size = layout->stripe_unit * data_devs; |
74 | ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL); | 248 | unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; |
75 | if (unlikely(!ios)) { | 249 | u32 remainder; |
76 | ORE_DBGMSG("Failed kzalloc bytes=%d\n", | 250 | u64 num_stripes; |
77 | ore_io_state_size(comps->numdevs)); | 251 | u64 num_raid_units; |
78 | *pios = NULL; | 252 | |
79 | return -ENOMEM; | 253 | num_stripes = div_u64_rem(length, stripe_size, &remainder); |
254 | if (remainder) | ||
255 | ++num_stripes; | ||
256 | |||
257 | num_raid_units = num_stripes * layout->parity; | ||
258 | |||
259 | if (is_reading) { | ||
260 | /* For reads add per_dev sglist array */ | ||
261 | /* TODO: Raid 6 we need twice more. Actually: | ||
262 | * num_stripes / LCMdP(W,P); | ||
263 | * if (W%P != 0) num_stripes *= parity; | ||
264 | */ | ||
265 | |||
266 | /* first/last seg is split */ | ||
267 | num_raid_units += layout->group_width; | ||
268 | sgs_per_dev = div_u64(num_raid_units, data_devs); | ||
269 | } else { | ||
270 | /* For Writes add parity pages array. */ | ||
271 | max_par_pages = num_raid_units * pages_in_unit * | ||
272 | sizeof(struct page *); | ||
273 | } | ||
80 | } | 274 | } |
81 | 275 | ||
82 | ios->layout = layout; | 276 | ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages, |
83 | ios->comps = comps; | 277 | pios); |
84 | ios->offset = offset; | 278 | if (unlikely(ret)) |
85 | ios->length = length; | 279 | return ret; |
280 | |||
281 | ios = *pios; | ||
86 | ios->reading = is_reading; | 282 | ios->reading = is_reading; |
283 | ios->offset = offset; | ||
284 | |||
285 | if (length) { | ||
286 | ore_calc_stripe_info(layout, offset, length, &ios->si); | ||
287 | ios->length = ios->si.length; | ||
288 | ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; | ||
289 | if (layout->parity) | ||
290 | _ore_post_alloc_raid_stuff(ios); | ||
291 | } | ||
87 | 292 | ||
88 | *pios = ios; | ||
89 | return 0; | 293 | return 0; |
90 | } | 294 | } |
91 | EXPORT_SYMBOL(ore_get_rw_state); | 295 | EXPORT_SYMBOL(ore_get_rw_state); |
92 | 296 | ||
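A hedged, worked sketch of the sizing above (numbers are illustrative only): RAID5, group_width = 8 so data_devs = 7, parity = 1, stripe_unit = 64K (pages_in_unit = 16 with 4K pages), length = 1M:

	stripe_size    = 7 * 64K = 448K
	num_stripes    = ceil(1M / 448K) = 3
	num_raid_units = 3 * parity = 3

	read:  num_raid_units += group_width => 11; sgs_per_dev = 11 / 7 = 1
	write: max_par_pages = 3 * 16 * sizeof(struct page *)

so a read reserves one extra scatter-gather entry per device and a write reserves room to point at the parity pages of every stripe the IO touches.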
93 | int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, | 297 | /* Allocate an io_state for all the devices in the comps array |
94 | struct ore_io_state **ios) | 298 | * |
299 | * This version of io_state allocation is used mostly by create/remove | ||
300 | * and trunc where we currently need all the devices. The only wasteful | ||
301 | * bit is the read/write_attributes with no IO. Those sites should | ||
302 | * be converted to use ore_get_rw_state() with length=0. | ||
303 | */ | ||
304 | int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc, | ||
305 | struct ore_io_state **pios) | ||
95 | { | 306 | { |
96 | return ore_get_rw_state(layout, comps, true, 0, 0, ios); | 307 | return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios); |
97 | } | 308 | } |
98 | EXPORT_SYMBOL(ore_get_io_state); | 309 | EXPORT_SYMBOL(ore_get_io_state); |
99 | 310 | ||
@@ -111,6 +322,7 @@ void ore_put_io_state(struct ore_io_state *ios) | |||
111 | bio_put(per_dev->bio); | 322 | bio_put(per_dev->bio); |
112 | } | 323 | } |
113 | 324 | ||
325 | _ore_free_raid_stuff(ios); | ||
114 | kfree(ios); | 326 | kfree(ios); |
115 | } | 327 | } |
116 | } | 328 | } |
@@ -138,7 +350,7 @@ static void _done_io(struct osd_request *or, void *p) | |||
138 | kref_put(&ios->kref, _last_io); | 350 | kref_put(&ios->kref, _last_io); |
139 | } | 351 | } |
140 | 352 | ||
141 | static int ore_io_execute(struct ore_io_state *ios) | 353 | int ore_io_execute(struct ore_io_state *ios) |
142 | { | 354 | { |
143 | DECLARE_COMPLETION_ONSTACK(wait); | 355 | DECLARE_COMPLETION_ONSTACK(wait); |
144 | bool sync = (ios->done == NULL); | 356 | bool sync = (ios->done == NULL); |
@@ -198,7 +410,7 @@ static void _clear_bio(struct bio *bio) | |||
198 | } | 410 | } |
199 | } | 411 | } |
200 | 412 | ||
201 | int ore_check_io(struct ore_io_state *ios, u64 *resid) | 413 | int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) |
202 | { | 414 | { |
203 | enum osd_err_priority acumulated_osd_err = 0; | 415 | enum osd_err_priority acumulated_osd_err = 0; |
204 | int acumulated_lin_err = 0; | 416 | int acumulated_lin_err = 0; |
@@ -206,7 +418,8 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid) | |||
206 | 418 | ||
207 | for (i = 0; i < ios->numdevs; i++) { | 419 | for (i = 0; i < ios->numdevs; i++) { |
208 | struct osd_sense_info osi; | 420 | struct osd_sense_info osi; |
209 | struct osd_request *or = ios->per_dev[i].or; | 421 | struct ore_per_dev_state *per_dev = &ios->per_dev[i]; |
422 | struct osd_request *or = per_dev->or; | ||
210 | int ret; | 423 | int ret; |
211 | 424 | ||
212 | if (unlikely(!or)) | 425 | if (unlikely(!or)) |
@@ -218,29 +431,31 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid) | |||
218 | 431 | ||
219 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { | 432 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { |
220 | /* start read offset passed end of file */ | 433 | /* start read offset passed end of file */
221 | _clear_bio(ios->per_dev[i].bio); | 434 | _clear_bio(per_dev->bio); |
222 | ORE_DBGMSG("start read offset passed end of file " | 435 | ORE_DBGMSG("start read offset passed end of file " |
223 | "offset=0x%llx, length=0x%llx\n", | 436 | "offset=0x%llx, length=0x%llx\n", |
224 | _LLU(ios->per_dev[i].offset), | 437 | _LLU(per_dev->offset), |
225 | _LLU(ios->per_dev[i].length)); | 438 | _LLU(per_dev->length)); |
226 | 439 | ||
227 | continue; /* we recovered */ | 440 | continue; /* we recovered */ |
228 | } | 441 | } |
229 | 442 | ||
443 | if (on_dev_error) { | ||
444 | u64 residual = ios->reading ? | ||
445 | or->in.residual : or->out.residual; | ||
446 | u64 offset = (ios->offset + ios->length) - residual; | ||
447 | struct ore_dev *od = ios->oc->ods[ | ||
448 | per_dev->dev - ios->oc->first_dev]; | ||
449 | |||
450 | on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, | ||
451 | offset, residual); | ||
452 | } | ||
230 | if (osi.osd_err_pri >= acumulated_osd_err) { | 453 | if (osi.osd_err_pri >= acumulated_osd_err) { |
231 | acumulated_osd_err = osi.osd_err_pri; | 454 | acumulated_osd_err = osi.osd_err_pri; |
232 | acumulated_lin_err = ret; | 455 | acumulated_lin_err = ret; |
233 | } | 456 | } |
234 | } | 457 | } |
235 | 458 | ||
236 | /* TODO: raid specific residual calculations */ | ||
237 | if (resid) { | ||
238 | if (likely(!acumulated_lin_err)) | ||
239 | *resid = 0; | ||
240 | else | ||
241 | *resid = ios->length; | ||
242 | } | ||
243 | |||
244 | return acumulated_lin_err; | 459 | return acumulated_lin_err; |
245 | } | 460 | } |
246 | EXPORT_SYMBOL(ore_check_io); | 461 | EXPORT_SYMBOL(ore_check_io); |
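The residual out-parameter is gone; callers that care about partial failures now pass a callback. Its signature can be read off the call site above; a sketch of a handler an ore user might register (the name is hypothetical):

	static void my_on_dev_error(struct ore_io_state *ios, struct ore_dev *od,
				    unsigned dev_index, enum osd_err_priority pri,
				    u64 dev_offset, u64 dev_len)
	{
		/* e.g. record the failed byte range or mark @od suspect */
	}

	ret = ore_check_io(ios, my_on_dev_error);	/* or NULL to skip */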
@@ -248,61 +463,65 @@ EXPORT_SYMBOL(ore_check_io); | |||
248 | /* | 463 | /* |
249 | * L - logical offset into the file | 464 | * L - logical offset into the file |
250 | * | 465 | * |
251 | * U - The number of bytes in a stripe within a group | 466 | * D - number of Data devices |
467 | * D = group_width - parity | ||
252 | * | 468 | * |
253 | * U = stripe_unit * group_width | 469 | * U - The number of bytes in a stripe within a group |
470 | * U = stripe_unit * D | ||
254 | * | 471 | * |
255 | * T - The number of bytes striped within a group of component objects | 472 | * T - The number of bytes striped within a group of component objects |
256 | * (before advancing to the next group) | 473 | * (before advancing to the next group) |
257 | * | 474 | * T = U * group_depth |
258 | * T = stripe_unit * group_width * group_depth | ||
259 | * | 475 | * |
260 | * S - The number of bytes striped across all component objects | 476 | * S - The number of bytes striped across all component objects |
261 | * before the pattern repeats | 477 | * before the pattern repeats |
478 | * S = T * group_count | ||
262 | * | 479 | * |
263 | * S = stripe_unit * group_width * group_depth * group_count | 480 | * M - The "major" (i.e., across all components) cycle number |
264 | * | ||
265 | * M - The "major" (i.e., across all components) stripe number | ||
266 | * | ||
267 | * M = L / S | 481 | * M = L / S |
268 | * | 482 | * |
269 | * G - Counts the groups from the beginning of the major stripe | 483 | * G - Counts the groups from the beginning of the major cycle |
270 | * | ||
271 | * G = (L - (M * S)) / T [or (L % S) / T] | 484 | * G = (L - (M * S)) / T [or (L % S) / T] |
272 | * | 485 | * |
273 | * H - The byte offset within the group | 486 | * H - The byte offset within the group |
274 | * | ||
275 | * H = (L - (M * S)) % T [or (L % S) % T] | 487 | * H = (L - (M * S)) % T [or (L % S) % T] |
276 | * | 488 | * |
277 | * N - The "minor" (i.e., across the group) stripe number | 489 | * N - The "minor" (i.e., across the group) stripe number |
278 | * | ||
279 | * N = H / U | 490 | * N = H / U |
280 | * | 491 | * |
281 | * C - The component index corresponding to L | 492 | * C - The component index corresponding to L |
282 | * | 493 | * |
283 | * C = (H - (N * U)) / stripe_unit + G * group_width | 494 | * C = (H - (N * U)) / stripe_unit + G * D |
284 | * [or (L % U) / stripe_unit + G * group_width] | 495 | * [or (L % U) / stripe_unit + G * D] |
285 | * | 496 | * |
286 | * O - The component offset corresponding to L | 497 | * O - The component offset corresponding to L |
287 | * | ||
288 | * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit | 498 | * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit |
499 | * | ||
500 | * LCMdP – Parity cycle: Lowest Common Multiple of group_width, parity | ||
501 | * divide by parity | ||
502 | * LCMdP = lcm(group_width, parity) / parity | ||
503 | * | ||
504 | * R - The parity Rotation stripe | ||
505 | * (Note parity cycle always starts at a group's boundary) | ||
506 | * R = N % LCMdP | ||
507 | * | ||
508 | * I = the first parity device index | ||
509 | * I = (group_width + group_width - R*parity - parity) % group_width | ||
510 | * | ||
511 | * Craid - The component index Rotated | ||
512 | * Craid = (group_width + C - R*parity) % group_width | ||
513 | * (We add the group_width to avoid negative numbers modulo math) | ||
289 | */ | 514 | */ |
290 | struct _striping_info { | 515 | void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, |
291 | u64 obj_offset; | 516 | u64 length, struct ore_striping_info *si) |
292 | u64 group_length; | ||
293 | u64 M; /* for truncate */ | ||
294 | unsigned dev; | ||
295 | unsigned unit_off; | ||
296 | }; | ||
297 | |||
298 | static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset, | ||
299 | struct _striping_info *si) | ||
300 | { | 517 | { |
301 | u32 stripe_unit = layout->stripe_unit; | 518 | u32 stripe_unit = layout->stripe_unit; |
302 | u32 group_width = layout->group_width; | 519 | u32 group_width = layout->group_width; |
303 | u64 group_depth = layout->group_depth; | 520 | u64 group_depth = layout->group_depth; |
521 | u32 parity = layout->parity; | ||
304 | 522 | ||
305 | u32 U = stripe_unit * group_width; | 523 | u32 D = group_width - parity; |
524 | u32 U = D * stripe_unit; | ||
306 | u64 T = U * group_depth; | 525 | u64 T = U * group_depth; |
307 | u64 S = T * layout->group_count; | 526 | u64 S = T * layout->group_count; |
308 | u64 M = div64_u64(file_offset, S); | 527 | u64 M = div64_u64(file_offset, S); |
@@ -318,39 +537,65 @@ static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset, | |||
318 | u32 N = div_u64(H, U); | 537 | u32 N = div_u64(H, U); |
319 | 538 | ||
320 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | 539 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ |
321 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | 540 | u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; |
322 | si->dev *= layout->mirrors_p1; | ||
323 | 541 | ||
324 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); | 542 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); |
325 | 543 | ||
326 | si->obj_offset = si->unit_off + (N * stripe_unit) + | 544 | si->obj_offset = si->unit_off + (N * stripe_unit) + |
327 | (M * group_depth * stripe_unit); | 545 | (M * group_depth * stripe_unit); |
328 | 546 | ||
329 | si->group_length = T - H; | 547 | if (parity) { |
548 | u32 LCMdP = lcm(group_width, parity) / parity; | ||
549 | /* R = N % LCMdP; */ | ||
550 | u32 RxP = (N % LCMdP) * parity; | ||
551 | u32 first_dev = C - C % group_width; | ||
552 | |||
553 | si->par_dev = (group_width + group_width - parity - RxP) % | ||
554 | group_width + first_dev; | ||
555 | si->dev = (group_width + C - RxP) % group_width + first_dev; | ||
556 | si->bytes_in_stripe = U; | ||
557 | si->first_stripe_start = M * S + G * T + N * U; | ||
558 | } else { | ||
559 | /* Make the math correct; see _prepare_one_group */ | ||
560 | si->par_dev = group_width; | ||
561 | si->dev = C; | ||
562 | } | ||
563 | |||
564 | si->dev *= layout->mirrors_p1; | ||
565 | si->par_dev *= layout->mirrors_p1; | ||
566 | si->offset = file_offset; | ||
567 | si->length = T - H; | ||
568 | if (si->length > length) | ||
569 | si->length = length; | ||
330 | si->M = M; | 570 | si->M = M; |
331 | } | 571 | } |
572 | EXPORT_SYMBOL(ore_calc_stripe_info); | ||
332 | 573 | ||
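A worked pass through the math above (illustrative numbers only, not from the patch): stripe_unit = 64K, group_width = 4, parity = 1 (so D = 3), group_depth = 16, group_count = 2, mirrors_p1 = 1, which gives U = 192K, T = 3M, S = 6M. For L = 3M + 200K:

	M = L / S           = 0
	G = (L % S) / T     = 1        /* second group */
	H = (L % S) % T     = 200K
	N = H / U           = 1        /* second stripe in the group */
	C = (H - N*U) / 64K + G*4 = 0 + 4
	unit_off = 8K, obj_offset = 8K + 1*64K + 0 = 72K

With parity the devices are then rotated: LCMdP = lcm(4, 1) / 1 = 4, R = N % 4 = 1, so par_dev = (4 + 4 - 1 - 1) % 4 + 4 = 6 and dev = (4 + 4 - 1) % 4 + 4 = 7, with bytes_in_stripe = U = 192K. (Note the code computes C with G * group_width, as shown above, even though the comment writes G * D.)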
333 | static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, | 574 | int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, |
334 | unsigned pgbase, struct ore_per_dev_state *per_dev, | 575 | unsigned pgbase, struct page **pages, |
335 | int cur_len) | 576 | struct ore_per_dev_state *per_dev, int cur_len) |
336 | { | 577 | { |
337 | unsigned pg = *cur_pg; | 578 | unsigned pg = *cur_pg; |
338 | struct request_queue *q = | 579 | struct request_queue *q = |
339 | osd_request_queue(_ios_od(ios, per_dev->dev)); | 580 | osd_request_queue(_ios_od(ios, per_dev->dev)); |
340 | 581 | unsigned len = cur_len; | |
341 | per_dev->length += cur_len; | 582 | int ret; |
342 | 583 | ||
343 | if (per_dev->bio == NULL) { | 584 | if (per_dev->bio == NULL) { |
344 | unsigned pages_in_stripe = ios->layout->group_width * | 585 | unsigned pages_in_stripe = ios->layout->group_width * |
345 | (ios->layout->stripe_unit / PAGE_SIZE); | 586 | (ios->layout->stripe_unit / PAGE_SIZE); |
346 | unsigned bio_size = (ios->nr_pages + pages_in_stripe) / | 587 | unsigned nr_pages = ios->nr_pages * ios->layout->group_width / |
347 | ios->layout->group_width; | 588 | (ios->layout->group_width - |
589 | ios->layout->parity); | ||
590 | unsigned bio_size = (nr_pages + pages_in_stripe) / | ||
591 | ios->layout->group_width; | ||
348 | 592 | ||
349 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); | 593 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); |
350 | if (unlikely(!per_dev->bio)) { | 594 | if (unlikely(!per_dev->bio)) { |
351 | ORE_DBGMSG("Failed to allocate BIO size=%u\n", | 595 | ORE_DBGMSG("Failed to allocate BIO size=%u\n", |
352 | bio_size); | 596 | bio_size); |
353 | return -ENOMEM; | 597 | ret = -ENOMEM; |
598 | goto out; | ||
354 | } | 599 | } |
355 | } | 600 | } |
356 | 601 | ||
@@ -358,64 +603,90 @@ static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, | |||
358 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); | 603 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); |
359 | unsigned added_len; | 604 | unsigned added_len; |
360 | 605 | ||
361 | BUG_ON(ios->nr_pages <= pg); | ||
362 | cur_len -= pglen; | 606 | cur_len -= pglen; |
363 | 607 | ||
364 | added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], | 608 | added_len = bio_add_pc_page(q, per_dev->bio, pages[pg], |
365 | pglen, pgbase); | 609 | pglen, pgbase); |
366 | if (unlikely(pglen != added_len)) | 610 | if (unlikely(pglen != added_len)) { |
367 | return -ENOMEM; | 611 | ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n", |
612 | per_dev->bio->bi_vcnt); | ||
613 | ret = -ENOMEM; | ||
614 | goto out; | ||
615 | } | ||
616 | _add_stripe_page(ios->sp2d, &ios->si, pages[pg]); | ||
617 | |||
368 | pgbase = 0; | 618 | pgbase = 0; |
369 | ++pg; | 619 | ++pg; |
370 | } | 620 | } |
371 | BUG_ON(cur_len); | 621 | BUG_ON(cur_len); |
372 | 622 | ||
623 | per_dev->length += len; | ||
373 | *cur_pg = pg; | 624 | *cur_pg = pg; |
374 | return 0; | 625 | ret = 0; |
626 | out: /* We fail the complete unit on an error, e.g. we don't advance | ||
627 | * per_dev->length and cur_pg. This means that we might have a bigger | ||
628 | * bio than the CDB requested length (per_dev->length). That's fine; | ||
629 | * only the opposite is fatal. | ||
630 | */ | ||
631 | return ret; | ||
375 | } | 632 | } |
376 | 633 | ||
377 | static int _prepare_one_group(struct ore_io_state *ios, u64 length, | 634 | static int _prepare_for_striping(struct ore_io_state *ios) |
378 | struct _striping_info *si) | ||
379 | { | 635 | { |
636 | struct ore_striping_info *si = &ios->si; | ||
380 | unsigned stripe_unit = ios->layout->stripe_unit; | 637 | unsigned stripe_unit = ios->layout->stripe_unit; |
381 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | 638 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
382 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | 639 | unsigned group_width = ios->layout->group_width; |
640 | unsigned devs_in_group = group_width * mirrors_p1; | ||
383 | unsigned dev = si->dev; | 641 | unsigned dev = si->dev; |
384 | unsigned first_dev = dev - (dev % devs_in_group); | 642 | unsigned first_dev = dev - (dev % devs_in_group); |
385 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | 643 | unsigned dev_order; |
386 | unsigned cur_pg = ios->pages_consumed; | 644 | unsigned cur_pg = ios->pages_consumed; |
645 | u64 length = ios->length; | ||
387 | int ret = 0; | 646 | int ret = 0; |
388 | 647 | ||
648 | if (!ios->pages) { | ||
649 | ios->numdevs = ios->layout->mirrors_p1; | ||
650 | return 0; | ||
651 | } | ||
652 | |||
653 | BUG_ON(length > si->length); | ||
654 | |||
655 | dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev); | ||
656 | si->cur_comp = dev_order; | ||
657 | si->cur_pg = si->unit_off / PAGE_SIZE; | ||
658 | |||
389 | while (length) { | 659 | while (length) { |
390 | struct ore_per_dev_state *per_dev = &ios->per_dev[dev]; | 660 | unsigned comp = dev - first_dev; |
661 | struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; | ||
391 | unsigned cur_len, page_off = 0; | 662 | unsigned cur_len, page_off = 0; |
392 | 663 | ||
393 | if (!per_dev->length) { | 664 | if (!per_dev->length) { |
394 | per_dev->dev = dev; | 665 | per_dev->dev = dev; |
395 | if (dev < si->dev) { | 666 | if (dev == si->dev) { |
396 | per_dev->offset = si->obj_offset + stripe_unit - | 667 | WARN_ON(dev == si->par_dev); |
397 | si->unit_off; | ||
398 | cur_len = stripe_unit; | ||
399 | } else if (dev == si->dev) { | ||
400 | per_dev->offset = si->obj_offset; | 668 | per_dev->offset = si->obj_offset; |
401 | cur_len = stripe_unit - si->unit_off; | 669 | cur_len = stripe_unit - si->unit_off; |
402 | page_off = si->unit_off & ~PAGE_MASK; | 670 | page_off = si->unit_off & ~PAGE_MASK; |
403 | BUG_ON(page_off && (page_off != ios->pgbase)); | 671 | BUG_ON(page_off && (page_off != ios->pgbase)); |
404 | } else { /* dev > si->dev */ | 672 | } else { |
405 | per_dev->offset = si->obj_offset - si->unit_off; | 673 | if (si->cur_comp > dev_order) |
674 | per_dev->offset = | ||
675 | si->obj_offset - si->unit_off; | ||
676 | else /* si->cur_comp < dev_order */ | ||
677 | per_dev->offset = | ||
678 | si->obj_offset + stripe_unit - | ||
679 | si->unit_off; | ||
406 | cur_len = stripe_unit; | 680 | cur_len = stripe_unit; |
407 | } | 681 | } |
408 | |||
409 | if (max_comp < dev) | ||
410 | max_comp = dev; | ||
411 | } else { | 682 | } else { |
412 | cur_len = stripe_unit; | 683 | cur_len = stripe_unit; |
413 | } | 684 | } |
414 | if (cur_len >= length) | 685 | if (cur_len >= length) |
415 | cur_len = length; | 686 | cur_len = length; |
416 | 687 | ||
417 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, | 688 | ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages, |
418 | cur_len); | 689 | per_dev, cur_len); |
419 | if (unlikely(ret)) | 690 | if (unlikely(ret)) |
420 | goto out; | 691 | goto out; |
421 | 692 | ||
@@ -423,60 +694,60 @@ static int _prepare_one_group(struct ore_io_state *ios, u64 length, | |||
423 | dev = (dev % devs_in_group) + first_dev; | 694 | dev = (dev % devs_in_group) + first_dev; |
424 | 695 | ||
425 | length -= cur_len; | 696 | length -= cur_len; |
426 | } | ||
427 | out: | ||
428 | ios->numdevs = max_comp + mirrors_p1; | ||
429 | ios->pages_consumed = cur_pg; | ||
430 | return ret; | ||
431 | } | ||
432 | |||
433 | static int _prepare_for_striping(struct ore_io_state *ios) | ||
434 | { | ||
435 | u64 length = ios->length; | ||
436 | u64 offset = ios->offset; | ||
437 | struct _striping_info si; | ||
438 | int ret = 0; | ||
439 | 697 | ||
440 | if (!ios->pages) { | 698 | si->cur_comp = (si->cur_comp + 1) % group_width; |
441 | if (ios->kern_buff) { | 699 | if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) { |
442 | struct ore_per_dev_state *per_dev = &ios->per_dev[0]; | 700 | if (!length && ios->sp2d) { |
701 | /* If we are writing and this is the very last | ||
702 | * stripe, then operate on the parity dev. | ||
703 | */ | ||
704 | dev = si->par_dev; | ||
705 | } | ||
706 | if (ios->sp2d) | ||
707 | /* In writes cur_len only indicates whether this is | ||
708 | * the last unit. See _ore_add_parity_unit. | ||
709 | */ | ||
710 | cur_len = length; | ||
711 | per_dev = &ios->per_dev[dev - first_dev]; | ||
712 | if (!per_dev->length) { | ||
713 | /* Only/always the parity unit of the first | ||
714 | * stripe will be empty. So this is a chance to | ||
715 | * initialize the per_dev info. | ||
716 | */ | ||
717 | per_dev->dev = dev; | ||
718 | per_dev->offset = si->obj_offset - si->unit_off; | ||
719 | } | ||
443 | 720 | ||
444 | _calc_stripe_info(ios->layout, ios->offset, &si); | 721 | ret = _ore_add_parity_unit(ios, si, per_dev, cur_len); |
445 | per_dev->offset = si.obj_offset; | 722 | if (unlikely(ret)) |
446 | per_dev->dev = si.dev; | 723 | goto out; |
447 | 724 | ||
448 | /* no cross device without page array */ | 725 | /* Rotate next par_dev backwards with wrapping */
449 | BUG_ON((ios->layout->group_width > 1) && | 726 | si->par_dev = (devs_in_group + si->par_dev - |
450 | (si.unit_off + ios->length > | 727 | ios->layout->parity * mirrors_p1) % |
451 | ios->layout->stripe_unit)); | 728 | devs_in_group + first_dev; |
729 | /* Next stripe, start fresh */ | ||
730 | si->cur_comp = 0; | ||
731 | si->cur_pg = 0; | ||
452 | } | 732 | } |
453 | ios->numdevs = ios->layout->mirrors_p1; | ||
454 | return 0; | ||
455 | } | ||
456 | |||
457 | while (length) { | ||
458 | _calc_stripe_info(ios->layout, offset, &si); | ||
459 | |||
460 | if (length < si.group_length) | ||
461 | si.group_length = length; | ||
462 | |||
463 | ret = _prepare_one_group(ios, si.group_length, &si); | ||
464 | if (unlikely(ret)) | ||
465 | goto out; | ||
466 | |||
467 | offset += si.group_length; | ||
468 | length -= si.group_length; | ||
469 | } | 733 | } |
470 | |||
471 | out: | 734 | out: |
472 | return ret; | 735 | ios->numdevs = devs_in_group; |
736 | ios->pages_consumed = cur_pg; | ||
737 | if (unlikely(ret)) { | ||
738 | if (length == ios->length) | ||
739 | return ret; | ||
740 | else | ||
741 | ios->length -= length; | ||
742 | } | ||
743 | return 0; | ||
473 | } | 744 | } |
474 | 745 | ||
475 | int ore_create(struct ore_io_state *ios) | 746 | int ore_create(struct ore_io_state *ios) |
476 | { | 747 | { |
477 | int i, ret; | 748 | int i, ret; |
478 | 749 | ||
479 | for (i = 0; i < ios->comps->numdevs; i++) { | 750 | for (i = 0; i < ios->oc->numdevs; i++) { |
480 | struct osd_request *or; | 751 | struct osd_request *or; |
481 | 752 | ||
482 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); | 753 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
@@ -501,7 +772,7 @@ int ore_remove(struct ore_io_state *ios) | |||
501 | { | 772 | { |
502 | int i, ret; | 773 | int i, ret; |
503 | 774 | ||
504 | for (i = 0; i < ios->comps->numdevs; i++) { | 775 | for (i = 0; i < ios->oc->numdevs; i++) { |
505 | struct osd_request *or; | 776 | struct osd_request *or; |
506 | 777 | ||
507 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); | 778 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
@@ -543,7 +814,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
543 | goto out; | 814 | goto out; |
544 | } | 815 | } |
545 | per_dev->or = or; | 816 | per_dev->or = or; |
546 | per_dev->offset = master_dev->offset; | ||
547 | 817 | ||
548 | if (ios->pages) { | 818 | if (ios->pages) { |
549 | struct bio *bio; | 819 | struct bio *bio; |
@@ -562,6 +832,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
562 | __bio_clone(bio, master_dev->bio); | 832 | __bio_clone(bio, master_dev->bio); |
563 | bio->bi_bdev = NULL; | 833 | bio->bi_bdev = NULL; |
564 | bio->bi_next = NULL; | 834 | bio->bi_next = NULL; |
835 | per_dev->offset = master_dev->offset; | ||
565 | per_dev->length = master_dev->length; | 836 | per_dev->length = master_dev->length; |
566 | per_dev->bio = bio; | 837 | per_dev->bio = bio; |
567 | per_dev->dev = dev; | 838 | per_dev->dev = dev; |
@@ -579,7 +850,15 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
579 | _LLU(per_dev->offset), | 850 | _LLU(per_dev->offset), |
580 | _LLU(per_dev->length), dev); | 851 | _LLU(per_dev->length), dev); |
581 | } else if (ios->kern_buff) { | 852 | } else if (ios->kern_buff) { |
582 | ret = osd_req_write_kern(or, _ios_obj(ios, dev), | 853 | per_dev->offset = ios->si.obj_offset; |
854 | per_dev->dev = ios->si.dev + dev; | ||
855 | |||
856 | /* no cross device without page array */ | ||
857 | BUG_ON((ios->layout->group_width > 1) && | ||
858 | (ios->si.unit_off + ios->length > | ||
859 | ios->layout->stripe_unit)); | ||
860 | |||
861 | ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev), | ||
583 | per_dev->offset, | 862 | per_dev->offset, |
584 | ios->kern_buff, ios->length); | 863 | ios->kern_buff, ios->length); |
585 | if (unlikely(ret)) | 864 | if (unlikely(ret)) |
@@ -588,7 +867,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
588 | "length=0x%llx dev=%d\n", | 867 | "length=0x%llx dev=%d\n", |
589 | _LLU(_ios_obj(ios, dev)->id), | 868 | _LLU(_ios_obj(ios, dev)->id), |
590 | _LLU(per_dev->offset), | 869 | _LLU(per_dev->offset), |
591 | _LLU(ios->length), dev); | 870 | _LLU(ios->length), per_dev->dev); |
592 | } else { | 871 | } else { |
593 | osd_req_set_attributes(or, _ios_obj(ios, dev)); | 872 | osd_req_set_attributes(or, _ios_obj(ios, dev)); |
594 | ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", | 873 | ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", |
@@ -614,6 +893,14 @@ int ore_write(struct ore_io_state *ios) | |||
614 | int i; | 893 | int i; |
615 | int ret; | 894 | int ret; |
616 | 895 | ||
896 | if (unlikely(ios->sp2d && !ios->r4w)) { | ||
897 | /* A library is attempting a RAID-write without providing | ||
898 | * a pages lock interface. | ||
899 | */ | ||
900 | WARN_ON_ONCE(1); | ||
901 | return -ENOTSUPP; | ||
902 | } | ||
903 | |||
617 | ret = _prepare_for_striping(ios); | 904 | ret = _prepare_for_striping(ios); |
618 | if (unlikely(ret)) | 905 | if (unlikely(ret)) |
619 | return ret; | 906 | return ret; |
@@ -629,7 +916,7 @@ int ore_write(struct ore_io_state *ios) | |||
629 | } | 916 | } |
630 | EXPORT_SYMBOL(ore_write); | 917 | EXPORT_SYMBOL(ore_write); |
631 | 918 | ||
632 | static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) | 919 | int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp) |
633 | { | 920 | { |
634 | struct osd_request *or; | 921 | struct osd_request *or; |
635 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 922 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
@@ -648,22 +935,27 @@ static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) | |||
648 | per_dev->or = or; | 935 | per_dev->or = or; |
649 | 936 | ||
650 | if (ios->pages) { | 937 | if (ios->pages) { |
651 | osd_req_read(or, obj, per_dev->offset, | 938 | if (per_dev->cur_sg) { |
652 | per_dev->bio, per_dev->length); | 939 | /* finalize the last sg_entry */ |
940 | _ore_add_sg_seg(per_dev, 0, false); | ||
941 | if (unlikely(!per_dev->cur_sg)) | ||
942 | return 0; /* Skip parity only device */ | ||
943 | |||
944 | osd_req_read_sg(or, obj, per_dev->bio, | ||
945 | per_dev->sglist, per_dev->cur_sg); | ||
946 | } else { | ||
947 | /* The no raid case */ | ||
948 | osd_req_read(or, obj, per_dev->offset, | ||
949 | per_dev->bio, per_dev->length); | ||
950 | } | ||
951 | |||
653 | ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" | 952 | ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" |
654 | " dev=%d\n", _LLU(obj->id), | 953 | " dev=%d sg_len=%d\n", _LLU(obj->id), |
655 | _LLU(per_dev->offset), _LLU(per_dev->length), | 954 | _LLU(per_dev->offset), _LLU(per_dev->length), |
656 | first_dev); | 955 | first_dev, per_dev->cur_sg); |
657 | } else if (ios->kern_buff) { | ||
658 | int ret = osd_req_read_kern(or, obj, per_dev->offset, | ||
659 | ios->kern_buff, ios->length); | ||
660 | ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " | ||
661 | "length=0x%llx dev=%d ret=>%d\n", | ||
662 | _LLU(obj->id), _LLU(per_dev->offset), | ||
663 | _LLU(ios->length), first_dev, ret); | ||
664 | if (unlikely(ret)) | ||
665 | return ret; | ||
666 | } else { | 956 | } else { |
957 | BUG_ON(ios->kern_buff); | ||
958 | |||
667 | osd_req_get_attributes(or, obj); | 959 | osd_req_get_attributes(or, obj); |
668 | ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", | 960 | ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", |
669 | _LLU(obj->id), | 961 | _LLU(obj->id), |
@@ -688,7 +980,7 @@ int ore_read(struct ore_io_state *ios) | |||
688 | return ret; | 980 | return ret; |
689 | 981 | ||
690 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | 982 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { |
691 | ret = _read_mirror(ios, i); | 983 | ret = _ore_read_mirror(ios, i); |
692 | if (unlikely(ret)) | 984 | if (unlikely(ret)) |
693 | return ret; | 985 | return ret; |
694 | } | 986 | } |
@@ -744,31 +1036,29 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, | |||
744 | } | 1036 | } |
745 | 1037 | ||
746 | struct _trunc_info { | 1038 | struct _trunc_info { |
747 | struct _striping_info si; | 1039 | struct ore_striping_info si; |
748 | u64 prev_group_obj_off; | 1040 | u64 prev_group_obj_off; |
749 | u64 next_group_obj_off; | 1041 | u64 next_group_obj_off; |
750 | 1042 | ||
751 | unsigned first_group_dev; | 1043 | unsigned first_group_dev; |
752 | unsigned nex_group_dev; | 1044 | unsigned nex_group_dev; |
753 | unsigned max_devs; | ||
754 | }; | 1045 | }; |
755 | 1046 | ||
756 | void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, | 1047 | static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, |
757 | struct _trunc_info *ti) | 1048 | struct _trunc_info *ti) |
758 | { | 1049 | { |
759 | unsigned stripe_unit = layout->stripe_unit; | 1050 | unsigned stripe_unit = layout->stripe_unit; |
760 | 1051 | ||
761 | _calc_stripe_info(layout, file_offset, &ti->si); | 1052 | ore_calc_stripe_info(layout, file_offset, 0, &ti->si); |
762 | 1053 | ||
763 | ti->prev_group_obj_off = ti->si.M * stripe_unit; | 1054 | ti->prev_group_obj_off = ti->si.M * stripe_unit; |
764 | ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; | 1055 | ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; |
765 | 1056 | ||
766 | ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); | 1057 | ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); |
767 | ti->nex_group_dev = ti->first_group_dev + layout->group_width; | 1058 | ti->nex_group_dev = ti->first_group_dev + layout->group_width; |
768 | ti->max_devs = layout->group_width * layout->group_count; | ||
769 | } | 1059 | } |
770 | 1060 | ||
771 | int ore_truncate(struct ore_layout *layout, struct ore_components *comps, | 1061 | int ore_truncate(struct ore_layout *layout, struct ore_components *oc, |
772 | u64 size) | 1062 | u64 size) |
773 | { | 1063 | { |
774 | struct ore_io_state *ios; | 1064 | struct ore_io_state *ios; |
@@ -779,22 +1069,22 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *comps, | |||
779 | struct _trunc_info ti; | 1069 | struct _trunc_info ti; |
780 | int i, ret; | 1070 | int i, ret; |
781 | 1071 | ||
782 | ret = ore_get_io_state(layout, comps, &ios); | 1072 | ret = ore_get_io_state(layout, oc, &ios); |
783 | if (unlikely(ret)) | 1073 | if (unlikely(ret)) |
784 | return ret; | 1074 | return ret; |
785 | 1075 | ||
786 | _calc_trunk_info(ios->layout, size, &ti); | 1076 | _calc_trunk_info(ios->layout, size, &ti); |
787 | 1077 | ||
788 | size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), | 1078 | size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs), |
789 | GFP_KERNEL); | 1079 | GFP_KERNEL); |
790 | if (unlikely(!size_attrs)) { | 1080 | if (unlikely(!size_attrs)) { |
791 | ret = -ENOMEM; | 1081 | ret = -ENOMEM; |
792 | goto out; | 1082 | goto out; |
793 | } | 1083 | } |
794 | 1084 | ||
795 | ios->numdevs = ios->comps->numdevs; | 1085 | ios->numdevs = ios->oc->numdevs; |
796 | 1086 | ||
797 | for (i = 0; i < ti.max_devs; ++i) { | 1087 | for (i = 0; i < ios->numdevs; ++i) { |
798 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; | 1088 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; |
799 | u64 obj_size; | 1089 | u64 obj_size; |
800 | 1090 | ||
@@ -815,7 +1105,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *comps, | |||
815 | size_attr->attr.val_ptr = &size_attr->newsize; | 1105 | size_attr->attr.val_ptr = &size_attr->newsize; |
816 | 1106 | ||
817 | ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", | 1107 | ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", |
818 | _LLU(comps->comps->obj.id), _LLU(obj_size), i); | 1108 | _LLU(oc->comps->obj.id), _LLU(obj_size), i); |
819 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, | 1109 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, |
820 | &size_attr->attr); | 1110 | &size_attr->attr); |
821 | if (unlikely(ret)) | 1111 | if (unlikely(ret)) |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c new file mode 100644 index 000000000000..29c47e5c4a86 --- /dev/null +++ b/fs/exofs/ore_raid.c | |||
@@ -0,0 +1,660 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 | ||
3 | * Boaz Harrosh <bharrosh@panasas.com> | ||
4 | * | ||
5 | * This file is part of the objects raid engine (ore). | ||
6 | * | ||
7 | * It is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as published | ||
9 | * by the Free Software Foundation. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with "ore". If not, write to the Free Software Foundation, Inc: | ||
13 | * "Free Software Foundation <info@fsf.org>" | ||
14 | */ | ||
15 | |||
16 | #include <linux/gfp.h> | ||
17 | #include <linux/async_tx.h> | ||
18 | |||
19 | #include "ore_raid.h" | ||
20 | |||
21 | #undef ORE_DBGMSG2 | ||
22 | #define ORE_DBGMSG2 ORE_DBGMSG | ||
23 | |||
24 | struct page *_raid_page_alloc(void) | ||
25 | { | ||
26 | return alloc_page(GFP_KERNEL); | ||
27 | } | ||
28 | |||
29 | void _raid_page_free(struct page *p) | ||
30 | { | ||
31 | __free_page(p); | ||
32 | } | ||
33 | |||
34 | /* This struct is forward declared in ore_io_state, but is private to here. | ||
35 | * It is put on ios->sp2d for RAID5/6 writes only. See _gen_xor_unit. | ||
36 | * | ||
37 | * __stripe_pages_2d is a 2d array of pages, and it is also a corner turn. | ||
38 | * Ascending page index access is sp2d(p-minor, c-major). But storage is | ||
39 | * sp2d[p-minor][c-major], so it can be properly presented to the async-xor | ||
40 | * API. | ||
41 | */ | ||
42 | struct __stripe_pages_2d { | ||
43 | /* Cache some hot path repeated calculations */ | ||
44 | unsigned parity; | ||
45 | unsigned data_devs; | ||
46 | unsigned pages_in_unit; | ||
47 | |||
48 | bool needed; | ||
49 | |||
50 | /* Array size is pages_in_unit (layout->stripe_unit / PAGE_SIZE) */ | ||
51 | struct __1_page_stripe { | ||
52 | bool alloc; | ||
53 | unsigned write_count; | ||
54 | struct async_submit_ctl submit; | ||
55 | struct dma_async_tx_descriptor *tx; | ||
56 | |||
57 | /* The size of this array is data_devs + parity */ | ||
58 | struct page **pages; | ||
59 | struct page **scribble; | ||
60 | /* bool array, size of this array is data_devs */ | ||
61 | char *page_is_read; | ||
62 | } _1p_stripes[]; | ||
63 | }; | ||
64 | |||
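To illustrate the corner turn (a sketch, not code from the patch): the page that is the @c'th data unit of stripe-page @p lands in

	sp2d->_1p_stripes[p].pages[c]

so each _1p_stripes[p].pages[] row is laid out exactly as the src_list that async_xor() consumes, with the parity page stored at index data_devs; see _ore_add_stripe_page() and _gen_xor_unit() below.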
65 | /* This can get bigger than a page. So support multiple page allocations. | ||
66 | * _sp2d_free should be called even if _sp2d_alloc fails (by returning | ||
67 | * non-zero). | ||
68 | */ | ||
69 | static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, | ||
70 | unsigned parity, struct __stripe_pages_2d **psp2d) | ||
71 | { | ||
72 | struct __stripe_pages_2d *sp2d; | ||
73 | unsigned data_devs = group_width - parity; | ||
74 | struct _alloc_all_bytes { | ||
75 | struct __alloc_stripe_pages_2d { | ||
76 | struct __stripe_pages_2d sp2d; | ||
77 | struct __1_page_stripe _1p_stripes[pages_in_unit]; | ||
78 | } __asp2d; | ||
79 | struct __alloc_1p_arrays { | ||
80 | struct page *pages[group_width]; | ||
81 | struct page *scribble[group_width]; | ||
82 | char page_is_read[data_devs]; | ||
83 | } __a1pa[pages_in_unit]; | ||
84 | } *_aab; | ||
85 | struct __alloc_1p_arrays *__a1pa; | ||
86 | struct __alloc_1p_arrays *__a1pa_end; | ||
87 | const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); | ||
88 | unsigned num_a1pa, alloc_size, i; | ||
89 | |||
90 | /* FIXME: check these numbers in ore_verify_layout */ | ||
91 | BUG_ON(sizeof(_aab->__asp2d) > PAGE_SIZE); | ||
92 | BUG_ON(sizeof__a1pa > PAGE_SIZE); | ||
93 | |||
94 | if (sizeof(*_aab) > PAGE_SIZE) { | ||
95 | num_a1pa = (PAGE_SIZE - sizeof(_aab->__asp2d)) / sizeof__a1pa; | ||
96 | alloc_size = sizeof(_aab->__asp2d) + sizeof__a1pa * num_a1pa; | ||
97 | } else { | ||
98 | num_a1pa = pages_in_unit; | ||
99 | alloc_size = sizeof(*_aab); | ||
100 | } | ||
101 | |||
102 | _aab = kzalloc(alloc_size, GFP_KERNEL); | ||
103 | if (unlikely(!_aab)) { | ||
104 | ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size); | ||
105 | return -ENOMEM; | ||
106 | } | ||
107 | |||
108 | sp2d = &_aab->__asp2d.sp2d; | ||
109 | *psp2d = sp2d; /* From here Just call _sp2d_free */ | ||
110 | |||
111 | __a1pa = _aab->__a1pa; | ||
112 | __a1pa_end = __a1pa + num_a1pa; | ||
113 | |||
114 | for (i = 0; i < pages_in_unit; ++i) { | ||
115 | if (unlikely(__a1pa >= __a1pa_end)) { | ||
116 | num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, | ||
117 | pages_in_unit - i); | ||
118 | |||
119 | __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL); | ||
120 | if (unlikely(!__a1pa)) { | ||
121 | ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", | ||
122 | num_a1pa); | ||
123 | return -ENOMEM; | ||
124 | } | ||
125 | __a1pa_end = __a1pa + num_a1pa; | ||
126 | /* First *pages is marked for kfree of the buffer */ | ||
127 | sp2d->_1p_stripes[i].alloc = true; | ||
128 | } | ||
129 | |||
130 | sp2d->_1p_stripes[i].pages = __a1pa->pages; | ||
131 | sp2d->_1p_stripes[i].scribble = __a1pa->scribble; | ||
132 | sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; | ||
133 | ++__a1pa; | ||
134 | } | ||
135 | |||
136 | sp2d->parity = parity; | ||
137 | sp2d->data_devs = data_devs; | ||
138 | sp2d->pages_in_unit = pages_in_unit; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static void _sp2d_reset(struct __stripe_pages_2d *sp2d, | ||
143 | const struct _ore_r4w_op *r4w, void *priv) | ||
144 | { | ||
145 | unsigned data_devs = sp2d->data_devs; | ||
146 | unsigned group_width = data_devs + sp2d->parity; | ||
147 | unsigned p; | ||
148 | |||
149 | if (!sp2d->needed) | ||
150 | return; | ||
151 | |||
152 | for (p = 0; p < sp2d->pages_in_unit; p++) { | ||
153 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
154 | |||
155 | if (_1ps->write_count < group_width) { | ||
156 | unsigned c; | ||
157 | |||
158 | for (c = 0; c < data_devs; c++) | ||
159 | if (_1ps->page_is_read[c]) { | ||
160 | struct page *page = _1ps->pages[c]; | ||
161 | |||
162 | r4w->put_page(priv, page); | ||
163 | _1ps->page_is_read[c] = false; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); | ||
168 | _1ps->write_count = 0; | ||
169 | _1ps->tx = NULL; | ||
170 | } | ||
171 | |||
172 | sp2d->needed = false; | ||
173 | } | ||
174 | |||
175 | static void _sp2d_free(struct __stripe_pages_2d *sp2d) | ||
176 | { | ||
177 | unsigned i; | ||
178 | |||
179 | if (!sp2d) | ||
180 | return; | ||
181 | |||
182 | for (i = 0; i < sp2d->pages_in_unit; ++i) { | ||
183 | if (sp2d->_1p_stripes[i].alloc) | ||
184 | kfree(sp2d->_1p_stripes[i].pages); | ||
185 | } | ||
186 | |||
187 | kfree(sp2d); | ||
188 | } | ||
189 | |||
190 | static unsigned _sp2d_min_pg(struct __stripe_pages_2d *sp2d) | ||
191 | { | ||
192 | unsigned p; | ||
193 | |||
194 | for (p = 0; p < sp2d->pages_in_unit; p++) { | ||
195 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
196 | |||
197 | if (_1ps->write_count) | ||
198 | return p; | ||
199 | } | ||
200 | |||
201 | return ~0; | ||
202 | } | ||
203 | |||
204 | static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) | ||
205 | { | ||
206 | int p; /* must be signed: the loop below counts down past zero */ | ||
207 | |||
208 | for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { | ||
209 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
210 | |||
211 | if (_1ps->write_count) | ||
212 | return p; | ||
213 | } | ||
214 | |||
215 | return ~0; | ||
216 | } | ||
217 | |||
218 | static void _gen_xor_unit(struct __stripe_pages_2d *sp2d) | ||
219 | { | ||
220 | unsigned p; | ||
221 | for (p = 0; p < sp2d->pages_in_unit; p++) { | ||
222 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
223 | |||
224 | if (!_1ps->write_count) | ||
225 | continue; | ||
226 | |||
227 | init_async_submit(&_1ps->submit, | ||
228 | ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK, | ||
229 | NULL, | ||
230 | NULL, NULL, | ||
231 | (addr_conv_t *)_1ps->scribble); | ||
232 | |||
233 | /* TODO: raid6 */ | ||
234 | _1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages, | ||
235 | 0, sp2d->data_devs, PAGE_SIZE, | ||
236 | &_1ps->submit); | ||
237 | } | ||
238 | |||
239 | for (p = 0; p < sp2d->pages_in_unit; p++) { | ||
240 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
241 | /* NOTE: We wait for HW synchronously (I don't have such HW | ||
242 | * to test with.) Is parallelism needed with today's multi | ||
243 | * cores? | ||
244 | */ | ||
245 | async_tx_issue_pending(_1ps->tx); | ||
246 | } | ||
247 | } | ||
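
The async_xor() call above computes the RAID5 parity unit: parity is simply the byte-wise XOR of the data-unit pages, offloaded to a DMA/XOR engine when one exists. A synchronous sketch of the same computation, on plain buffers rather than struct page (illustrative only):

	#include <stddef.h>

	/* parity[i] = data[0][i] ^ data[1][i] ^ ... ^ data[data_devs - 1][i] */
	static void xor_parity(unsigned char *parity, unsigned char * const *data,
			       unsigned data_devs, size_t len)
	{
		size_t i;
		unsigned d;

		for (i = 0; i < len; i++) {
			unsigned char x = 0;

			for (d = 0; d < data_devs; d++)
				x ^= data[d][i];
			parity[i] = x;
		}
	}
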
248 | |||
249 | void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d, | ||
250 | struct ore_striping_info *si, struct page *page) | ||
251 | { | ||
252 | struct __1_page_stripe *_1ps; | ||
253 | |||
254 | sp2d->needed = true; | ||
255 | |||
256 | _1ps = &sp2d->_1p_stripes[si->cur_pg]; | ||
257 | _1ps->pages[si->cur_comp] = page; | ||
258 | ++_1ps->write_count; | ||
259 | |||
260 | si->cur_pg = (si->cur_pg + 1) % sp2d->pages_in_unit; | ||
261 | /* si->cur_comp is advanced outside, in the main loop */ | ||
262 | } | ||
263 | |||
264 | void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len, | ||
265 | bool not_last) | ||
266 | { | ||
267 | struct osd_sg_entry *sge; | ||
268 | |||
269 | ORE_DBGMSG("dev=%d cur_len=0x%x not_last=%d cur_sg=%d " | ||
270 | "offset=0x%llx length=0x%x last_sgs_total=0x%x\n", | ||
271 | per_dev->dev, cur_len, not_last, per_dev->cur_sg, | ||
272 | _LLU(per_dev->offset), per_dev->length, | ||
273 | per_dev->last_sgs_total); | ||
274 | |||
275 | if (!per_dev->cur_sg) { | ||
276 | sge = per_dev->sglist; | ||
277 | |||
278 | /* First time we prepare two entries */ | ||
279 | if (per_dev->length) { | ||
280 | ++per_dev->cur_sg; | ||
281 | sge->offset = per_dev->offset; | ||
282 | sge->len = per_dev->length; | ||
283 | } else { | ||
284 | /* Here the parity is the first unit of this object. | ||
285 | * This happens every time we reach a parity device on | ||
286 | * the same stripe as the per_dev->offset. We need to | ||
287 | * just skip this unit. | ||
288 | */ | ||
289 | per_dev->offset += cur_len; | ||
290 | return; | ||
291 | } | ||
292 | } else { | ||
293 | /* finalize the last one */ | ||
294 | sge = &per_dev->sglist[per_dev->cur_sg - 1]; | ||
295 | sge->len = per_dev->length - per_dev->last_sgs_total; | ||
296 | } | ||
297 | |||
298 | if (not_last) { | ||
299 | /* Partly prepare the next one */ | ||
300 | struct osd_sg_entry *next_sge = sge + 1; | ||
301 | |||
302 | ++per_dev->cur_sg; | ||
303 | next_sge->offset = sge->offset + sge->len + cur_len; | ||
304 | /* Save cur len so we know how much was added next time */ | ||
305 | per_dev->last_sgs_total = per_dev->length; | ||
306 | next_sge->len = 0; | ||
307 | } else if (!sge->len) { | ||
308 | /* Optimize for when the last unit is a parity */ | ||
309 | --per_dev->cur_sg; | ||
310 | } | ||
311 | } | ||
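
A hedged trace of the bookkeeping above, with an assumed 64KiB stripe_unit: after one data unit, a skipped parity-sized gap, and a second data unit, the device's sglist holds two entries, the second opened at sge->offset + sge->len + cur_len exactly as in the not_last branch.

	#include <stdio.h>

	struct sge { unsigned long long offset; unsigned len; };

	int main(void)
	{
		unsigned unit = 0x10000;	/* assumed stripe_unit */
		struct sge sg[2];

		sg[0].offset = 0;		/* first data unit */
		sg[0].len = unit;
		sg[1].offset = sg[0].offset + sg[0].len + unit; /* skip parity gap */
		sg[1].len = unit;		/* second data unit */

		printf("sge[0]={0x%llx,0x%x} sge[1]={0x%llx,0x%x}\n",
		       sg[0].offset, sg[0].len, sg[1].offset, sg[1].len);
		return 0;	/* prints sge[0]={0x0,0x10000} sge[1]={0x20000,0x10000} */
	}
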
312 | |||
313 | static int _alloc_read_4_write(struct ore_io_state *ios) | ||
314 | { | ||
315 | struct ore_layout *layout = ios->layout; | ||
316 | int ret; | ||
317 | /* We want to read only those pages not in cache, so the worst case | ||
318 | * is a stripe populated with every other page | ||
319 | */ | ||
320 | unsigned sgs_per_dev = ios->sp2d->pages_in_unit + 2; | ||
321 | |||
322 | ret = _ore_get_io_state(layout, ios->oc, | ||
323 | layout->group_width * layout->mirrors_p1, | ||
324 | sgs_per_dev, 0, &ios->ios_read_4_write); | ||
325 | return ret; | ||
326 | } | ||
327 | |||
328 | /* @si contains info of the to-be-inserted page. Update of @si should be | ||
329 | * maintained by caller. Specifically si->dev, si->obj_offset, ... | ||
330 | */ | ||
331 | static int _add_to_read_4_write(struct ore_io_state *ios, | ||
332 | struct ore_striping_info *si, struct page *page) | ||
333 | { | ||
334 | struct request_queue *q; | ||
335 | struct ore_per_dev_state *per_dev; | ||
336 | struct ore_io_state *read_ios; | ||
337 | unsigned first_dev = si->dev - (si->dev % | ||
338 | (ios->layout->group_width * ios->layout->mirrors_p1)); | ||
339 | unsigned comp = si->dev - first_dev; | ||
340 | unsigned added_len; | ||
341 | |||
342 | if (!ios->ios_read_4_write) { | ||
343 | int ret = _alloc_read_4_write(ios); | ||
344 | |||
345 | if (unlikely(ret)) | ||
346 | return ret; | ||
347 | } | ||
348 | |||
349 | read_ios = ios->ios_read_4_write; | ||
350 | read_ios->numdevs = ios->layout->group_width * ios->layout->mirrors_p1; | ||
351 | |||
352 | per_dev = &read_ios->per_dev[comp]; | ||
353 | if (!per_dev->length) { | ||
354 | per_dev->bio = bio_kmalloc(GFP_KERNEL, | ||
355 | ios->sp2d->pages_in_unit); | ||
356 | if (unlikely(!per_dev->bio)) { | ||
357 | ORE_DBGMSG("Failed to allocate BIO size=%u\n", | ||
358 | ios->sp2d->pages_in_unit); | ||
359 | return -ENOMEM; | ||
360 | } | ||
361 | per_dev->offset = si->obj_offset; | ||
362 | per_dev->dev = si->dev; | ||
363 | } else if (si->obj_offset != (per_dev->offset + per_dev->length)) { | ||
364 | u64 gap = si->obj_offset - (per_dev->offset + per_dev->length); | ||
365 | |||
366 | _ore_add_sg_seg(per_dev, gap, true); | ||
367 | } | ||
368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); | ||
369 | added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); | ||
370 | if (unlikely(added_len != PAGE_SIZE)) { | ||
371 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", | ||
372 | per_dev->bio->bi_vcnt); | ||
373 | return -ENOMEM; | ||
374 | } | ||
375 | |||
376 | per_dev->length += PAGE_SIZE; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) | ||
381 | { | ||
382 | struct bio_vec *bv; | ||
383 | unsigned i, d; | ||
384 | |||
385 | /* loop on all devices all pages */ | ||
386 | for (d = 0; d < ios->numdevs; d++) { | ||
387 | struct bio *bio = ios->per_dev[d].bio; | ||
388 | |||
389 | if (!bio) | ||
390 | continue; | ||
391 | |||
392 | __bio_for_each_segment(bv, bio, i, 0) { | ||
393 | struct page *page = bv->bv_page; | ||
394 | |||
395 | SetPageUptodate(page); | ||
396 | if (PageError(page)) | ||
397 | ClearPageError(page); | ||
398 | } | ||
399 | } | ||
400 | } | ||
401 | |||
402 | /* read_4_write is hacked to read the start of the first stripe and/or | ||
403 | * the end of the last stripe. If needed, with an sg-gap at each device/page. | ||
404 | * It is assumed to be called after the to_be_written pages of the first stripe | ||
405 | * are populating ios->sp2d[][] | ||
406 | * | ||
407 | * NOTE: We call ios->r4w->get_page for all pages needed for parity | ||
408 | * calculations. These pages are held at sp2d[p].pages[c] but with | ||
409 | * sp2d[p].page_is_read[c] = true. At _sp2d_reset these pages are | ||
410 | * released via ios->r4w->put_page(). get_page might signal that the page | ||
411 | * is already @uptodate=true, so we need not read it, only release it after IO. | ||
412 | * | ||
413 | * TODO: The read_4_write should calc a need_to_read_pages_count; if bigger than | ||
414 | * the to-be-written count, we should consider the xor-in-place mode. | ||
415 | * need_to_read_pages_count is the actual number of pages not present in cache. | ||
416 | * maybe "devs_in_group - ios->sp2d[p].write_count" is a good enough | ||
417 | * approximation? In this mode the read pages are put in the empty places of | ||
418 | * ios->sp2d[p][*], xor is calculated the same way. These pages are | ||
419 | * allocated/freed and don't go through cache | ||
420 | */ | ||
421 | static int _read_4_write(struct ore_io_state *ios) | ||
422 | { | ||
423 | struct ore_io_state *ios_read; | ||
424 | struct ore_striping_info read_si; | ||
425 | struct __stripe_pages_2d *sp2d = ios->sp2d; | ||
426 | u64 offset = ios->si.first_stripe_start; | ||
427 | u64 last_stripe_end; | ||
428 | unsigned bytes_in_stripe = ios->si.bytes_in_stripe; | ||
429 | unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; | ||
430 | int ret; | ||
431 | |||
432 | if (offset == ios->offset) /* Go to start collect $200 */ | ||
433 | goto read_last_stripe; | ||
434 | |||
435 | min_p = _sp2d_min_pg(sp2d); | ||
436 | max_p = _sp2d_max_pg(sp2d); | ||
437 | |||
438 | for (c = 0; ; c++) { | ||
439 | ore_calc_stripe_info(ios->layout, offset, 0, &read_si); | ||
440 | read_si.obj_offset += min_p * PAGE_SIZE; | ||
441 | offset += min_p * PAGE_SIZE; | ||
442 | for (p = min_p; p <= max_p; p++) { | ||
443 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
444 | struct page **pp = &_1ps->pages[c]; | ||
445 | bool uptodate; | ||
446 | |||
447 | if (*pp) | ||
448 | /* to-be-written pages start here */ | ||
449 | goto read_last_stripe; | ||
450 | |||
451 | *pp = ios->r4w->get_page(ios->private, offset, | ||
452 | &uptodate); | ||
453 | if (unlikely(!*pp)) | ||
454 | return -ENOMEM; | ||
455 | |||
456 | if (!uptodate) | ||
457 | _add_to_read_4_write(ios, &read_si, *pp); | ||
458 | |||
459 | /* Mark read-pages to be cache_released */ | ||
460 | _1ps->page_is_read[c] = true; | ||
461 | read_si.obj_offset += PAGE_SIZE; | ||
462 | offset += PAGE_SIZE; | ||
463 | } | ||
464 | offset += (sp2d->pages_in_unit - p) * PAGE_SIZE; | ||
465 | } | ||
466 | |||
467 | read_last_stripe: | ||
468 | offset = ios->offset + (ios->length + PAGE_SIZE - 1) / | ||
469 | PAGE_SIZE * PAGE_SIZE; | ||
470 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) | ||
471 | * bytes_in_stripe; | ||
472 | if (offset == last_stripe_end) /* Optimize for the aligned case */ | ||
473 | goto read_it; | ||
474 | |||
475 | ore_calc_stripe_info(ios->layout, offset, 0, &read_si); | ||
476 | p = read_si.unit_off / PAGE_SIZE; | ||
477 | c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, | ||
478 | ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); | ||
479 | |||
480 | BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); | ||
481 | /* unaligned IO must be within a single stripe */ | ||
482 | |||
483 | if (min_p == sp2d->pages_in_unit) { | ||
484 | /* Didn't do it yet */ | ||
485 | min_p = _sp2d_min_pg(sp2d); | ||
486 | max_p = _sp2d_max_pg(sp2d); | ||
487 | } | ||
488 | |||
489 | while (offset < last_stripe_end) { | ||
490 | struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; | ||
491 | |||
492 | if ((min_p <= p) && (p <= max_p)) { | ||
493 | struct page *page; | ||
494 | bool uptodate; | ||
495 | |||
496 | BUG_ON(_1ps->pages[c]); | ||
497 | page = ios->r4w->get_page(ios->private, offset, | ||
498 | &uptodate); | ||
499 | if (unlikely(!page)) | ||
500 | return -ENOMEM; | ||
501 | |||
502 | _1ps->pages[c] = page; | ||
503 | /* Mark read-pages to be cache_released */ | ||
504 | _1ps->page_is_read[c] = true; | ||
505 | if (!uptodate) | ||
506 | _add_to_read_4_write(ios, &read_si, page); | ||
507 | } | ||
508 | |||
509 | offset += PAGE_SIZE; | ||
510 | if (p == (sp2d->pages_in_unit - 1)) { | ||
511 | ++c; | ||
512 | p = 0; | ||
513 | ore_calc_stripe_info(ios->layout, offset, 0, &read_si); | ||
514 | } else { | ||
515 | read_si.obj_offset += PAGE_SIZE; | ||
516 | ++p; | ||
517 | } | ||
518 | } | ||
519 | |||
520 | read_it: | ||
521 | ios_read = ios->ios_read_4_write; | ||
522 | if (!ios_read) | ||
523 | return 0; | ||
524 | |||
525 | /* FIXME: Ugly to signal _sbi_read_mirror that we have bio(s). Change | ||
526 | * to check for per_dev->bio | ||
527 | */ | ||
528 | ios_read->pages = ios->pages; | ||
529 | |||
530 | /* Now read these devices */ | ||
531 | for (i = 0; i < ios_read->numdevs; i += ios_read->layout->mirrors_p1) { | ||
532 | ret = _ore_read_mirror(ios_read, i); | ||
533 | if (unlikely(ret)) | ||
534 | return ret; | ||
535 | } | ||
536 | |||
537 | ret = ore_io_execute(ios_read); /* Synchronous execution */ | ||
538 | if (unlikely(ret)) { | ||
539 | ORE_DBGMSG("!! ore_io_execute => %d\n", ret); | ||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | _mark_read4write_pages_uptodate(ios_read, ret); | ||
544 | return 0; | ||
545 | } | ||
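
The two round-ups at read_last_stripe above, as a stand-alone sketch with assumed example values (plain 64-bit division stands in for div_u64): first the write end is rounded up to a page, then up to the end of its containing stripe.

	#include <stdio.h>

	int main(void)
	{
		unsigned long long page = 4096, stripe = 5 * 65536; /* assumed layout */
		unsigned long long io_off = 0x50000, io_len = 0x4c20;
		unsigned long long end, stripe_end;

		end = io_off + (io_len + page - 1) / page * page;  /* page round-up */
		stripe_end = (end + stripe - 1) / stripe * stripe; /* stripe round-up */
		printf("end=0x%llx stripe_end=0x%llx\n", end, stripe_end);
		return 0;	/* prints end=0x55000 stripe_end=0xa0000 */
	}
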
546 | |||
547 | /* In writes @cur_len means length left, i.e. cur_len==0 is the last parity unit */ | ||
548 | int _ore_add_parity_unit(struct ore_io_state *ios, | ||
549 | struct ore_striping_info *si, | ||
550 | struct ore_per_dev_state *per_dev, | ||
551 | unsigned cur_len) | ||
552 | { | ||
553 | if (ios->reading) { | ||
554 | BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); | ||
555 | _ore_add_sg_seg(per_dev, cur_len, true); | ||
556 | } else { | ||
557 | struct __stripe_pages_2d *sp2d = ios->sp2d; | ||
558 | struct page **pages = ios->parity_pages + ios->cur_par_page; | ||
559 | unsigned num_pages; | ||
560 | unsigned array_start = 0; | ||
561 | unsigned i; | ||
562 | int ret; | ||
563 | |||
564 | si->cur_pg = _sp2d_min_pg(sp2d); | ||
565 | num_pages = _sp2d_max_pg(sp2d) + 1 - si->cur_pg; | ||
566 | |||
567 | if (!cur_len) /* If last stripe operate on parity comp */ | ||
568 | si->cur_comp = sp2d->data_devs; | ||
569 | |||
570 | if (!per_dev->length) { | ||
571 | per_dev->offset += si->cur_pg * PAGE_SIZE; | ||
572 | /* If first stripe, read in all read4write pages | ||
573 | * (if needed) before we calculate the first parity. | ||
574 | */ | ||
575 | _read_4_write(ios); | ||
576 | } | ||
577 | |||
578 | for (i = 0; i < num_pages; i++) { | ||
579 | pages[i] = _raid_page_alloc(); | ||
580 | if (unlikely(!pages[i])) | ||
581 | return -ENOMEM; | ||
582 | |||
583 | ++(ios->cur_par_page); | ||
584 | } | ||
585 | |||
586 | BUG_ON(si->cur_comp != sp2d->data_devs); | ||
587 | BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit); | ||
588 | |||
589 | ret = _ore_add_stripe_unit(ios, &array_start, 0, pages, | ||
590 | per_dev, num_pages * PAGE_SIZE); | ||
591 | if (unlikely(ret)) | ||
592 | return ret; | ||
593 | |||
594 | /* TODO: raid6 if (last_parity_dev) */ | ||
595 | _gen_xor_unit(sp2d); | ||
596 | _sp2d_reset(sp2d, ios->r4w, ios->private); | ||
597 | } | ||
598 | return 0; | ||
599 | } | ||
600 | |||
601 | int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) | ||
602 | { | ||
603 | struct ore_layout *layout = ios->layout; | ||
604 | |||
605 | if (ios->parity_pages) { | ||
606 | unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; | ||
607 | unsigned stripe_size = ios->si.bytes_in_stripe; | ||
608 | u64 last_stripe, first_stripe; | ||
609 | |||
610 | if (_sp2d_alloc(pages_in_unit, layout->group_width, | ||
611 | layout->parity, &ios->sp2d)) { | ||
612 | return -ENOMEM; | ||
613 | } | ||
614 | |||
615 | BUG_ON(ios->offset % PAGE_SIZE); | ||
616 | |||
617 | /* Round io down to the last full stripe */ | ||
618 | first_stripe = div_u64(ios->offset, stripe_size); | ||
619 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); | ||
620 | |||
621 | /* If an IO spans more than a single stripe it must end at | ||
622 | * a stripe boundary. The remainder at the end is pushed into the | ||
623 | * next IO. | ||
624 | */ | ||
625 | if (last_stripe != first_stripe) { | ||
626 | ios->length = last_stripe * stripe_size - ios->offset; | ||
627 | |||
628 | BUG_ON(!ios->length); | ||
629 | ios->nr_pages = (ios->length + PAGE_SIZE - 1) / | ||
630 | PAGE_SIZE; | ||
631 | ios->si.length = ios->length; /* make it consistent */ | ||
632 | } | ||
633 | } | ||
634 | return 0; | ||
635 | } | ||
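
The stripe trim above, rendered as a stand-alone sketch with assumed layout numbers: an IO that crosses a stripe boundary is cut at the last full stripe, and the tail is left for the next IO.

	#include <stdio.h>

	int main(void)
	{
		unsigned long long stripe_size = 5 * 65536;	/* assumed 5 data units */
		unsigned long long offset = 100 * 4096, length = 600 * 4096;
		unsigned long long first = offset / stripe_size;
		unsigned long long last = (offset + length) / stripe_size;

		if (last != first)
			length = last * stripe_size - offset;	/* tail -> next IO */
		printf("trimmed length=0x%llx\n", length);	/* 0x21c000 */
		return 0;
	}
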
636 | |||
637 | void _ore_free_raid_stuff(struct ore_io_state *ios) | ||
638 | { | ||
639 | if (ios->sp2d) { /* writing and raid */ | ||
640 | unsigned i; | ||
641 | |||
642 | for (i = 0; i < ios->cur_par_page; i++) { | ||
643 | struct page *page = ios->parity_pages[i]; | ||
644 | |||
645 | if (page) | ||
646 | _raid_page_free(page); | ||
647 | } | ||
648 | if (ios->extra_part_alloc) | ||
649 | kfree(ios->parity_pages); | ||
650 | /* If IO returned an error, pages might need unlocking */ | ||
651 | _sp2d_reset(ios->sp2d, ios->r4w, ios->private); | ||
652 | _sp2d_free(ios->sp2d); | ||
653 | } else { | ||
654 | /* Will only be set if raid reading && sglist is big */ | ||
655 | if (ios->extra_part_alloc) | ||
656 | kfree(ios->per_dev[0].sglist); | ||
657 | } | ||
658 | if (ios->ios_read_4_write) | ||
659 | ore_put_io_state(ios->ios_read_4_write); | ||
660 | } | ||
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h new file mode 100644 index 000000000000..2ffd2c3c6e46 --- /dev/null +++ b/fs/exofs/ore_raid.h | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Copyright (C) from 2011 | ||
3 | * Boaz Harrosh <bharrosh@panasas.com> | ||
4 | * | ||
5 | * This file is part of the objects raid engine (ore). | ||
6 | * | ||
7 | * It is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as published | ||
9 | * by the Free Software Foundation. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with "ore". If not, write to the Free Software Foundation, Inc: | ||
13 | * "Free Software Foundation <info@fsf.org>" | ||
14 | */ | ||
15 | |||
16 | #include <scsi/osd_ore.h> | ||
17 | |||
18 | #define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a) | ||
19 | |||
20 | #ifdef CONFIG_EXOFS_DEBUG | ||
21 | #define ORE_DBGMSG(fmt, a...) \ | ||
22 | printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a) | ||
23 | #else | ||
24 | #define ORE_DBGMSG(fmt, a...) \ | ||
25 | do { if (0) printk(fmt, ##a); } while (0) | ||
26 | #endif | ||
27 | |||
29 | /* u64 has problems with printk; this casts it to unsigned long long */ | ||
29 | #define _LLU(x) (unsigned long long)(x) | ||
30 | |||
31 | #define ORE_DBGMSG2(M...) do {} while (0) | ||
32 | /* #define ORE_DBGMSG2 ORE_DBGMSG */ | ||
33 | |||
34 | /* Calculate the component order in a stripe, e.g. the logical data unit | ||
35 | * address within the stripe of @dev given the @par_dev of this stripe. | ||
36 | */ | ||
37 | static inline unsigned _dev_order(unsigned devs_in_group, unsigned mirrors_p1, | ||
38 | unsigned par_dev, unsigned dev) | ||
39 | { | ||
40 | unsigned first_dev = dev - dev % devs_in_group; | ||
41 | |||
42 | dev -= first_dev; | ||
43 | par_dev -= first_dev; | ||
44 | |||
45 | if (devs_in_group == par_dev) /* The raid 0 case */ | ||
46 | return dev / mirrors_p1; | ||
47 | /* raid4/5/6 case */ | ||
48 | return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) / | ||
49 | mirrors_p1; | ||
50 | } | ||
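
The formula is easiest to check by example. This sketch applies the same arithmetic as _dev_order() to an assumed 6-device RAID5 group (5 data + 1 parity, mirrors_p1 == 1) with parity on device 2 for the current stripe:

	#include <stdio.h>

	static unsigned dev_order(unsigned devs_in_group, unsigned mirrors_p1,
				  unsigned par_dev, unsigned dev)
	{
		return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) /
			mirrors_p1;
	}

	int main(void)
	{
		unsigned par_dev = 2, dev;

		for (dev = 0; dev < 6; dev++) {
			if (dev == par_dev)
				printf("dev %u: parity\n", dev);
			else	/* devs 3,4,5,0,1 hold data units 0,1,2,3,4 */
				printf("dev %u: data unit %u\n", dev,
				       dev_order(6, 1, par_dev, dev));
		}
		return 0;
	}
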
51 | |||
52 | /* ore_raid.c stuff needed by ore.c */ | ||
53 | int _ore_post_alloc_raid_stuff(struct ore_io_state *ios); | ||
54 | void _ore_free_raid_stuff(struct ore_io_state *ios); | ||
55 | |||
56 | void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len, | ||
57 | bool not_last); | ||
58 | int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si, | ||
59 | struct ore_per_dev_state *per_dev, unsigned cur_len); | ||
60 | void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d, | ||
61 | struct ore_striping_info *si, struct page *page); | ||
62 | static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d, | ||
63 | struct ore_striping_info *si, struct page *page) | ||
64 | { | ||
65 | if (!sp2d) /* Inline the fast path */ | ||
66 | return; /* There is no raid stuff */ | ||
67 | _ore_add_stripe_page(sp2d, si, page); | ||
68 | } | ||
69 | |||
70 | /* ore.c stuff needed by ore_raid.c */ | ||
71 | int _ore_get_io_state(struct ore_layout *layout, | ||
72 | struct ore_components *oc, unsigned numdevs, | ||
73 | unsigned sgs_per_dev, unsigned num_par_pages, | ||
74 | struct ore_io_state **pios); | ||
75 | int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, | ||
76 | unsigned pgbase, struct page **pages, | ||
77 | struct ore_per_dev_state *per_dev, int cur_len); | ||
78 | int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp); | ||
79 | int ore_io_execute(struct ore_io_state *ios); | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 274894053b02..057b237b8b69 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -266,7 +266,7 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) | |||
266 | struct ore_io_state *ios; | 266 | struct ore_io_state *ios; |
267 | int ret; | 267 | int ret; |
268 | 268 | ||
269 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); | 269 | ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios); |
270 | if (unlikely(ret)) { | 270 | if (unlikely(ret)) { |
271 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); | 271 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
272 | return ret; | 272 | return ret; |
@@ -321,7 +321,7 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) | |||
321 | struct ore_io_state *ios; | 321 | struct ore_io_state *ios; |
322 | int ret; | 322 | int ret; |
323 | 323 | ||
324 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); | 324 | ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios); |
325 | if (unlikely(ret)) { | 325 | if (unlikely(ret)) { |
326 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); | 326 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
327 | return ret; | 327 | return ret; |
@@ -355,12 +355,12 @@ static const struct export_operations exofs_export_ops; | |||
355 | /* | 355 | /* |
356 | * Write the superblock to the OSD | 356 | * Write the superblock to the OSD |
357 | */ | 357 | */ |
358 | int exofs_sync_fs(struct super_block *sb, int wait) | 358 | static int exofs_sync_fs(struct super_block *sb, int wait) |
359 | { | 359 | { |
360 | struct exofs_sb_info *sbi; | 360 | struct exofs_sb_info *sbi; |
361 | struct exofs_fscb *fscb; | 361 | struct exofs_fscb *fscb; |
362 | struct ore_comp one_comp; | 362 | struct ore_comp one_comp; |
363 | struct ore_components comps; | 363 | struct ore_components oc; |
364 | struct ore_io_state *ios; | 364 | struct ore_io_state *ios; |
365 | int ret = -ENOMEM; | 365 | int ret = -ENOMEM; |
366 | 366 | ||
@@ -378,9 +378,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
378 | * the writeable info is set in exofs_sbi_write_stats() above. | 378 | * the writeable info is set in exofs_sbi_write_stats() above. |
379 | */ | 379 | */ |
380 | 380 | ||
381 | exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); | 381 | exofs_init_comps(&oc, &one_comp, sbi, EXOFS_SUPER_ID); |
382 | 382 | ||
383 | ret = ore_get_io_state(&sbi->layout, &comps, &ios); | 383 | ret = ore_get_io_state(&sbi->layout, &oc, &ios); |
384 | if (unlikely(ret)) | 384 | if (unlikely(ret)) |
385 | goto out; | 385 | goto out; |
386 | 386 | ||
@@ -429,19 +429,20 @@ static void _exofs_print_device(const char *msg, const char *dev_path, | |||
429 | msg, dev_path ?: "", odi->osdname, _LLU(pid)); | 429 | msg, dev_path ?: "", odi->osdname, _LLU(pid)); |
430 | } | 430 | } |
431 | 431 | ||
432 | void exofs_free_sbi(struct exofs_sb_info *sbi) | 432 | static void exofs_free_sbi(struct exofs_sb_info *sbi) |
433 | { | 433 | { |
434 | while (sbi->comps.numdevs) { | 434 | unsigned numdevs = sbi->oc.numdevs; |
435 | int i = --sbi->comps.numdevs; | 435 | |
436 | struct osd_dev *od = sbi->comps.ods[i]; | 436 | while (numdevs) { |
437 | unsigned i = --numdevs; | ||
438 | struct osd_dev *od = ore_comp_dev(&sbi->oc, i); | ||
437 | 439 | ||
438 | if (od) { | 440 | if (od) { |
439 | sbi->comps.ods[i] = NULL; | 441 | ore_comp_set_dev(&sbi->oc, i, NULL); |
440 | osduld_put_device(od); | 442 | osduld_put_device(od); |
441 | } | 443 | } |
442 | } | 444 | } |
443 | if (sbi->comps.ods != sbi->_min_one_dev) | 445 | kfree(sbi->oc.ods); |
444 | kfree(sbi->comps.ods); | ||
445 | kfree(sbi); | 446 | kfree(sbi); |
446 | } | 447 | } |
447 | 448 | ||
@@ -468,7 +469,7 @@ static void exofs_put_super(struct super_block *sb) | |||
468 | msecs_to_jiffies(100)); | 469 | msecs_to_jiffies(100)); |
469 | } | 470 | } |
470 | 471 | ||
471 | _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0], | 472 | _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), |
472 | sbi->one_comp.obj.partition); | 473 | sbi->one_comp.obj.partition); |
473 | 474 | ||
474 | bdi_destroy(&sbi->bdi); | 475 | bdi_destroy(&sbi->bdi); |
@@ -479,76 +480,20 @@ static void exofs_put_super(struct super_block *sb) | |||
479 | static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | 480 | static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, |
480 | struct exofs_device_table *dt) | 481 | struct exofs_device_table *dt) |
481 | { | 482 | { |
482 | u64 stripe_length; | 483 | int ret; |
483 | 484 | ||
484 | sbi->data_map.odm_num_comps = | 485 | sbi->layout.stripe_unit = |
485 | le32_to_cpu(dt->dt_data_map.cb_num_comps); | ||
486 | sbi->data_map.odm_stripe_unit = | ||
487 | le64_to_cpu(dt->dt_data_map.cb_stripe_unit); | 486 | le64_to_cpu(dt->dt_data_map.cb_stripe_unit); |
488 | sbi->data_map.odm_group_width = | 487 | sbi->layout.group_width = |
489 | le32_to_cpu(dt->dt_data_map.cb_group_width); | 488 | le32_to_cpu(dt->dt_data_map.cb_group_width); |
490 | sbi->data_map.odm_group_depth = | 489 | sbi->layout.group_depth = |
491 | le32_to_cpu(dt->dt_data_map.cb_group_depth); | 490 | le32_to_cpu(dt->dt_data_map.cb_group_depth); |
492 | sbi->data_map.odm_mirror_cnt = | 491 | sbi->layout.mirrors_p1 = |
493 | le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); | 492 | le32_to_cpu(dt->dt_data_map.cb_mirror_cnt) + 1; |
494 | sbi->data_map.odm_raid_algorithm = | 493 | sbi->layout.raid_algorithm = |
495 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); | 494 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); |
496 | 495 | ||
497 | /* FIXME: Only raid0 for now. if not so, do not mount */ | 496 | ret = ore_verify_layout(numdevs, &sbi->layout); |
498 | if (sbi->data_map.odm_num_comps != numdevs) { | ||
499 | EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", | ||
500 | sbi->data_map.odm_num_comps, numdevs); | ||
501 | return -EINVAL; | ||
502 | } | ||
503 | if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) { | ||
504 | EXOFS_ERR("Only RAID_0 for now\n"); | ||
505 | return -EINVAL; | ||
506 | } | ||
507 | if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) { | ||
508 | EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n", | ||
509 | numdevs, sbi->data_map.odm_mirror_cnt); | ||
510 | return -EINVAL; | ||
511 | } | ||
512 | |||
513 | if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { | ||
514 | EXOFS_ERR("Stripe Unit(0x%llx)" | ||
515 | " must be Multples of PAGE_SIZE(0x%lx)\n", | ||
516 | _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE); | ||
517 | return -EINVAL; | ||
518 | } | ||
519 | |||
520 | sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; | ||
521 | sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; | ||
522 | |||
523 | if (sbi->data_map.odm_group_width) { | ||
524 | sbi->layout.group_width = sbi->data_map.odm_group_width; | ||
525 | sbi->layout.group_depth = sbi->data_map.odm_group_depth; | ||
526 | if (!sbi->layout.group_depth) { | ||
527 | EXOFS_ERR("group_depth == 0 && group_width != 0\n"); | ||
528 | return -EINVAL; | ||
529 | } | ||
530 | sbi->layout.group_count = sbi->data_map.odm_num_comps / | ||
531 | sbi->layout.mirrors_p1 / | ||
532 | sbi->data_map.odm_group_width; | ||
533 | } else { | ||
534 | if (sbi->data_map.odm_group_depth) { | ||
535 | printk(KERN_NOTICE "Warning: group_depth ignored " | ||
536 | "group_width == 0 && group_depth == %d\n", | ||
537 | sbi->data_map.odm_group_depth); | ||
538 | sbi->data_map.odm_group_depth = 0; | ||
539 | } | ||
540 | sbi->layout.group_width = sbi->data_map.odm_num_comps / | ||
541 | sbi->layout.mirrors_p1; | ||
542 | sbi->layout.group_depth = -1; | ||
543 | sbi->layout.group_count = 1; | ||
544 | } | ||
545 | |||
546 | stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit; | ||
547 | if (stripe_length >= (1ULL << 32)) { | ||
548 | EXOFS_ERR("Total Stripe length(0x%llx)" | ||
549 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
550 | return -EINVAL; | ||
551 | } | ||
552 | 497 | ||
553 | EXOFS_DBGMSG("exofs: layout: " | 498 | EXOFS_DBGMSG("exofs: layout: " |
554 | "num_comps=%u stripe_unit=0x%x group_width=%u " | 499 | "num_comps=%u stripe_unit=0x%x group_width=%u " |
@@ -558,8 +503,8 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
558 | sbi->layout.group_width, | 503 | sbi->layout.group_width, |
559 | _LLU(sbi->layout.group_depth), | 504 | _LLU(sbi->layout.group_depth), |
560 | sbi->layout.mirrors_p1, | 505 | sbi->layout.mirrors_p1, |
561 | sbi->data_map.odm_raid_algorithm); | 506 | sbi->layout.raid_algorithm); |
562 | return 0; | 507 | return ret; |
563 | } | 508 | } |
564 | 509 | ||
565 | static unsigned __ra_pages(struct ore_layout *layout) | 510 | static unsigned __ra_pages(struct ore_layout *layout) |
@@ -605,12 +550,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | |||
605 | return !(odi->systemid_len || odi->osdname_len); | 550 | return !(odi->systemid_len || odi->osdname_len); |
606 | } | 551 | } |
607 | 552 | ||
553 | int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, | ||
554 | struct exofs_dev **peds) | ||
555 | { | ||
556 | struct __alloc_ore_devs_and_exofs_devs { | ||
557 | /* Twice bigger table: See exofs_init_comps() and comment at | ||
558 | * exofs_read_lookup_dev_table() | ||
559 | */ | ||
560 | struct ore_dev *oreds[numdevs * 2 - 1]; | ||
561 | struct exofs_dev eds[numdevs]; | ||
562 | } *aoded; | ||
563 | struct exofs_dev *eds; | ||
564 | unsigned i; | ||
565 | |||
566 | aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); | ||
567 | if (unlikely(!aoded)) { | ||
568 | EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", | ||
569 | numdevs); | ||
570 | return -ENOMEM; | ||
571 | } | ||
572 | |||
573 | sbi->oc.ods = aoded->oreds; | ||
574 | *peds = eds = aoded->eds; | ||
575 | for (i = 0; i < numdevs; ++i) | ||
576 | aoded->oreds[i] = &eds[i].ored; | ||
577 | return 0; | ||
578 | } | ||
579 | |||
608 | static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | 580 | static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, |
609 | struct osd_dev *fscb_od, | 581 | struct osd_dev *fscb_od, |
610 | unsigned table_count) | 582 | unsigned table_count) |
611 | { | 583 | { |
612 | struct ore_comp comp; | 584 | struct ore_comp comp; |
613 | struct exofs_device_table *dt; | 585 | struct exofs_device_table *dt; |
586 | struct exofs_dev *eds; | ||
614 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | 587 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + |
615 | sizeof(*dt); | 588 | sizeof(*dt); |
616 | unsigned numdevs, i; | 589 | unsigned numdevs, i; |
@@ -623,7 +596,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
623 | return -ENOMEM; | 596 | return -ENOMEM; |
624 | } | 597 | } |
625 | 598 | ||
626 | sbi->comps.numdevs = 0; | 599 | sbi->oc.numdevs = 0; |
627 | 600 | ||
628 | comp.obj.partition = sbi->one_comp.obj.partition; | 601 | comp.obj.partition = sbi->one_comp.obj.partition; |
629 | comp.obj.id = EXOFS_DEVTABLE_ID; | 602 | comp.obj.id = EXOFS_DEVTABLE_ID; |
@@ -647,20 +620,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
647 | if (unlikely(ret)) | 620 | if (unlikely(ret)) |
648 | goto out; | 621 | goto out; |
649 | 622 | ||
650 | if (likely(numdevs > 1)) { | 623 | ret = __alloc_dev_table(sbi, numdevs, &eds); |
651 | unsigned size = numdevs * sizeof(sbi->comps.ods[0]); | 624 | if (unlikely(ret)) |
652 | 625 | goto out; | |
653 | /* Twice bigger table: See exofs_init_comps() and below | 626 | /* exofs round-robins the device table view according to inode |
654 | * comment | 627 | * number. We hold a twice-bigger table, hence inodes can point
655 | */ | 628 | * to any device and have a sequential view of the table |
656 | sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL); | 629 | * starting at this device. See exofs_init_comps() |
657 | if (unlikely(!sbi->comps.ods)) { | 630 | */ |
658 | EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", | 631 | memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], |
659 | numdevs); | 632 | (numdevs - 1) * sizeof(sbi->oc.ods[0])); |
660 | ret = -ENOMEM; | ||
661 | goto out; | ||
662 | } | ||
663 | } | ||
664 | 633 | ||
665 | for (i = 0; i < numdevs; i++) { | 634 | for (i = 0; i < numdevs; i++) { |
666 | struct exofs_fscb fscb; | 635 | struct exofs_fscb fscb; |
@@ -676,13 +645,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
676 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", | 645 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", |
677 | i, odi.osdname); | 646 | i, odi.osdname); |
678 | 647 | ||
648 | /* the exofs id is currently the table index */ | ||
649 | eds[i].did = i; | ||
650 | |||
679 | /* On all devices the device table is identical. The user can | 651 | /* On all devices the device table is identical. The user can |
680 | * specify any one of the participating devices on the command | 652 | * specify any one of the participating devices on the command |
681 | * line. We always keep them in device-table order. | 653 | * line. We always keep them in device-table order. |
682 | */ | 654 | */ |
683 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { | 655 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { |
684 | sbi->comps.ods[i] = fscb_od; | 656 | eds[i].ored.od = fscb_od; |
685 | ++sbi->comps.numdevs; | 657 | ++sbi->oc.numdevs; |
686 | fscb_od = NULL; | 658 | fscb_od = NULL; |
687 | continue; | 659 | continue; |
688 | } | 660 | } |
@@ -695,8 +667,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
695 | goto out; | 667 | goto out; |
696 | } | 668 | } |
697 | 669 | ||
698 | sbi->comps.ods[i] = od; | 670 | eds[i].ored.od = od; |
699 | ++sbi->comps.numdevs; | 671 | ++sbi->oc.numdevs; |
700 | 672 | ||
701 | /* Read the fscb of the other devices to make sure the FS | 673 | /* Read the fscb of the other devices to make sure the FS |
702 | * partition is there. | 674 | * partition is there. |
@@ -718,21 +690,10 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
718 | 690 | ||
719 | out: | 691 | out: |
720 | kfree(dt); | 692 | kfree(dt); |
721 | if (likely(!ret)) { | 693 | if (unlikely(fscb_od && !ret)) { |
722 | unsigned numdevs = sbi->comps.numdevs; | ||
723 | |||
724 | if (unlikely(fscb_od)) { | ||
725 | EXOFS_ERR("ERROR: Bad device-table container device not present\n"); | 694 | EXOFS_ERR("ERROR: Bad device-table container device not present\n"); |
726 | osduld_put_device(fscb_od); | 695 | osduld_put_device(fscb_od); |
727 | return -EINVAL; | 696 | return -EINVAL; |
728 | } | ||
729 | /* exofs round-robins the device table view according to inode | ||
730 | * number. We hold a: twice bigger table hence inodes can point | ||
731 | * to any device and have a sequential view of the table | ||
732 | * starting at this device. See exofs_init_comps() | ||
733 | */ | ||
734 | for (i = 0; i < numdevs - 1; ++i) | ||
735 | sbi->comps.ods[i + numdevs] = sbi->comps.ods[i]; | ||
736 | } | 697 | } |
737 | return ret; | 698 | return ret; |
738 | } | 699 | } |
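
The "twice bigger" table the comment above refers to works like this sketch (illustrative names): with the first numdevs - 1 entries mirrored after the table, an inode whose view starts at any device d can treat &ods[d] as a plain sequential array of numdevs devices, with no modulo on each access.

	#include <stdio.h>

	int main(void)
	{
		unsigned numdevs = 4, i, start = 2;	/* e.g. inode# % numdevs == 2 */
		const char *ods[2 * 4 - 1] = { "odA", "odB", "odC", "odD" };

		for (i = 0; i < numdevs - 1; i++)
			ods[numdevs + i] = ods[i];	/* mirror all but the last */

		for (i = 0; i < numdevs; i++)		/* sequential view from 2 */
			printf("%s ", ods[start + i]);
		printf("\n");				/* prints: odC odD odA odB */
		return 0;
	}
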
@@ -783,10 +744,9 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
783 | sbi->one_comp.obj.partition = opts->pid; | 744 | sbi->one_comp.obj.partition = opts->pid; |
784 | sbi->one_comp.obj.id = 0; | 745 | sbi->one_comp.obj.id = 0; |
785 | exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); | 746 | exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); |
786 | sbi->comps.numdevs = 1; | 747 | sbi->oc.numdevs = 1; |
787 | sbi->comps.single_comp = EC_SINGLE_COMP; | 748 | sbi->oc.single_comp = EC_SINGLE_COMP; |
788 | sbi->comps.comps = &sbi->one_comp; | 749 | sbi->oc.comps = &sbi->one_comp; |
789 | sbi->comps.ods = sbi->_min_one_dev; | ||
790 | 750 | ||
791 | /* fill in some other data by hand */ | 751 | /* fill in some other data by hand */ |
792 | memset(sb->s_id, 0, sizeof(sb->s_id)); | 752 | memset(sb->s_id, 0, sizeof(sb->s_id)); |
@@ -835,7 +795,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
835 | if (unlikely(ret)) | 795 | if (unlikely(ret)) |
836 | goto free_sbi; | 796 | goto free_sbi; |
837 | } else { | 797 | } else { |
838 | sbi->comps.ods[0] = od; | 798 | struct exofs_dev *eds; |
799 | |||
800 | ret = __alloc_dev_table(sbi, 1, &eds); | ||
801 | if (unlikely(ret)) | ||
802 | goto free_sbi; | ||
803 | |||
804 | ore_comp_set_dev(&sbi->oc, 0, od); | ||
839 | } | 805 | } |
840 | 806 | ||
841 | __sbi_read_stats(sbi); | 807 | __sbi_read_stats(sbi); |
@@ -875,7 +841,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
875 | goto free_sbi; | 841 | goto free_sbi; |
876 | } | 842 | } |
877 | 843 | ||
878 | _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0], | 844 | _exofs_print_device("Mounting", opts->dev_name, |
845 | ore_comp_dev(&sbi->oc, 0), | ||
879 | sbi->one_comp.obj.partition); | 846 | sbi->one_comp.obj.partition); |
880 | return 0; | 847 | return 0; |
881 | 848 | ||
@@ -924,7 +891,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
924 | uint64_t used = ULLONG_MAX; | 891 | uint64_t used = ULLONG_MAX; |
925 | int ret; | 892 | int ret; |
926 | 893 | ||
927 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); | 894 | ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios); |
928 | if (ret) { | 895 | if (ret) { |
929 | EXOFS_DBGMSG("ore_get_io_state failed.\n"); | 896 | EXOFS_DBGMSG("ore_get_io_state failed.\n"); |
930 | return ret; | 897 | return ret; |
@@ -981,7 +948,7 @@ static const struct super_operations exofs_sops = { | |||
981 | * EXPORT OPERATIONS | 948 | * EXPORT OPERATIONS |
982 | *****************************************************************************/ | 949 | *****************************************************************************/ |
983 | 950 | ||
984 | struct dentry *exofs_get_parent(struct dentry *child) | 951 | static struct dentry *exofs_get_parent(struct dentry *child) |
985 | { | 952 | { |
986 | unsigned long ino = exofs_parent_ino(child); | 953 | unsigned long ino = exofs_parent_ino(child); |
987 | 954 | ||
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 5d979b4347b0..c922adc8ef41 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -46,28 +46,30 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name, | |||
46 | value, size, flags); | 46 | value, size, flags); |
47 | } | 47 | } |
48 | 48 | ||
49 | int | 49 | int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
50 | ext2_init_security(struct inode *inode, struct inode *dir, | 50 | void *fs_info) |
51 | const struct qstr *qstr) | ||
52 | { | 51 | { |
53 | int err; | 52 | const struct xattr *xattr; |
54 | size_t len; | 53 | int err = 0; |
55 | void *value; | ||
56 | char *name; | ||
57 | 54 | ||
58 | err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); | 55 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
59 | if (err) { | 56 | err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, |
60 | if (err == -EOPNOTSUPP) | 57 | xattr->name, xattr->value, |
61 | return 0; | 58 | xattr->value_len, 0); |
62 | return err; | 59 | if (err < 0) |
60 | break; | ||
63 | } | 61 | } |
64 | err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, | ||
65 | name, value, len, 0); | ||
66 | kfree(name); | ||
67 | kfree(value); | ||
68 | return err; | 62 | return err; |
69 | } | 63 | } |
70 | 64 | ||
65 | int | ||
66 | ext2_init_security(struct inode *inode, struct inode *dir, | ||
67 | const struct qstr *qstr) | ||
68 | { | ||
69 | return security_inode_init_security(inode, dir, qstr, | ||
70 | &ext2_initxattrs, NULL); | ||
71 | } | ||
72 | |||
71 | const struct xattr_handler ext2_xattr_security_handler = { | 73 | const struct xattr_handler ext2_xattr_security_handler = { |
72 | .prefix = XATTR_SECURITY_PREFIX, | 74 | .prefix = XATTR_SECURITY_PREFIX, |
73 | .list = ext2_xattr_security_list, | 75 | .list = ext2_xattr_security_list, |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 04da6acde85d..12661e1deedd 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, | |||
1134 | return bh; | 1134 | return bh; |
1135 | if (buffer_uptodate(bh)) | 1135 | if (buffer_uptodate(bh)) |
1136 | return bh; | 1136 | return bh; |
1137 | ll_rw_block(READ_META, 1, &bh); | 1137 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
1138 | wait_on_buffer(bh); | 1138 | wait_on_buffer(bh); |
1139 | if (buffer_uptodate(bh)) | 1139 | if (buffer_uptodate(bh)) |
1140 | return bh; | 1140 | return bh; |
@@ -2807,7 +2807,7 @@ make_io: | |||
2807 | trace_ext3_load_inode(inode); | 2807 | trace_ext3_load_inode(inode); |
2808 | get_bh(bh); | 2808 | get_bh(bh); |
2809 | bh->b_end_io = end_buffer_read_sync; | 2809 | bh->b_end_io = end_buffer_read_sync; |
2810 | submit_bh(READ_META, bh); | 2810 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
2811 | wait_on_buffer(bh); | 2811 | wait_on_buffer(bh); |
2812 | if (!buffer_uptodate(bh)) { | 2812 | if (!buffer_uptodate(bh)) { |
2813 | ext3_error(inode->i_sb, "ext3_get_inode_loc", | 2813 | ext3_error(inode->i_sb, "ext3_get_inode_loc", |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 6e18a0b7750d..0629e09f6511 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -922,7 +922,8 @@ restart: | |||
922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); | 922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); |
923 | bh_use[ra_max] = bh; | 923 | bh_use[ra_max] = bh; |
924 | if (bh) | 924 | if (bh) |
925 | ll_rw_block(READ_META, 1, &bh); | 925 | ll_rw_block(READ | REQ_META | REQ_PRIO, |
926 | 1, &bh); | ||
926 | } | 927 | } |
927 | } | 928 | } |
928 | if ((bh = bh_use[ra_ptr++]) == NULL) | 929 | if ((bh = bh_use[ra_ptr++]) == NULL) |
@@ -2209,9 +2210,11 @@ static int ext3_symlink (struct inode * dir, | |||
2209 | /* | 2210 | /* |
2210 | * For non-fast symlinks, we just allocate inode and put it on | 2211 | * For non-fast symlinks, we just allocate inode and put it on |
2211 | * orphan list in the first transaction => we need bitmap, | 2212 | * orphan list in the first transaction => we need bitmap, |
2212 | * group descriptor, sb, inode block, quota blocks. | 2213 | * group descriptor, sb, inode block, quota blocks, and |
2214 | * possibly selinux xattr blocks. | ||
2213 | */ | 2215 | */ |
2214 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | 2216 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + |
2217 | EXT3_XATTR_TRANS_BLOCKS; | ||
2215 | } else { | 2218 | } else { |
2216 | /* | 2219 | /* |
2217 | * Fast symlink. We have to add entry to directory | 2220 | * Fast symlink. We have to add entry to directory |
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index b8d9f83aa5c5..3c218b8a51d4 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -48,28 +48,32 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name, | |||
48 | name, value, size, flags); | 48 | name, value, size, flags); |
49 | } | 49 | } |
50 | 50 | ||
51 | int | 51 | int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
52 | ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, | 52 | void *fs_info) |
53 | const struct qstr *qstr) | ||
54 | { | 53 | { |
55 | int err; | 54 | const struct xattr *xattr; |
56 | size_t len; | 55 | handle_t *handle = fs_info; |
57 | void *value; | 56 | int err = 0; |
58 | char *name; | ||
59 | 57 | ||
60 | err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); | 58 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
61 | if (err) { | 59 | err = ext3_xattr_set_handle(handle, inode, |
62 | if (err == -EOPNOTSUPP) | 60 | EXT3_XATTR_INDEX_SECURITY, |
63 | return 0; | 61 | xattr->name, xattr->value, |
64 | return err; | 62 | xattr->value_len, 0); |
63 | if (err < 0) | ||
64 | break; | ||
65 | } | 65 | } |
66 | err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY, | ||
67 | name, value, len, 0); | ||
68 | kfree(name); | ||
69 | kfree(value); | ||
70 | return err; | 66 | return err; |
71 | } | 67 | } |
72 | 68 | ||
69 | int | ||
70 | ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, | ||
71 | const struct qstr *qstr) | ||
72 | { | ||
73 | return security_inode_init_security(inode, dir, qstr, | ||
74 | &ext3_initxattrs, handle); | ||
75 | } | ||
76 | |||
73 | const struct xattr_handler ext3_xattr_security_handler = { | 77 | const struct xattr_handler ext3_xattr_security_handler = { |
74 | .prefix = XATTR_SECURITY_PREFIX, | 78 | .prefix = XATTR_SECURITY_PREFIX, |
75 | .list = ext3_xattr_security_list, | 79 | .list = ext3_xattr_security_list, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e717dfd2f2b4..b7d7bd0f066e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -175,6 +175,7 @@ struct mpage_da_data { | |||
175 | */ | 175 | */ |
176 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 176 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
177 | #define EXT4_IO_END_ERROR 0x0002 | 177 | #define EXT4_IO_END_ERROR 0x0002 |
178 | #define EXT4_IO_END_QUEUED 0x0004 | ||
178 | 179 | ||
179 | struct ext4_io_page { | 180 | struct ext4_io_page { |
180 | struct page *p_page; | 181 | struct page *p_page; |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb85757689b6..5802fa1dab18 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) | |||
289 | 289 | ||
290 | static inline int ext4_should_writeback_data(struct inode *inode) | 290 | static inline int ext4_should_writeback_data(struct inode *inode) |
291 | { | 291 | { |
292 | if (!S_ISREG(inode->i_mode)) | ||
293 | return 0; | ||
294 | if (EXT4_JOURNAL(inode) == NULL) | 292 | if (EXT4_JOURNAL(inode) == NULL) |
295 | return 1; | 293 | return 1; |
294 | if (!S_ISREG(inode->i_mode)) | ||
295 | return 0; | ||
296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | 296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
297 | return 0; | 297 | return 0; |
298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index e4095e988eba..b9548f477bb8 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -224,53 +224,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | |||
224 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; | 224 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; |
225 | else | 225 | else |
226 | maxbytes = inode->i_sb->s_maxbytes; | 226 | maxbytes = inode->i_sb->s_maxbytes; |
227 | mutex_lock(&inode->i_mutex); | ||
228 | switch (origin) { | ||
229 | case SEEK_END: | ||
230 | offset += inode->i_size; | ||
231 | break; | ||
232 | case SEEK_CUR: | ||
233 | if (offset == 0) { | ||
234 | mutex_unlock(&inode->i_mutex); | ||
235 | return file->f_pos; | ||
236 | } | ||
237 | offset += file->f_pos; | ||
238 | break; | ||
239 | case SEEK_DATA: | ||
240 | /* | ||
241 | * In the generic case the entire file is data, so as long as | ||
242 | * offset isn't at the end of the file then the offset is data. | ||
243 | */ | ||
244 | if (offset >= inode->i_size) { | ||
245 | mutex_unlock(&inode->i_mutex); | ||
246 | return -ENXIO; | ||
247 | } | ||
248 | break; | ||
249 | case SEEK_HOLE: | ||
250 | /* | ||
251 | * There is a virtual hole at the end of the file, so as long as | ||
252 | * offset isn't i_size or larger, return i_size. | ||
253 | */ | ||
254 | if (offset >= inode->i_size) { | ||
255 | mutex_unlock(&inode->i_mutex); | ||
256 | return -ENXIO; | ||
257 | } | ||
258 | offset = inode->i_size; | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | if (offset < 0 || offset > maxbytes) { | ||
263 | mutex_unlock(&inode->i_mutex); | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | |||
267 | if (offset != file->f_pos) { | ||
268 | file->f_pos = offset; | ||
269 | file->f_version = 0; | ||
270 | } | ||
271 | mutex_unlock(&inode->i_mutex); | ||
272 | 227 | ||
273 | return offset; | 228 | return generic_file_llseek_size(file, offset, origin, maxbytes); |
274 | } | 229 | } |
275 | 230 | ||
276 | const struct file_operations ext4_file_operations = { | 231 | const struct file_operations ext4_file_operations = { |
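
The deleted SEEK_DATA/SEEK_HOLE cases are exactly what generic_file_llseek_size() now provides: the whole file is data, and there is a single virtual hole at i_size. A user-space sketch of those semantics (assumes SEEK_HOLE support in the kernel and libc; the file path is hypothetical):

	#define _GNU_SOURCE	/* for SEEK_HOLE */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/somefile", O_RDONLY);	/* hypothetical path */
		off_t hole;

		if (fd < 0)
			return 1;
		/* With no finer-grained hole tracking this is simply i_size. */
		hole = lseek(fd, 0, SEEK_HOLE);
		printf("first hole at %lld\n", (long long)hole);
		close(fd);
		return 0;
	}
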
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8602cde5b5a..0962642119c0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
800 | } | 800 | } |
801 | 801 | ||
802 | retry: | 802 | retry: |
803 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 803 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
804 | if (unlikely(!list_empty(&ei->i_completed_io_list))) { | ||
805 | mutex_lock(&inode->i_mutex); | ||
806 | ext4_flush_completed_IO(inode); | ||
807 | mutex_unlock(&inode->i_mutex); | ||
808 | } | ||
804 | ret = __blockdev_direct_IO(rw, iocb, inode, | 809 | ret = __blockdev_direct_IO(rw, iocb, inode, |
805 | inode->i_sb->s_bdev, iov, | 810 | inode->i_sb->s_bdev, iov, |
806 | offset, nr_segs, | 811 | offset, nr_segs, |
807 | ext4_get_block, NULL, NULL, 0); | 812 | ext4_get_block, NULL, NULL, 0); |
808 | else { | 813 | } else { |
809 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | 814 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
810 | offset, nr_segs, ext4_get_block); | 815 | offset, nr_segs, ext4_get_block); |
811 | 816 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d47264cafee0..986e2388f031 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -120,6 +120,9 @@ void ext4_evict_inode(struct inode *inode) | |||
120 | int err; | 120 | int err; |
121 | 121 | ||
122 | trace_ext4_evict_inode(inode); | 122 | trace_ext4_evict_inode(inode); |
123 | |||
124 | ext4_ioend_wait(inode); | ||
125 | |||
123 | if (inode->i_nlink) { | 126 | if (inode->i_nlink) { |
124 | /* | 127 | /* |
125 | * When journalling data dirty buffers are tracked only in the | 128 | * When journalling data dirty buffers are tracked only in the |
@@ -644,7 +647,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
644 | return bh; | 647 | return bh; |
645 | if (buffer_uptodate(bh)) | 648 | if (buffer_uptodate(bh)) |
646 | return bh; | 649 | return bh; |
647 | ll_rw_block(READ_META, 1, &bh); | 650 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
648 | wait_on_buffer(bh); | 651 | wait_on_buffer(bh); |
649 | if (buffer_uptodate(bh)) | 652 | if (buffer_uptodate(bh)) |
650 | return bh; | 653 | return bh; |
@@ -983,6 +986,8 @@ static int ext4_journalled_write_end(struct file *file, | |||
983 | from = pos & (PAGE_CACHE_SIZE - 1); | 986 | from = pos & (PAGE_CACHE_SIZE - 1); |
984 | to = from + len; | 987 | to = from + len; |
985 | 988 | ||
989 | BUG_ON(!ext4_handle_valid(handle)); | ||
990 | |||
986 | if (copied < len) { | 991 | if (copied < len) { |
987 | if (!PageUptodate(page)) | 992 | if (!PageUptodate(page)) |
988 | copied = 0; | 993 | copied = 0; |
@@ -1283,7 +1288,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1283 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) | 1288 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) |
1284 | err = ext4_bio_write_page(&io_submit, page, | 1289 | err = ext4_bio_write_page(&io_submit, page, |
1285 | len, mpd->wbc); | 1290 | len, mpd->wbc); |
1286 | else | 1291 | else if (buffer_uninit(page_bufs)) { |
1292 | ext4_set_bh_endio(page_bufs, inode); | ||
1293 | err = block_write_full_page_endio(page, | ||
1294 | noalloc_get_block_write, | ||
1295 | mpd->wbc, ext4_end_io_buffer_write); | ||
1296 | } else | ||
1287 | err = block_write_full_page(page, | 1297 | err = block_write_full_page(page, |
1288 | noalloc_get_block_write, mpd->wbc); | 1298 | noalloc_get_block_write, mpd->wbc); |
1289 | 1299 | ||
@@ -1699,6 +1709,8 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1699 | goto out; | 1709 | goto out; |
1700 | } | 1710 | } |
1701 | 1711 | ||
1712 | BUG_ON(!ext4_handle_valid(handle)); | ||
1713 | |||
1702 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1714 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1703 | do_journal_get_write_access); | 1715 | do_journal_get_write_access); |
1704 | 1716 | ||
@@ -2668,8 +2680,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2668 | goto out; | 2680 | goto out; |
2669 | } | 2681 | } |
2670 | 2682 | ||
2671 | io_end->flag = EXT4_IO_END_UNWRITTEN; | 2683 | /* |
2684 | * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now, | ||
2685 | * but being more careful is always safe for future changes.
2686 | */ | ||
2672 | inode = io_end->inode; | 2687 | inode = io_end->inode; |
2688 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
2689 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2690 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2691 | } | ||
2673 | 2692 | ||
2674 | /* Add the io_end to per-inode completed io list*/ | 2693 | /* Add the io_end to per-inode completed io list*/ |
2675 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2694 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
@@ -3279,7 +3298,7 @@ make_io: | |||
3279 | trace_ext4_load_inode(inode); | 3298 | trace_ext4_load_inode(inode); |
3280 | get_bh(bh); | 3299 | get_bh(bh); |
3281 | bh->b_end_io = end_buffer_read_sync; | 3300 | bh->b_end_io = end_buffer_read_sync; |
3282 | submit_bh(READ_META, bh); | 3301 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
3283 | wait_on_buffer(bh); | 3302 | wait_on_buffer(bh); |
3284 | if (!buffer_uptodate(bh)) { | 3303 | if (!buffer_uptodate(bh)) { |
3285 | EXT4_ERROR_INODE_BLOCK(inode, block, | 3304 | EXT4_ERROR_INODE_BLOCK(inode, block, |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 565a154e22d4..1c924faeb6c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -922,7 +922,8 @@ restart: | |||
922 | bh = ext4_getblk(NULL, dir, b++, 0, &err); | 922 | bh = ext4_getblk(NULL, dir, b++, 0, &err); |
923 | bh_use[ra_max] = bh; | 923 | bh_use[ra_max] = bh; |
924 | if (bh) | 924 | if (bh) |
925 | ll_rw_block(READ_META, 1, &bh); | 925 | ll_rw_block(READ | REQ_META | REQ_PRIO, |
926 | 1, &bh); | ||
926 | } | 927 | } |
927 | } | 928 | } |
928 | if ((bh = bh_use[ra_ptr++]) == NULL) | 929 | if ((bh = bh_use[ra_ptr++]) == NULL) |
@@ -2253,9 +2254,11 @@ static int ext4_symlink(struct inode *dir, | |||
2253 | /* | 2254 | /* |
2254 | * For non-fast symlinks, we just allocate inode and put it on | 2255 | * For non-fast symlinks, we just allocate inode and put it on |
2255 | * orphan list in the first transaction => we need bitmap, | 2256 | * orphan list in the first transaction => we need bitmap, |
2256 | * group descriptor, sb, inode block, quota blocks. | 2257 | * group descriptor, sb, inode block, quota blocks, and |
2258 | * possibly SELinux xattr blocks. | ||
2257 | */ | 2259 | */ |
2258 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | 2260 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + |
2261 | EXT4_XATTR_TRANS_BLOCKS; | ||
2259 | } else { | 2262 | } else { |
2260 | /* | 2263 | /* |
2261 | * Fast symlink. We have to add entry to directory | 2264 | * Fast symlink. We have to add entry to directory |
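The credit bump pairs with the xattr_security.c conversion below: ext4_init_security() may now write a security xattr inside this same transaction, so the handle has to reserve for it up front. A hedged sketch of how the reservation is made (error handling trimmed):

    /* Sketch: credits for a non-fast symlink. The constant 4 covers
     * bitmap, group descriptor, sb and inode block, as the comment
     * says; EXT4_XATTR_TRANS_BLOCKS covers a possible security xattr. */
    int credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
                  EXT4_XATTR_TRANS_BLOCKS;
    handle_t *handle = ext4_journal_start(dir, credits);

    if (IS_ERR(handle))
            return PTR_ERR(handle);

Without the extra term, the xattr write could exhaust the handle's credits mid-transaction.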
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 430c401d0895..92f38ee13f8a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -142,7 +142,23 @@ static void ext4_end_io_work(struct work_struct *work) | |||
142 | unsigned long flags; | 142 | unsigned long flags; |
143 | int ret; | 143 | int ret; |
144 | 144 | ||
145 | mutex_lock(&inode->i_mutex); | 145 | if (!mutex_trylock(&inode->i_mutex)) { |
146 | /* | ||
147 | * Requeue the work instead of waiting so that the work | ||
148 | * items queued after this can be processed. | ||
149 | */ | ||
150 | queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); | ||
151 | /* | ||
152 | * To prevent the ext4-dio-unwritten thread from keeping | ||
153 | * requeueing end_io requests and occupying cpu for too long, | ||
154 | * yield the cpu if it sees an end_io request that has already | ||
155 | * been requeued. | ||
156 | */ | ||
157 | if (io->flag & EXT4_IO_END_QUEUED) | ||
158 | yield(); | ||
159 | io->flag |= EXT4_IO_END_QUEUED; | ||
160 | return; | ||
161 | } | ||
146 | ret = ext4_end_io_nolock(io); | 162 | ret = ext4_end_io_nolock(io); |
147 | if (ret < 0) { | 163 | if (ret < 0) { |
148 | mutex_unlock(&inode->i_mutex); | 164 | mutex_unlock(&inode->i_mutex); |
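The worker cannot simply block on i_mutex: one inode holding the mutex would stall every later end_io item on the shared dio_unwritten_wq. The trylock/requeue/yield shape, condensed from the hunk (all names from the diff):

    if (!mutex_trylock(&inode->i_mutex)) {
            /* Requeue so items queued behind this one still run */
            queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
            /* Back off if this item was already requeued once, so the
             * worker does not spin against a long-held i_mutex. */
            if (io->flag & EXT4_IO_END_QUEUED)
                    yield();
            io->flag |= EXT4_IO_END_QUEUED;
            return;
    }

Note the flag is set after the check, so the first requeue is free and only repeat offenders yield.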
@@ -334,8 +350,10 @@ submit_and_retry: | |||
334 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | 350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && |
335 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | 351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) |
336 | goto submit_and_retry; | 352 | goto submit_and_retry; |
337 | if (buffer_uninit(bh)) | 353 | if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
338 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | 354 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
355 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
356 | } | ||
339 | io->io_end->size += bh->b_size; | 357 | io->io_end->size += bh->b_size; |
340 | io->io_next_block++; | 358 | io->io_next_block++; |
341 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 359 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4687fea0c00f..44d0c8db2239 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) | |||
919 | 919 | ||
920 | static void ext4_destroy_inode(struct inode *inode) | 920 | static void ext4_destroy_inode(struct inode *inode) |
921 | { | 921 | { |
922 | ext4_ioend_wait(inode); | ||
923 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 922 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
924 | ext4_msg(inode->i_sb, KERN_ERR, | 923 | ext4_msg(inode->i_sb, KERN_ERR, |
925 | "Inode %lu (%p): orphan list check failed!", | 924 | "Inode %lu (%p): orphan list check failed!", |
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 007c3bfbf094..34e4350dd4d9 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -48,28 +48,32 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, | |||
48 | name, value, size, flags); | 48 | name, value, size, flags); |
49 | } | 49 | } |
50 | 50 | ||
51 | int | 51 | int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
52 | ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, | 52 | void *fs_info) |
53 | const struct qstr *qstr) | ||
54 | { | 53 | { |
55 | int err; | 54 | const struct xattr *xattr; |
56 | size_t len; | 55 | handle_t *handle = fs_info; |
57 | void *value; | 56 | int err = 0; |
58 | char *name; | ||
59 | 57 | ||
60 | err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); | 58 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
61 | if (err) { | 59 | err = ext4_xattr_set_handle(handle, inode, |
62 | if (err == -EOPNOTSUPP) | 60 | EXT4_XATTR_INDEX_SECURITY, |
63 | return 0; | 61 | xattr->name, xattr->value, |
64 | return err; | 62 | xattr->value_len, 0); |
63 | if (err < 0) | ||
64 | break; | ||
65 | } | 65 | } |
66 | err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY, | ||
67 | name, value, len, 0); | ||
68 | kfree(name); | ||
69 | kfree(value); | ||
70 | return err; | 66 | return err; |
71 | } | 67 | } |
72 | 68 | ||
69 | int | ||
70 | ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, | ||
71 | const struct qstr *qstr) | ||
72 | { | ||
73 | return security_inode_init_security(inode, dir, qstr, | ||
74 | &ext4_initxattrs, handle); | ||
75 | } | ||
76 | |||
73 | const struct xattr_handler ext4_xattr_security_handler = { | 77 | const struct xattr_handler ext4_xattr_security_handler = { |
74 | .prefix = XATTR_SECURITY_PREFIX, | 78 | .prefix = XATTR_SECURITY_PREFIX, |
75 | .list = ext4_xattr_security_list, | 79 | .list = ext4_xattr_security_list, |
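This is the tree-wide conversion of security_inode_init_security() to a callback API (the ext2/ext3 hunks in the diffstat are the same change): instead of returning one name/value/len triple for the filesystem to write and free, the LSM passes a NULL-terminated array of xattrs to an initxattrs callback, with an opaque fs_info pointer (here the journal handle) riding along. A hedged sketch of the contract for some other filesystem; the myfs_* names are hypothetical:

    /* Hypothetical filesystem: persist each security xattr the LSM
     * produced. fs_info carries whatever context the fs handed to
     * security_inode_init_security(). */
    static int myfs_initxattrs(struct inode *inode,
                               const struct xattr *xattr_array,
                               void *fs_info)
    {
            const struct xattr *xattr;
            int err = 0;

            for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                    err = myfs_setxattr(inode, xattr->name, /* hypothetical */
                                        xattr->value, xattr->value_len);
                    if (err < 0)
                            break;
            }
            return err;
    }

    static int myfs_init_security(struct inode *inode, struct inode *dir,
                                  const struct qstr *qstr)
    {
            return security_inode_init_security(inode, dir, qstr,
                                                &myfs_initxattrs, NULL);
    }

For ext4 the payoff is that every xattr lands inside the caller's transaction, and the kfree(name)/kfree(value) boilerplate moves into the security layer.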
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ad64732cbce..5efbd5d7701a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, | |||
1231 | struct super_block *sb = dir->i_sb; | 1231 | struct super_block *sb = dir->i_sb; |
1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ | 1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ |
1234 | struct msdos_dir_entry *de; | 1234 | struct msdos_dir_entry *uninitialized_var(de); |
1235 | int err, free_slots, i, nr_bhs; | 1235 | int err, free_slots, i, nr_bhs; |
1236 | loff_t pos, i_pos; | 1236 | loff_t pos, i_pos; |
1237 | 1237 | ||
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5942fec22c65..1726d7303047 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
1188 | out: | 1188 | out: |
1189 | /* UTF-8 doesn't provide FAT semantics */ | 1189 | /* UTF-8 doesn't provide FAT semantics */ |
1190 | if (!strcmp(opts->iocharset, "utf8")) { | 1190 | if (!strcmp(opts->iocharset, "utf8")) { |
1191 | fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" | 1191 | fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" |
1192 | " for FAT filesystems, filesystem will be " | 1192 | " for FAT filesystems, filesystem will be " |
1193 | "case sensitive!\n"); | 1193 | "case sensitive!"); |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ | 1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ |
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1367 | sbi->free_clusters = -1; /* Don't know yet */ | 1367 | sbi->free_clusters = -1; /* Don't know yet */ |
1368 | sbi->free_clus_valid = 0; | 1368 | sbi->free_clus_valid = 0; |
1369 | sbi->prev_free = FAT_START_ENT; | 1369 | sbi->prev_free = FAT_START_ENT; |
1370 | sb->s_maxbytes = 0xffffffff; | ||
1370 | 1371 | ||
1371 | if (!sbi->fat_length && b->fat32_length) { | 1372 | if (!sbi->fat_length && b->fat32_length) { |
1372 | struct fat_boot_fsinfo *fsinfo; | 1373 | struct fat_boot_fsinfo *fsinfo; |
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1377 | sbi->fat_length = le32_to_cpu(b->fat32_length); | 1378 | sbi->fat_length = le32_to_cpu(b->fat32_length); |
1378 | sbi->root_cluster = le32_to_cpu(b->root_cluster); | 1379 | sbi->root_cluster = le32_to_cpu(b->root_cluster); |
1379 | 1380 | ||
1380 | sb->s_maxbytes = 0xffffffff; | ||
1381 | |||
1382 | /* MC - if info_sector is 0, don't multiply by 0 */ | 1381 | /* MC - if info_sector is 0, don't multiply by 0 */ |
1383 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); | 1382 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); |
1384 | if (sbi->fsinfo_sector == 0) | 1383 | if (sbi->fsinfo_sector == 0) |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 640fc229df10..5cb8614508c3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -258,10 +258,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | |||
258 | forget->forget_one.nlookup = nlookup; | 258 | forget->forget_one.nlookup = nlookup; |
259 | 259 | ||
260 | spin_lock(&fc->lock); | 260 | spin_lock(&fc->lock); |
261 | fc->forget_list_tail->next = forget; | 261 | if (fc->connected) { |
262 | fc->forget_list_tail = forget; | 262 | fc->forget_list_tail->next = forget; |
263 | wake_up(&fc->waitq); | 263 | fc->forget_list_tail = forget; |
264 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | 264 | wake_up(&fc->waitq); |
265 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | ||
266 | } else { | ||
267 | kfree(forget); | ||
268 | } | ||
265 | spin_unlock(&fc->lock); | 269 | spin_unlock(&fc->lock); |
266 | } | 270 | } |
267 | 271 | ||
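Queueing a forget against a dead connection would leave it on a list no reader will ever drain, so the hunk frees it instead. Condensed, with all names from the diff:

    spin_lock(&fc->lock);
    if (fc->connected) {
            fc->forget_list_tail->next = forget;
            fc->forget_list_tail = forget;
            wake_up(&fc->waitq);
            kill_fasync(&fc->fasync, SIGIO, POLL_IN);
    } else {
            kfree(forget);  /* nobody will dequeue it; avoid the leak */
    }
    spin_unlock(&fc->lock);

fc->connected is cleared under the same spinlock, so the check and the enqueue are atomic with respect to connection teardown.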
@@ -1358,6 +1362,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, | |||
1358 | if (outarg.namelen > FUSE_NAME_MAX) | 1362 | if (outarg.namelen > FUSE_NAME_MAX) |
1359 | goto err; | 1363 | goto err; |
1360 | 1364 | ||
1365 | err = -EINVAL; | ||
1366 | if (size != sizeof(outarg) + outarg.namelen + 1) | ||
1367 | goto err; | ||
1368 | |||
1361 | name.name = buf; | 1369 | name.name = buf; |
1362 | name.len = outarg.namelen; | 1370 | name.len = outarg.namelen; |
1363 | err = fuse_copy_one(cs, buf, outarg.namelen + 1); | 1371 | err = fuse_copy_one(cs, buf, outarg.namelen + 1); |
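The added check validates the notify message's total size against the name length the daemon declared; a mismatch would desynchronize the device stream, either leaving stale bytes behind or letting the name copy run into the next message. The validation itself is a plain identity:

    /* The message must be exactly header + name + terminating NUL */
    err = -EINVAL;
    if (size != sizeof(outarg) + outarg.namelen + 1)
            goto err;

Doing it before fuse_copy_one() keeps the error path free of partially consumed request state.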
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d480d9af46c9..594f07a81c28 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
17 | #include <linux/swap.h> | ||
17 | 18 | ||
18 | static const struct file_operations fuse_direct_io_file_operations; | 19 | static const struct file_operations fuse_direct_io_file_operations; |
19 | 20 | ||
@@ -245,6 +246,12 @@ void fuse_release_common(struct file *file, int opcode) | |||
245 | req = ff->reserved_req; | 246 | req = ff->reserved_req; |
246 | fuse_prepare_release(ff, file->f_flags, opcode); | 247 | fuse_prepare_release(ff, file->f_flags, opcode); |
247 | 248 | ||
249 | if (ff->flock) { | ||
250 | struct fuse_release_in *inarg = &req->misc.release.in; | ||
251 | inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; | ||
252 | inarg->lock_owner = fuse_lock_owner_id(ff->fc, | ||
253 | (fl_owner_t) file); | ||
254 | } | ||
248 | /* Hold vfsmount and dentry until release is finished */ | 255 | /* Hold vfsmount and dentry until release is finished */ |
249 | path_get(&file->f_path); | 256 | path_get(&file->f_path); |
250 | req->misc.release.path = file->f_path; | 257 | req->misc.release.path = file->f_path; |
@@ -755,18 +762,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, | |||
755 | return req->misc.write.out.size; | 762 | return req->misc.write.out.size; |
756 | } | 763 | } |
757 | 764 | ||
758 | static int fuse_write_begin(struct file *file, struct address_space *mapping, | ||
759 | loff_t pos, unsigned len, unsigned flags, | ||
760 | struct page **pagep, void **fsdata) | ||
761 | { | ||
762 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
763 | |||
764 | *pagep = grab_cache_page_write_begin(mapping, index, flags); | ||
765 | if (!*pagep) | ||
766 | return -ENOMEM; | ||
767 | return 0; | ||
768 | } | ||
769 | |||
770 | void fuse_write_update_size(struct inode *inode, loff_t pos) | 765 | void fuse_write_update_size(struct inode *inode, loff_t pos) |
771 | { | 766 | { |
772 | struct fuse_conn *fc = get_fuse_conn(inode); | 767 | struct fuse_conn *fc = get_fuse_conn(inode); |
@@ -779,62 +774,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos) | |||
779 | spin_unlock(&fc->lock); | 774 | spin_unlock(&fc->lock); |
780 | } | 775 | } |
781 | 776 | ||
782 | static int fuse_buffered_write(struct file *file, struct inode *inode, | ||
783 | loff_t pos, unsigned count, struct page *page) | ||
784 | { | ||
785 | int err; | ||
786 | size_t nres; | ||
787 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
788 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
789 | struct fuse_req *req; | ||
790 | |||
791 | if (is_bad_inode(inode)) | ||
792 | return -EIO; | ||
793 | |||
794 | /* | ||
795 | * Make sure writepages on the same page are not mixed up with | ||
796 | * plain writes. | ||
797 | */ | ||
798 | fuse_wait_on_page_writeback(inode, page->index); | ||
799 | |||
800 | req = fuse_get_req(fc); | ||
801 | if (IS_ERR(req)) | ||
802 | return PTR_ERR(req); | ||
803 | |||
804 | req->in.argpages = 1; | ||
805 | req->num_pages = 1; | ||
806 | req->pages[0] = page; | ||
807 | req->page_offset = offset; | ||
808 | nres = fuse_send_write(req, file, pos, count, NULL); | ||
809 | err = req->out.h.error; | ||
810 | fuse_put_request(fc, req); | ||
811 | if (!err && !nres) | ||
812 | err = -EIO; | ||
813 | if (!err) { | ||
814 | pos += nres; | ||
815 | fuse_write_update_size(inode, pos); | ||
816 | if (count == PAGE_CACHE_SIZE) | ||
817 | SetPageUptodate(page); | ||
818 | } | ||
819 | fuse_invalidate_attr(inode); | ||
820 | return err ? err : nres; | ||
821 | } | ||
822 | |||
823 | static int fuse_write_end(struct file *file, struct address_space *mapping, | ||
824 | loff_t pos, unsigned len, unsigned copied, | ||
825 | struct page *page, void *fsdata) | ||
826 | { | ||
827 | struct inode *inode = mapping->host; | ||
828 | int res = 0; | ||
829 | |||
830 | if (copied) | ||
831 | res = fuse_buffered_write(file, inode, pos, copied, page); | ||
832 | |||
833 | unlock_page(page); | ||
834 | page_cache_release(page); | ||
835 | return res; | ||
836 | } | ||
837 | |||
838 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, | 777 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, |
839 | struct inode *inode, loff_t pos, | 778 | struct inode *inode, loff_t pos, |
840 | size_t count) | 779 | size_t count) |
@@ -908,6 +847,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, | |||
908 | pagefault_enable(); | 847 | pagefault_enable(); |
909 | flush_dcache_page(page); | 848 | flush_dcache_page(page); |
910 | 849 | ||
850 | mark_page_accessed(page); | ||
851 | |||
911 | if (!tmp) { | 852 | if (!tmp) { |
912 | unlock_page(page); | 853 | unlock_page(page); |
913 | page_cache_release(page); | 854 | page_cache_release(page); |
@@ -1559,11 +1500,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) | |||
1559 | struct fuse_conn *fc = get_fuse_conn(inode); | 1500 | struct fuse_conn *fc = get_fuse_conn(inode); |
1560 | int err; | 1501 | int err; |
1561 | 1502 | ||
1562 | if (fc->no_lock) { | 1503 | if (fc->no_flock) { |
1563 | err = flock_lock_file_wait(file, fl); | 1504 | err = flock_lock_file_wait(file, fl); |
1564 | } else { | 1505 | } else { |
1506 | struct fuse_file *ff = file->private_data; | ||
1507 | |||
1565 | /* emulate flock with POSIX locks */ | 1508 | /* emulate flock with POSIX locks */ |
1566 | fl->fl_owner = (fl_owner_t) file; | 1509 | fl->fl_owner = (fl_owner_t) file; |
1510 | ff->flock = true; | ||
1567 | err = fuse_setlk(file, fl, 1); | 1511 | err = fuse_setlk(file, fl, 1); |
1568 | } | 1512 | } |
1569 | 1513 | ||
@@ -2201,8 +2145,6 @@ static const struct address_space_operations fuse_file_aops = { | |||
2201 | .readpage = fuse_readpage, | 2145 | .readpage = fuse_readpage, |
2202 | .writepage = fuse_writepage, | 2146 | .writepage = fuse_writepage, |
2203 | .launder_page = fuse_launder_page, | 2147 | .launder_page = fuse_launder_page, |
2204 | .write_begin = fuse_write_begin, | ||
2205 | .write_end = fuse_write_end, | ||
2206 | .readpages = fuse_readpages, | 2148 | .readpages = fuse_readpages, |
2207 | .set_page_dirty = __set_page_dirty_nobuffers, | 2149 | .set_page_dirty = __set_page_dirty_nobuffers, |
2208 | .bmap = fuse_bmap, | 2150 | .bmap = fuse_bmap, |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c6aa2d4b8517..cf6db0a93219 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -135,6 +135,9 @@ struct fuse_file { | |||
135 | 135 | ||
136 | /** Wait queue head for poll */ | 136 | /** Wait queue head for poll */ |
137 | wait_queue_head_t poll_wait; | 137 | wait_queue_head_t poll_wait; |
138 | |||
139 | /** Has flock been performed on this file? */ | ||
140 | bool flock:1; | ||
138 | }; | 141 | }; |
139 | 142 | ||
140 | /** One input argument of a request */ | 143 | /** One input argument of a request */ |
@@ -448,7 +451,7 @@ struct fuse_conn { | |||
448 | /** Is removexattr not implemented by fs? */ | 451 | /** Is removexattr not implemented by fs? */ |
449 | unsigned no_removexattr:1; | 452 | unsigned no_removexattr:1; |
450 | 453 | ||
451 | /** Are file locking primitives not implemented by fs? */ | 454 | /** Are POSIX file locking primitives not implemented by fs? */ |
452 | unsigned no_lock:1; | 455 | unsigned no_lock:1; |
453 | 456 | ||
454 | /** Is access not implemented by fs? */ | 457 | /** Is access not implemented by fs? */ |
@@ -472,6 +475,9 @@ struct fuse_conn { | |||
472 | /** Don't apply umask to creation modes */ | 475 | /** Don't apply umask to creation modes */ |
473 | unsigned dont_mask:1; | 476 | unsigned dont_mask:1; |
474 | 477 | ||
478 | /** Are BSD file locking primitives not implemented by fs? */ | ||
479 | unsigned no_flock:1; | ||
480 | |||
475 | /** The number of requests waiting for completion */ | 481 | /** The number of requests waiting for completion */ |
476 | atomic_t num_waiting; | 482 | atomic_t num_waiting; |
477 | 483 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 38f84cd48b67..add96f6ffda5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -71,7 +71,7 @@ struct fuse_mount_data { | |||
71 | unsigned blksize; | 71 | unsigned blksize; |
72 | }; | 72 | }; |
73 | 73 | ||
74 | struct fuse_forget_link *fuse_alloc_forget() | 74 | struct fuse_forget_link *fuse_alloc_forget(void) |
75 | { | 75 | { |
76 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); | 76 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); |
77 | } | 77 | } |
@@ -809,6 +809,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
809 | fc->async_read = 1; | 809 | fc->async_read = 1; |
810 | if (!(arg->flags & FUSE_POSIX_LOCKS)) | 810 | if (!(arg->flags & FUSE_POSIX_LOCKS)) |
811 | fc->no_lock = 1; | 811 | fc->no_lock = 1; |
812 | if (arg->minor >= 17) { | ||
813 | if (!(arg->flags & FUSE_FLOCK_LOCKS)) | ||
814 | fc->no_flock = 1; | ||
815 | } else { | ||
816 | if (!(arg->flags & FUSE_POSIX_LOCKS)) | ||
817 | fc->no_flock = 1; | ||
818 | } | ||
812 | if (arg->flags & FUSE_ATOMIC_O_TRUNC) | 819 | if (arg->flags & FUSE_ATOMIC_O_TRUNC) |
813 | fc->atomic_o_trunc = 1; | 820 | fc->atomic_o_trunc = 1; |
814 | if (arg->minor >= 9) { | 821 | if (arg->minor >= 9) { |
@@ -823,6 +830,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
823 | } else { | 830 | } else { |
824 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; | 831 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; |
825 | fc->no_lock = 1; | 832 | fc->no_lock = 1; |
833 | fc->no_flock = 1; | ||
826 | } | 834 | } |
827 | 835 | ||
828 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); | 836 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); |
@@ -843,7 +851,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
843 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | 851 | arg->minor = FUSE_KERNEL_MINOR_VERSION; |
844 | arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; | 852 | arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; |
845 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | | 853 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | |
846 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; | 854 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
855 | FUSE_FLOCK_LOCKS; | ||
847 | req->in.h.opcode = FUSE_INIT; | 856 | req->in.h.opcode = FUSE_INIT; |
848 | req->in.numargs = 1; | 857 | req->in.numargs = 1; |
849 | req->in.args[0].size = sizeof(*arg); | 858 | req->in.args[0].size = sizeof(*arg); |
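The negotiation is protocol-versioned: FUSE_FLOCK_LOCKS only exists from minor 17, so for older servers flock support is inferred from the POSIX-locks bit, which is what earlier kernels emulated flock with. Condensed from the hunk:

    if (arg->minor >= 17) {
            if (!(arg->flags & FUSE_FLOCK_LOCKS))
                    fc->no_flock = 1;
    } else {
            /* pre-17 servers: flock was emulated via POSIX locks */
            if (!(arg->flags & FUSE_POSIX_LOCKS))
                    fc->no_flock = 1;
    }

Together with ff->flock and FUSE_RELEASE_FLOCK_UNLOCK in the file.c hunks above, this lets a new server implement real BSD locks while old servers keep the emulation unchanged.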
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 34501b64bc47..65978d7885c8 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -82,7 +82,7 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode) | |||
82 | iattr.ia_valid = ATTR_MODE; | 82 | iattr.ia_valid = ATTR_MODE; |
83 | iattr.ia_mode = mode; | 83 | iattr.ia_mode = mode; |
84 | 84 | ||
85 | error = gfs2_setattr_simple(GFS2_I(inode), &iattr); | 85 | error = gfs2_setattr_simple(inode, &iattr); |
86 | } | 86 | } |
87 | 87 | ||
88 | return error; | 88 | return error; |
@@ -160,6 +160,7 @@ out: | |||
160 | 160 | ||
161 | int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | 161 | int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) |
162 | { | 162 | { |
163 | struct inode *inode = &ip->i_inode; | ||
163 | struct posix_acl *acl; | 164 | struct posix_acl *acl; |
164 | char *data; | 165 | char *data; |
165 | unsigned int len; | 166 | unsigned int len; |
@@ -169,7 +170,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
169 | if (IS_ERR(acl)) | 170 | if (IS_ERR(acl)) |
170 | return PTR_ERR(acl); | 171 | return PTR_ERR(acl); |
171 | if (!acl) | 172 | if (!acl) |
172 | return gfs2_setattr_simple(ip, attr); | 173 | return gfs2_setattr_simple(inode, attr); |
173 | 174 | ||
174 | error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); | 175 | error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); |
175 | if (error) | 176 | if (error) |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index f9fbbe96c222..4858e1fed8b1 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -663,7 +663,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
663 | if (&ip->i_inode == sdp->sd_rindex) | 663 | if (&ip->i_inode == sdp->sd_rindex) |
664 | rblocks += 2 * RES_STATFS; | 664 | rblocks += 2 * RES_STATFS; |
665 | if (alloc_required) | 665 | if (alloc_required) |
666 | rblocks += gfs2_rg_blocks(al); | 666 | rblocks += gfs2_rg_blocks(ip); |
667 | 667 | ||
668 | error = gfs2_trans_begin(sdp, rblocks, | 668 | error = gfs2_trans_begin(sdp, rblocks, |
669 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 669 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
@@ -787,7 +787,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
787 | u64 to = pos + copied; | 787 | u64 to = pos + copied; |
788 | void *kaddr; | 788 | void *kaddr; |
789 | unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); | 789 | unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); |
790 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | ||
791 | 790 | ||
792 | BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); | 791 | BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); |
793 | kaddr = kmap_atomic(page, KM_USER0); | 792 | kaddr = kmap_atomic(page, KM_USER0); |
@@ -804,7 +803,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
804 | if (copied) { | 803 | if (copied) { |
805 | if (inode->i_size < to) | 804 | if (inode->i_size < to) |
806 | i_size_write(inode, to); | 805 | i_size_write(inode, to); |
807 | gfs2_dinode_out(ip, di); | ||
808 | mark_inode_dirty(inode); | 806 | mark_inode_dirty(inode); |
809 | } | 807 | } |
810 | 808 | ||
@@ -873,10 +871,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
873 | gfs2_page_add_databufs(ip, page, from, to); | 871 | gfs2_page_add_databufs(ip, page, from, to); |
874 | 872 | ||
875 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 873 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
876 | if (ret > 0) { | ||
877 | gfs2_dinode_out(ip, dibh->b_data); | ||
878 | mark_inode_dirty(inode); | ||
879 | } | ||
880 | 874 | ||
881 | if (inode == sdp->sd_rindex) { | 875 | if (inode == sdp->sd_rindex) { |
882 | adjust_fs_space(inode); | 876 | adjust_fs_space(inode); |
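Both aops.c hunks drop the inline gfs2_dinode_out() at write_end time: dirtying the VFS inode is enough, and the dinode encode happens once on the common inode-dirtying path (via ->dirty_inode in this series) instead of on every buffered write. The surviving tail of gfs2_stuffed_write_end(), names from the diff:

    if (copied) {
            if (inode->i_size < to)
                    i_size_write(inode, to);
            /* dinode encode deferred to the dirty-inode path */
            mark_inode_dirty(inode);
    }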
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 7878c473ae62..41d494d79709 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
11 | #include <linux/completion.h> | 11 | #include <linux/completion.h> |
12 | #include <linux/buffer_head.h> | 12 | #include <linux/buffer_head.h> |
13 | #include <linux/blkdev.h> | ||
13 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> |
14 | #include <linux/crc32.h> | 15 | #include <linux/crc32.h> |
15 | 16 | ||
@@ -36,11 +37,6 @@ struct metapath { | |||
36 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; | 37 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; |
37 | }; | 38 | }; |
38 | 39 | ||
39 | typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh, | ||
40 | struct buffer_head *bh, __be64 *top, | ||
41 | __be64 *bottom, unsigned int height, | ||
42 | void *data); | ||
43 | |||
44 | struct strip_mine { | 40 | struct strip_mine { |
45 | int sm_first; | 41 | int sm_first; |
46 | unsigned int sm_height; | 42 | unsigned int sm_height; |
@@ -273,6 +269,30 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp | |||
273 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; | 269 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; |
274 | } | 270 | } |
275 | 271 | ||
272 | static void gfs2_metapath_ra(struct gfs2_glock *gl, | ||
273 | const struct buffer_head *bh, const __be64 *pos) | ||
274 | { | ||
275 | struct buffer_head *rabh; | ||
276 | const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size); | ||
277 | const __be64 *t; | ||
278 | |||
279 | for (t = pos; t < endp; t++) { | ||
280 | if (!*t) | ||
281 | continue; | ||
282 | |||
283 | rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); | ||
284 | if (trylock_buffer(rabh)) { | ||
285 | if (!buffer_uptodate(rabh)) { | ||
286 | rabh->b_end_io = end_buffer_read_sync; | ||
287 | submit_bh(READA | REQ_META, rabh); | ||
288 | continue; | ||
289 | } | ||
290 | unlock_buffer(rabh); | ||
291 | } | ||
292 | brelse(rabh); | ||
293 | } | ||
294 | } | ||
295 | |||
276 | /** | 296 | /** |
277 | * lookup_metapath - Walk the metadata tree to a specific point | 297 | * lookup_metapath - Walk the metadata tree to a specific point |
278 | * @ip: The inode | 298 | * @ip: The inode |
@@ -432,12 +452,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
432 | { | 452 | { |
433 | struct gfs2_inode *ip = GFS2_I(inode); | 453 | struct gfs2_inode *ip = GFS2_I(inode); |
434 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 454 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
455 | struct super_block *sb = sdp->sd_vfs; | ||
435 | struct buffer_head *dibh = mp->mp_bh[0]; | 456 | struct buffer_head *dibh = mp->mp_bh[0]; |
436 | u64 bn, dblock = 0; | 457 | u64 bn, dblock = 0; |
437 | unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; | 458 | unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; |
438 | unsigned dblks = 0; | 459 | unsigned dblks = 0; |
439 | unsigned ptrs_per_blk; | 460 | unsigned ptrs_per_blk; |
440 | const unsigned end_of_metadata = height - 1; | 461 | const unsigned end_of_metadata = height - 1; |
462 | int ret; | ||
441 | int eob = 0; | 463 | int eob = 0; |
442 | enum alloc_state state; | 464 | enum alloc_state state; |
443 | __be64 *ptr; | 465 | __be64 *ptr; |
@@ -540,6 +562,15 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
540 | dblock = bn; | 562 | dblock = bn; |
541 | while (n-- > 0) | 563 | while (n-- > 0) |
542 | *ptr++ = cpu_to_be64(bn++); | 564 | *ptr++ = cpu_to_be64(bn++); |
565 | if (buffer_zeronew(bh_map)) { | ||
566 | ret = sb_issue_zeroout(sb, dblock, dblks, | ||
567 | GFP_NOFS); | ||
568 | if (ret) { | ||
569 | fs_err(sdp, | ||
570 | "Failed to zero data buffers\n"); | ||
571 | clear_buffer_zeronew(bh_map); | ||
572 | } | ||
573 | } | ||
543 | break; | 574 | break; |
544 | } | 575 | } |
545 | } while ((state != ALLOC_DATA) || !dblock); | 576 | } while ((state != ALLOC_DATA) || !dblock); |
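Freshly fallocated blocks must read back as zeroes, and the old code guaranteed that by pushing zero-filled pages through the page cache. The new buffer_zeronew() hint lets the allocator zero the extent directly at the block layer instead. The consumer side, matching the hunk:

    if (buffer_zeronew(bh_map)) {
            /* Zero the newly allocated extent below the page cache */
            ret = sb_issue_zeroout(sb, dblock, dblks, GFP_NOFS);
            if (ret) {
                    fs_err(sdp, "Failed to zero data buffers\n");
                    clear_buffer_zeronew(bh_map);
            }
    }

On failure the hint is simply cleared and the error logged; the design point is that fallocate no longer instantiates a page for every block it preallocates (see the fallocate_chunk() rewrite in gfs2/file.c below).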
@@ -668,76 +699,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi | |||
668 | } | 699 | } |
669 | 700 | ||
670 | /** | 701 | /** |
671 | * recursive_scan - recursively scan through the end of a file | ||
672 | * @ip: the inode | ||
673 | * @dibh: the dinode buffer | ||
674 | * @mp: the path through the metadata to the point to start | ||
675 | * @height: the height the recursion is at | ||
676 | * @block: the indirect block to look at | ||
677 | * @first: 1 if this is the first block | ||
678 | * @bc: the call to make for each piece of metadata | ||
679 | * @data: data opaque to this function to pass to @bc | ||
680 | * | ||
681 | * When this is first called @height and @block should be zero and | ||
682 | * @first should be 1. | ||
683 | * | ||
684 | * Returns: errno | ||
685 | */ | ||
686 | |||
687 | static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, | ||
688 | struct metapath *mp, unsigned int height, | ||
689 | u64 block, int first, block_call_t bc, | ||
690 | void *data) | ||
691 | { | ||
692 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
693 | struct buffer_head *bh = NULL; | ||
694 | __be64 *top, *bottom; | ||
695 | u64 bn; | ||
696 | int error; | ||
697 | int mh_size = sizeof(struct gfs2_meta_header); | ||
698 | |||
699 | if (!height) { | ||
700 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
701 | if (error) | ||
702 | return error; | ||
703 | dibh = bh; | ||
704 | |||
705 | top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; | ||
706 | bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; | ||
707 | } else { | ||
708 | error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); | ||
709 | if (error) | ||
710 | return error; | ||
711 | |||
712 | top = (__be64 *)(bh->b_data + mh_size) + | ||
713 | (first ? mp->mp_list[height] : 0); | ||
714 | |||
715 | bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; | ||
716 | } | ||
717 | |||
718 | error = bc(ip, dibh, bh, top, bottom, height, data); | ||
719 | if (error) | ||
720 | goto out; | ||
721 | |||
722 | if (height < ip->i_height - 1) | ||
723 | for (; top < bottom; top++, first = 0) { | ||
724 | if (!*top) | ||
725 | continue; | ||
726 | |||
727 | bn = be64_to_cpu(*top); | ||
728 | |||
729 | error = recursive_scan(ip, dibh, mp, height + 1, bn, | ||
730 | first, bc, data); | ||
731 | if (error) | ||
732 | break; | ||
733 | } | ||
734 | |||
735 | out: | ||
736 | brelse(bh); | ||
737 | return error; | ||
738 | } | ||
739 | |||
740 | /** | ||
741 | * do_strip - Look for a particular layer of the file and strip it off | 702 | * do_strip - Look for a particular layer of the file and strip it off |
742 | * @ip: the inode | 703 | * @ip: the inode |
743 | * @dibh: the dinode buffer | 704 | * @dibh: the dinode buffer |
@@ -752,9 +713,8 @@ out: | |||
752 | 713 | ||
753 | static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | 714 | static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, |
754 | struct buffer_head *bh, __be64 *top, __be64 *bottom, | 715 | struct buffer_head *bh, __be64 *top, __be64 *bottom, |
755 | unsigned int height, void *data) | 716 | unsigned int height, struct strip_mine *sm) |
756 | { | 717 | { |
757 | struct strip_mine *sm = data; | ||
758 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 718 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
759 | struct gfs2_rgrp_list rlist; | 719 | struct gfs2_rgrp_list rlist; |
760 | u64 bn, bstart; | 720 | u64 bn, bstart; |
@@ -783,11 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
783 | else if (ip->i_depth) | 743 | else if (ip->i_depth) |
784 | revokes = sdp->sd_inptrs; | 744 | revokes = sdp->sd_inptrs; |
785 | 745 | ||
786 | if (ip != GFS2_I(sdp->sd_rindex)) | ||
787 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); | ||
788 | else if (!sdp->sd_rgrps) | ||
789 | error = gfs2_ri_update(ip); | ||
790 | |||
791 | if (error) | 746 | if (error) |
792 | return error; | 747 | return error; |
793 | 748 | ||
@@ -805,7 +760,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
805 | blen++; | 760 | blen++; |
806 | else { | 761 | else { |
807 | if (bstart) | 762 | if (bstart) |
808 | gfs2_rlist_add(sdp, &rlist, bstart); | 763 | gfs2_rlist_add(ip, &rlist, bstart); |
809 | 764 | ||
810 | bstart = bn; | 765 | bstart = bn; |
811 | blen = 1; | 766 | blen = 1; |
@@ -813,7 +768,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
813 | } | 768 | } |
814 | 769 | ||
815 | if (bstart) | 770 | if (bstart) |
816 | gfs2_rlist_add(sdp, &rlist, bstart); | 771 | gfs2_rlist_add(ip, &rlist, bstart); |
817 | else | 772 | else |
818 | goto out; /* Nothing to do */ | 773 | goto out; /* Nothing to do */ |
819 | 774 | ||
@@ -887,12 +842,82 @@ out_rg_gunlock: | |||
887 | out_rlist: | 842 | out_rlist: |
888 | gfs2_rlist_free(&rlist); | 843 | gfs2_rlist_free(&rlist); |
889 | out: | 844 | out: |
890 | if (ip != GFS2_I(sdp->sd_rindex)) | ||
891 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); | ||
892 | return error; | 845 | return error; |
893 | } | 846 | } |
894 | 847 | ||
895 | /** | 848 | /** |
849 | * recursive_scan - recursively scan through the end of a file | ||
850 | * @ip: the inode | ||
851 | * @dibh: the dinode buffer | ||
852 | * @mp: the path through the metadata to the point to start | ||
853 | * @height: the height the recursion is at | ||
854 | * @block: the indirect block to look at | ||
855 | * @first: 1 if this is the first block | ||
856 | * @sm: the strip_mine structure passed through to do_strip() | ||
857 | * | ||
858 | * When this is first called @height and @block should be zero and | ||
859 | * @first should be 1. | ||
860 | * | ||
861 | * Returns: errno | ||
862 | */ | ||
863 | |||
864 | static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, | ||
865 | struct metapath *mp, unsigned int height, | ||
866 | u64 block, int first, struct strip_mine *sm) | ||
867 | { | ||
868 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
869 | struct buffer_head *bh = NULL; | ||
870 | __be64 *top, *bottom; | ||
871 | u64 bn; | ||
872 | int error; | ||
873 | int mh_size = sizeof(struct gfs2_meta_header); | ||
874 | |||
875 | if (!height) { | ||
876 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
877 | if (error) | ||
878 | return error; | ||
879 | dibh = bh; | ||
880 | |||
881 | top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; | ||
882 | bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; | ||
883 | } else { | ||
884 | error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); | ||
885 | if (error) | ||
886 | return error; | ||
887 | |||
888 | top = (__be64 *)(bh->b_data + mh_size) + | ||
889 | (first ? mp->mp_list[height] : 0); | ||
890 | |||
891 | bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; | ||
892 | } | ||
893 | |||
894 | error = do_strip(ip, dibh, bh, top, bottom, height, sm); | ||
895 | if (error) | ||
896 | goto out; | ||
897 | |||
898 | if (height < ip->i_height - 1) { | ||
899 | |||
900 | gfs2_metapath_ra(ip->i_gl, bh, top); | ||
901 | |||
902 | for (; top < bottom; top++, first = 0) { | ||
903 | if (!*top) | ||
904 | continue; | ||
905 | |||
906 | bn = be64_to_cpu(*top); | ||
907 | |||
908 | error = recursive_scan(ip, dibh, mp, height + 1, bn, | ||
909 | first, sm); | ||
910 | if (error) | ||
911 | break; | ||
912 | } | ||
913 | } | ||
914 | out: | ||
915 | brelse(bh); | ||
916 | return error; | ||
917 | } | ||
918 | |||
919 | |||
920 | /** | ||
896 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | 921 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate |
897 | * | 922 | * |
898 | * This is partly borrowed from ext3. | 923 | * This is partly borrowed from ext3. |
@@ -1031,7 +1056,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | |||
1031 | sm.sm_first = !!size; | 1056 | sm.sm_first = !!size; |
1032 | sm.sm_height = height; | 1057 | sm.sm_height = height; |
1033 | 1058 | ||
1034 | error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm); | 1059 | error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm); |
1035 | if (error) | 1060 | if (error) |
1036 | break; | 1061 | break; |
1037 | } | 1062 | } |
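recursive_scan() moves below do_strip() so the block_call_t indirection can be deleted outright, and it gains readahead: before descending, gfs2_metapath_ra() issues READA | REQ_META for every pointer in the current indirect block. The trylock pattern keeps the readahead strictly opportunistic:

    rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
    if (trylock_buffer(rabh)) {
            if (!buffer_uptodate(rabh)) {
                    rabh->b_end_io = end_buffer_read_sync;
                    /* end_buffer_read_sync unlocks and releases rabh */
                    submit_bh(READA | REQ_META, rabh);
                    continue;
            }
            unlock_buffer(rabh);
    }
    brelse(rabh);

Buffers that are already locked or already uptodate are skipped at no cost, so truncating a large file overlaps its metadata reads instead of faulting them in one level at a time.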
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 1cc2f8ec52a2..8ccad2467cb6 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -240,16 +240,15 @@ fail: | |||
240 | return error; | 240 | return error; |
241 | } | 241 | } |
242 | 242 | ||
243 | static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf, | 243 | static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf, |
244 | u64 offset, unsigned int size) | 244 | unsigned int size) |
245 | { | 245 | { |
246 | struct buffer_head *dibh; | 246 | struct buffer_head *dibh; |
247 | int error; | 247 | int error; |
248 | 248 | ||
249 | error = gfs2_meta_inode_buffer(ip, &dibh); | 249 | error = gfs2_meta_inode_buffer(ip, &dibh); |
250 | if (!error) { | 250 | if (!error) { |
251 | offset += sizeof(struct gfs2_dinode); | 251 | memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); |
252 | memcpy(buf, dibh->b_data + offset, size); | ||
253 | brelse(dibh); | 252 | brelse(dibh); |
254 | } | 253 | } |
255 | 254 | ||
@@ -261,13 +260,12 @@ static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf, | |||
261 | * gfs2_dir_read_data - Read data from a directory inode | 260 | * gfs2_dir_read_data - Read data from a directory inode |
262 | * @ip: The GFS2 Inode | 261 | * @ip: The GFS2 Inode |
263 | * @buf: The buffer to place result into | 262 | * @buf: The buffer to place result into |
264 | * @offset: File offset to begin jdata reading from | ||
265 | * @size: Amount of data to transfer | 263 | * @size: Amount of data to transfer |
266 | * | 264 | * |
267 | * Returns: The amount of data actually copied or the error | 265 | * Returns: The amount of data actually copied or the error |
268 | */ | 266 | */ |
269 | static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | 267 | static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf, |
270 | unsigned int size, unsigned ra) | 268 | unsigned int size) |
271 | { | 269 | { |
272 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 270 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
273 | u64 lblock, dblock; | 271 | u64 lblock, dblock; |
@@ -275,24 +273,14 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
275 | unsigned int o; | 273 | unsigned int o; |
276 | int copied = 0; | 274 | int copied = 0; |
277 | int error = 0; | 275 | int error = 0; |
278 | u64 disksize = i_size_read(&ip->i_inode); | ||
279 | |||
280 | if (offset >= disksize) | ||
281 | return 0; | ||
282 | |||
283 | if (offset + size > disksize) | ||
284 | size = disksize - offset; | ||
285 | |||
286 | if (!size) | ||
287 | return 0; | ||
288 | 276 | ||
289 | if (gfs2_is_stuffed(ip)) | 277 | if (gfs2_is_stuffed(ip)) |
290 | return gfs2_dir_read_stuffed(ip, buf, offset, size); | 278 | return gfs2_dir_read_stuffed(ip, buf, size); |
291 | 279 | ||
292 | if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) | 280 | if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) |
293 | return -EINVAL; | 281 | return -EINVAL; |
294 | 282 | ||
295 | lblock = offset; | 283 | lblock = 0; |
296 | o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); | 284 | o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); |
297 | 285 | ||
298 | while (copied < size) { | 286 | while (copied < size) { |
@@ -311,8 +299,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
311 | if (error || !dblock) | 299 | if (error || !dblock) |
312 | goto fail; | 300 | goto fail; |
313 | BUG_ON(extlen < 1); | 301 | BUG_ON(extlen < 1); |
314 | if (!ra) | ||
315 | extlen = 1; | ||
316 | bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); | 302 | bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); |
317 | } else { | 303 | } else { |
318 | error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); | 304 | error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); |
@@ -328,7 +314,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
328 | extlen--; | 314 | extlen--; |
329 | memcpy(buf, bh->b_data + o, amount); | 315 | memcpy(buf, bh->b_data + o, amount); |
330 | brelse(bh); | 316 | brelse(bh); |
331 | buf += amount; | 317 | buf += (amount/sizeof(__be64)); |
332 | copied += amount; | 318 | copied += amount; |
333 | lblock++; | 319 | lblock++; |
334 | o = sizeof(struct gfs2_meta_header); | 320 | o = sizeof(struct gfs2_meta_header); |
@@ -371,7 +357,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) | |||
371 | if (hc == NULL) | 357 | if (hc == NULL) |
372 | return ERR_PTR(-ENOMEM); | 358 | return ERR_PTR(-ENOMEM); |
373 | 359 | ||
374 | ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1); | 360 | ret = gfs2_dir_read_data(ip, hc, hsize); |
375 | if (ret < 0) { | 361 | if (ret < 0) { |
376 | kfree(hc); | 362 | kfree(hc); |
377 | return ERR_PTR(ret); | 363 | return ERR_PTR(ret); |
@@ -1695,7 +1681,6 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1695 | const struct qstr *name = &dentry->d_name; | 1681 | const struct qstr *name = &dentry->d_name; |
1696 | struct gfs2_dirent *dent, *prev = NULL; | 1682 | struct gfs2_dirent *dent, *prev = NULL; |
1697 | struct buffer_head *bh; | 1683 | struct buffer_head *bh; |
1698 | int error; | ||
1699 | 1684 | ||
1700 | /* Returns _either_ the entry (if its first in block) or the | 1685 | /* Returns _either_ the entry (if its first in block) or the |
1701 | previous entry otherwise */ | 1686 | previous entry otherwise */ |
@@ -1724,22 +1709,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1724 | } | 1709 | } |
1725 | brelse(bh); | 1710 | brelse(bh); |
1726 | 1711 | ||
1727 | error = gfs2_meta_inode_buffer(dip, &bh); | ||
1728 | if (error) | ||
1729 | return error; | ||
1730 | |||
1731 | if (!dip->i_entries) | 1712 | if (!dip->i_entries) |
1732 | gfs2_consist_inode(dip); | 1713 | gfs2_consist_inode(dip); |
1733 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | ||
1734 | dip->i_entries--; | 1714 | dip->i_entries--; |
1735 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; | 1715 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; |
1736 | if (S_ISDIR(dentry->d_inode->i_mode)) | 1716 | if (S_ISDIR(dentry->d_inode->i_mode)) |
1737 | drop_nlink(&dip->i_inode); | 1717 | drop_nlink(&dip->i_inode); |
1738 | gfs2_dinode_out(dip, bh->b_data); | ||
1739 | brelse(bh); | ||
1740 | mark_inode_dirty(&dip->i_inode); | 1718 | mark_inode_dirty(&dip->i_inode); |
1741 | 1719 | ||
1742 | return error; | 1720 | return 0; |
1743 | } | 1721 | } |
1744 | 1722 | ||
1745 | /** | 1723 | /** |
@@ -1829,10 +1807,6 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1829 | if (error) | 1807 | if (error) |
1830 | goto out_put; | 1808 | goto out_put; |
1831 | 1809 | ||
1832 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); | ||
1833 | if (error) | ||
1834 | goto out_qs; | ||
1835 | |||
1836 | /* Count the number of leaves */ | 1810 | /* Count the number of leaves */ |
1837 | bh = leaf_bh; | 1811 | bh = leaf_bh; |
1838 | 1812 | ||
@@ -1847,7 +1821,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1847 | if (blk != leaf_no) | 1821 | if (blk != leaf_no) |
1848 | brelse(bh); | 1822 | brelse(bh); |
1849 | 1823 | ||
1850 | gfs2_rlist_add(sdp, &rlist, blk); | 1824 | gfs2_rlist_add(dip, &rlist, blk); |
1851 | l_blocks++; | 1825 | l_blocks++; |
1852 | } | 1826 | } |
1853 | 1827 | ||
@@ -1911,8 +1885,6 @@ out_rg_gunlock: | |||
1911 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); | 1885 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); |
1912 | out_rlist: | 1886 | out_rlist: |
1913 | gfs2_rlist_free(&rlist); | 1887 | gfs2_rlist_free(&rlist); |
1914 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); | ||
1915 | out_qs: | ||
1916 | gfs2_quota_unhold(dip); | 1888 | gfs2_quota_unhold(dip); |
1917 | out_put: | 1889 | out_put: |
1918 | gfs2_alloc_put(dip); | 1890 | gfs2_alloc_put(dip); |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index edeb9e802903..ce36a56dfeac 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | |||
59 | struct gfs2_holder i_gh; | 59 | struct gfs2_holder i_gh; |
60 | loff_t error; | 60 | loff_t error; |
61 | 61 | ||
62 | if (origin == 2) { | 62 | switch (origin) { |
63 | case SEEK_END: /* These reference inode->i_size */ | ||
64 | case SEEK_DATA: | ||
65 | case SEEK_HOLE: | ||
63 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | 66 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
64 | &i_gh); | 67 | &i_gh); |
65 | if (!error) { | 68 | if (!error) { |
66 | error = generic_file_llseek_unlocked(file, offset, origin); | 69 | error = generic_file_llseek(file, offset, origin); |
67 | gfs2_glock_dq_uninit(&i_gh); | 70 | gfs2_glock_dq_uninit(&i_gh); |
68 | } | 71 | } |
69 | } else | 72 | break; |
70 | error = generic_file_llseek_unlocked(file, offset, origin); | 73 | case SEEK_CUR: |
74 | case SEEK_SET: | ||
75 | error = generic_file_llseek(file, offset, origin); | ||
76 | break; | ||
77 | default: | ||
78 | error = -EINVAL; | ||
79 | } | ||
71 | 80 | ||
72 | return error; | 81 | return error; |
73 | } | 82 | } |
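SEEK_END, SEEK_DATA and SEEK_HOLE all consult i_size, which on GFS2 is only trustworthy under a glock, so those cases wrap generic_file_llseek() in a shared lock; SEEK_SET and SEEK_CUR are purely positional and stay lockless. The shape, condensed:

    switch (origin) {
    case SEEK_END:
    case SEEK_DATA:
    case SEEK_HOLE:
            /* i_size may be stale cluster-wide without the glock */
            error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
                                       LM_FLAG_ANY, &i_gh);
            if (!error) {
                    error = generic_file_llseek(file, offset, origin);
                    gfs2_glock_dq_uninit(&i_gh);
            }
            break;
    case SEEK_CUR:
    case SEEK_SET:
            error = generic_file_llseek(file, offset, origin);
            break;
    default:
            error = -EINVAL;
    }

The explicit default also rejects unknown origins before any lock is taken.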
@@ -357,8 +366,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
357 | unsigned int data_blocks, ind_blocks, rblocks; | 366 | unsigned int data_blocks, ind_blocks, rblocks; |
358 | struct gfs2_holder gh; | 367 | struct gfs2_holder gh; |
359 | struct gfs2_alloc *al; | 368 | struct gfs2_alloc *al; |
369 | loff_t size; | ||
360 | int ret; | 370 | int ret; |
361 | 371 | ||
372 | /* Wait if the fs is frozen. This is racy, so we check again later | ||
373 | * and retry if the fs has been frozen after the page lock has | ||
374 | * been acquired. | ||
375 | */ | ||
376 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
377 | |||
362 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 378 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
363 | ret = gfs2_glock_nq(&gh); | 379 | ret = gfs2_glock_nq(&gh); |
364 | if (ret) | 380 | if (ret) |
@@ -367,8 +383,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
367 | set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); | 383 | set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); |
368 | set_bit(GIF_SW_PAGED, &ip->i_flags); | 384 | set_bit(GIF_SW_PAGED, &ip->i_flags); |
369 | 385 | ||
370 | if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) | 386 | if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { |
387 | lock_page(page); | ||
388 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | ||
389 | ret = -EAGAIN; | ||
390 | unlock_page(page); | ||
391 | } | ||
371 | goto out_unlock; | 392 | goto out_unlock; |
393 | } | ||
394 | |||
372 | ret = -ENOMEM; | 395 | ret = -ENOMEM; |
373 | al = gfs2_alloc_get(ip); | 396 | al = gfs2_alloc_get(ip); |
374 | if (al == NULL) | 397 | if (al == NULL) |
@@ -388,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
388 | rblocks += data_blocks ? data_blocks : 1; | 411 | rblocks += data_blocks ? data_blocks : 1; |
389 | if (ind_blocks || data_blocks) { | 412 | if (ind_blocks || data_blocks) { |
390 | rblocks += RES_STATFS + RES_QUOTA; | 413 | rblocks += RES_STATFS + RES_QUOTA; |
391 | rblocks += gfs2_rg_blocks(al); | 414 | rblocks += gfs2_rg_blocks(ip); |
392 | } | 415 | } |
393 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 416 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
394 | if (ret) | 417 | if (ret) |
@@ -396,21 +419,29 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
396 | 419 | ||
397 | lock_page(page); | 420 | lock_page(page); |
398 | ret = -EINVAL; | 421 | ret = -EINVAL; |
399 | last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; | 422 | size = i_size_read(inode); |
400 | if (page->index > last_index) | 423 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
401 | goto out_unlock_page; | 424 | /* Check page index against inode size */ |
425 | if (size == 0 || (page->index > last_index)) | ||
426 | goto out_trans_end; | ||
427 | |||
428 | ret = -EAGAIN; | ||
429 | /* If truncated, we must retry the operation, since we may have | ||
430 | * raced with the glock demotion code. | ||
431 | */ | ||
432 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) | ||
433 | goto out_trans_end; | ||
434 | |||
435 | /* Unstuff, if required, and allocate backing blocks for page */ | ||
402 | ret = 0; | 436 | ret = 0; |
403 | if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) | 437 | if (gfs2_is_stuffed(ip)) |
404 | goto out_unlock_page; | ||
405 | if (gfs2_is_stuffed(ip)) { | ||
406 | ret = gfs2_unstuff_dinode(ip, page); | 438 | ret = gfs2_unstuff_dinode(ip, page); |
407 | if (ret) | 439 | if (ret == 0) |
408 | goto out_unlock_page; | 440 | ret = gfs2_allocate_page_backing(page); |
409 | } | ||
410 | ret = gfs2_allocate_page_backing(page); | ||
411 | 441 | ||
412 | out_unlock_page: | 442 | out_trans_end: |
413 | unlock_page(page); | 443 | if (ret) |
444 | unlock_page(page); | ||
414 | gfs2_trans_end(sdp); | 445 | gfs2_trans_end(sdp); |
415 | out_trans_fail: | 446 | out_trans_fail: |
416 | gfs2_inplace_release(ip); | 447 | gfs2_inplace_release(ip); |
@@ -422,11 +453,17 @@ out_unlock: | |||
422 | gfs2_glock_dq(&gh); | 453 | gfs2_glock_dq(&gh); |
423 | out: | 454 | out: |
424 | gfs2_holder_uninit(&gh); | 455 | gfs2_holder_uninit(&gh); |
425 | if (ret == -ENOMEM) | 456 | if (ret == 0) { |
426 | ret = VM_FAULT_OOM; | 457 | set_page_dirty(page); |
427 | else if (ret) | 458 | /* This check must be post dropping of transaction lock */ |
428 | ret = VM_FAULT_SIGBUS; | 459 | if (inode->i_sb->s_frozen == SB_UNFROZEN) { |
429 | return ret; | 460 | wait_on_page_writeback(page); |
461 | } else { | ||
462 | ret = -EAGAIN; | ||
463 | unlock_page(page); | ||
464 | } | ||
465 | } | ||
466 | return block_page_mkwrite_return(ret); | ||
430 | } | 467 | } |
431 | 468 | ||
432 | static const struct vm_operations_struct gfs2_vm_ops = { | 469 | static const struct vm_operations_struct gfs2_vm_ops = { |
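gfs2_page_mkwrite() now funnels every exit through block_page_mkwrite_return(), which maps the errno convention onto fault codes (roughly: 0 becomes VM_FAULT_LOCKED, -EAGAIN becomes VM_FAULT_NOPAGE so the fault is retried, -ENOMEM becomes VM_FAULT_OOM, anything else VM_FAULT_SIGBUS), replacing the hand-rolled translation. The frozen-fs handling brackets the fault: an advisory vfs_check_frozen() up front, then this recheck once the transaction lock is dropped:

    if (ret == 0) {
            set_page_dirty(page);
            /* must be re-checked after dropping the transaction lock */
            if (inode->i_sb->s_frozen == SB_UNFROZEN) {
                    wait_on_page_writeback(page);
            } else {
                    ret = -EAGAIN;  /* fs froze under us: retry the fault */
                    unlock_page(page);
            }
    }
    return block_page_mkwrite_return(ret);

That is also why the truncation races earlier in the function now return -EAGAIN rather than -EINVAL: the caller retries instead of taking SIGBUS.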
@@ -551,8 +588,16 @@ static int gfs2_close(struct inode *inode, struct file *file) | |||
551 | * @end: the end position in the file to sync | 588 | * @end: the end position in the file to sync |
552 | * @datasync: set if we can ignore timestamp changes | 589 | * @datasync: set if we can ignore timestamp changes |
553 | * | 590 | * |
554 | * The VFS will flush data for us. We only need to worry | 591 | * We split the data flushing here so that we don't wait for the data |
555 | * about metadata here. | 592 | * until after we've also sent the metadata to disk. Note that for |
593 | * data=ordered, we will write & wait for the data at the log flush | ||
594 | * stage anyway, so this is unlikely to make much of a difference | ||
595 | * except in the data=writeback case. | ||
596 | * | ||
597 | * If the fdatawrite fails due to any reason except -EIO, we will | ||
598 | * continue the remainder of the fsync, although we'll still report | ||
599 | * the error at the end. This is to match filemap_write_and_wait_range() | ||
600 | * behaviour. | ||
556 | * | 601 | * |
557 | * Returns: errno | 602 | * Returns: errno |
558 | */ | 603 | */ |
@@ -560,30 +605,34 @@ static int gfs2_close(struct inode *inode, struct file *file) | |||
560 | static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | 605 | static int gfs2_fsync(struct file *file, loff_t start, loff_t end, |
561 | int datasync) | 606 | int datasync) |
562 | { | 607 | { |
563 | struct inode *inode = file->f_mapping->host; | 608 | struct address_space *mapping = file->f_mapping; |
609 | struct inode *inode = mapping->host; | ||
564 | int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); | 610 | int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); |
565 | struct gfs2_inode *ip = GFS2_I(inode); | 611 | struct gfs2_inode *ip = GFS2_I(inode); |
566 | int ret; | 612 | int ret, ret1 = 0; |
567 | 613 | ||
568 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 614 | if (mapping->nrpages) { |
569 | if (ret) | 615 | ret1 = filemap_fdatawrite_range(mapping, start, end); |
570 | return ret; | 616 | if (ret1 == -EIO) |
571 | mutex_lock(&inode->i_mutex); | 617 | return ret1; |
618 | } | ||
572 | 619 | ||
573 | if (datasync) | 620 | if (datasync) |
574 | sync_state &= ~I_DIRTY_SYNC; | 621 | sync_state &= ~I_DIRTY_SYNC; |
575 | 622 | ||
576 | if (sync_state) { | 623 | if (sync_state) { |
577 | ret = sync_inode_metadata(inode, 1); | 624 | ret = sync_inode_metadata(inode, 1); |
578 | if (ret) { | 625 | if (ret) |
579 | mutex_unlock(&inode->i_mutex); | ||
580 | return ret; | 626 | return ret; |
581 | } | 627 | if (gfs2_is_jdata(ip)) |
582 | gfs2_ail_flush(ip->i_gl); | 628 | filemap_write_and_wait(mapping); |
629 | gfs2_ail_flush(ip->i_gl, 1); | ||
583 | } | 630 | } |
584 | 631 | ||
585 | mutex_unlock(&inode->i_mutex); | 632 | if (mapping->nrpages) |
586 | return 0; | 633 | ret = filemap_fdatawait_range(mapping, start, end); |
634 | |||
635 | return ret ? ret : ret1; | ||
587 | } | 636 | } |
588 | 637 | ||
589 | /** | 638 | /** |
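The rewritten gfs2_fsync() pipelines the two waits the new comment describes: start data writeback, sync the metadata while that I/O is in flight, then wait for the data. Condensed, with the error plumbing that mirrors filemap_write_and_wait_range():

    int ret = 0, ret1 = 0;

    if (mapping->nrpages) {
            ret1 = filemap_fdatawrite_range(mapping, start, end);
            if (ret1 == -EIO)       /* only -EIO aborts the fsync early */
                    return ret1;
    }

    /* ... sync inode metadata and flush the AIL here ... */

    if (mapping->nrpages)
            ret = filemap_fdatawait_range(mapping, start, end);
    return ret ? ret : ret1;

It also drops the i_mutex round-trip the old code made, since nothing left in the sequence needs it.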
@@ -620,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
620 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 669 | return generic_file_aio_write(iocb, iov, nr_segs, pos); |
621 | } | 670 | } |
622 | 671 | ||
623 | static int empty_write_end(struct page *page, unsigned from, | ||
624 | unsigned to, int mode) | ||
625 | { | ||
626 | struct inode *inode = page->mapping->host; | ||
627 | struct gfs2_inode *ip = GFS2_I(inode); | ||
628 | struct buffer_head *bh; | ||
629 | unsigned offset, blksize = 1 << inode->i_blkbits; | ||
630 | pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
631 | |||
632 | zero_user(page, from, to-from); | ||
633 | mark_page_accessed(page); | ||
634 | |||
635 | if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
636 | if (!gfs2_is_writeback(ip)) | ||
637 | gfs2_page_add_databufs(ip, page, from, to); | ||
638 | |||
639 | block_commit_write(page, from, to); | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | offset = 0; | ||
644 | bh = page_buffers(page); | ||
645 | while (offset < to) { | ||
646 | if (offset >= from) { | ||
647 | set_buffer_uptodate(bh); | ||
648 | mark_buffer_dirty(bh); | ||
649 | clear_buffer_new(bh); | ||
650 | write_dirty_buffer(bh, WRITE); | ||
651 | } | ||
652 | offset += blksize; | ||
653 | bh = bh->b_this_page; | ||
654 | } | ||
655 | |||
656 | offset = 0; | ||
657 | bh = page_buffers(page); | ||
658 | while (offset < to) { | ||
659 | if (offset >= from) { | ||
660 | wait_on_buffer(bh); | ||
661 | if (!buffer_uptodate(bh)) | ||
662 | return -EIO; | ||
663 | } | ||
664 | offset += blksize; | ||
665 | bh = bh->b_this_page; | ||
666 | } | ||
667 | return 0; | ||
668 | } | ||
669 | |||
670 | static int needs_empty_write(sector_t block, struct inode *inode) | ||
671 | { | ||
672 | int error; | ||
673 | struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; | ||
674 | |||
675 | bh_map.b_size = 1 << inode->i_blkbits; | ||
676 | error = gfs2_block_map(inode, block, &bh_map, 0); | ||
677 | if (unlikely(error)) | ||
678 | return error; | ||
679 | return !buffer_mapped(&bh_map); | ||
680 | } | ||
681 | |||
682 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to, | ||
683 | int mode) | ||
684 | { | ||
685 | struct inode *inode = page->mapping->host; | ||
686 | unsigned start, end, next, blksize; | ||
687 | sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
688 | int ret; | ||
689 | |||
690 | blksize = 1 << inode->i_blkbits; | ||
691 | next = end = 0; | ||
692 | while (next < from) { | ||
693 | next += blksize; | ||
694 | block++; | ||
695 | } | ||
696 | start = next; | ||
697 | do { | ||
698 | next += blksize; | ||
699 | ret = needs_empty_write(block, inode); | ||
700 | if (unlikely(ret < 0)) | ||
701 | return ret; | ||
702 | if (ret == 0) { | ||
703 | if (end) { | ||
704 | ret = __block_write_begin(page, start, end - start, | ||
705 | gfs2_block_map); | ||
706 | if (unlikely(ret)) | ||
707 | return ret; | ||
708 | ret = empty_write_end(page, start, end, mode); | ||
709 | if (unlikely(ret)) | ||
710 | return ret; | ||
711 | end = 0; | ||
712 | } | ||
713 | start = next; | ||
714 | } | ||
715 | else | ||
716 | end = next; | ||
717 | block++; | ||
718 | } while (next < to); | ||
719 | |||
720 | if (end) { | ||
721 | ret = __block_write_begin(page, start, end - start, gfs2_block_map); | ||
722 | if (unlikely(ret)) | ||
723 | return ret; | ||
724 | ret = empty_write_end(page, start, end, mode); | ||
725 | if (unlikely(ret)) | ||
726 | return ret; | ||
727 | } | ||
728 | |||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | 672 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, |
733 | int mode) | 673 | int mode) |
734 | { | 674 | { |
735 | struct gfs2_inode *ip = GFS2_I(inode); | 675 | struct gfs2_inode *ip = GFS2_I(inode); |
736 | struct buffer_head *dibh; | 676 | struct buffer_head *dibh; |
737 | int error; | 677 | int error; |
738 | u64 start = offset >> PAGE_CACHE_SHIFT; | 678 | unsigned int nr_blks; |
739 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | 679 | sector_t lblock = offset >> inode->i_blkbits; |
740 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
741 | pgoff_t curr; | ||
742 | struct page *page; | ||
743 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
744 | unsigned int from, to; | ||
745 | |||
746 | if (!end_offset) | ||
747 | end_offset = PAGE_CACHE_SIZE; | ||
748 | 680 | ||
749 | error = gfs2_meta_inode_buffer(ip, &dibh); | 681 | error = gfs2_meta_inode_buffer(ip, &dibh); |
750 | if (unlikely(error)) | 682 | if (unlikely(error)) |
751 | goto out; | 683 | return error; |
752 | 684 | ||
753 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 685 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
754 | 686 | ||
@@ -758,40 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | |||
758 | goto out; | 690 | goto out; |
759 | } | 691 | } |
760 | 692 | ||
761 | curr = start; | 693 | while (len) { |
762 | offset = start << PAGE_CACHE_SHIFT; | 694 | struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; |
763 | from = start_offset; | 695 | bh_map.b_size = len; |
764 | to = PAGE_CACHE_SIZE; | 696 | set_buffer_zeronew(&bh_map); |
765 | while (curr <= end) { | ||
766 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
767 | AOP_FLAG_NOFS); | ||
768 | if (unlikely(!page)) { | ||
769 | error = -ENOMEM; | ||
770 | goto out; | ||
771 | } | ||
772 | 697 | ||
773 | if (curr == end) | 698 | error = gfs2_block_map(inode, lblock, &bh_map, 1); |
774 | to = end_offset; | 699 | if (unlikely(error)) |
775 | error = write_empty_blocks(page, from, to, mode); | ||
776 | if (!error && offset + to > inode->i_size && | ||
777 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
778 | i_size_write(inode, offset + to); | ||
779 | } | ||
780 | unlock_page(page); | ||
781 | page_cache_release(page); | ||
782 | if (error) | ||
783 | goto out; | 700 | goto out; |
784 | curr++; | 701 | len -= bh_map.b_size; |
785 | offset += PAGE_CACHE_SIZE; | 702 | nr_blks = bh_map.b_size >> inode->i_blkbits; |
786 | from = 0; | 703 | lblock += nr_blks; |
704 | if (!buffer_new(&bh_map)) | ||
705 | continue; | ||
706 | if (unlikely(!buffer_zeronew(&bh_map))) { | ||
707 | error = -EIO; | ||
708 | goto out; | ||
709 | } | ||
787 | } | 710 | } |
711 | if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) | ||
712 | i_size_write(inode, offset + len); | ||
788 | 713 | ||
789 | gfs2_dinode_out(ip, dibh->b_data); | ||
790 | mark_inode_dirty(inode); | 714 | mark_inode_dirty(inode); |
791 | 715 | ||
792 | brelse(dibh); | ||
793 | |||
794 | out: | 716 | out: |
717 | brelse(dibh); | ||
795 | return error; | 718 | return error; |
796 | } | 719 | } |
797 | 720 | ||
@@ -799,7 +722,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | |||
799 | unsigned int *data_blocks, unsigned int *ind_blocks) | 722 | unsigned int *data_blocks, unsigned int *ind_blocks) |
800 | { | 723 | { |
801 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 724 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
802 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | 725 | unsigned int max_blocks = ip->i_rgd->rd_free_clone; |
803 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | 726 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); |
804 | 727 | ||
805 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | 728 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { |
@@ -831,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
831 | int error; | 754 | int error; |
832 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); | 755 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); |
833 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | 756 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; |
757 | loff_t max_chunk_size = UINT_MAX & bsize_mask; | ||
834 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | 758 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; |
835 | 759 | ||
836 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 760 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ |
@@ -884,11 +808,12 @@ retry: | |||
884 | goto out_qunlock; | 808 | goto out_qunlock; |
885 | } | 809 | } |
886 | max_bytes = bytes; | 810 | max_bytes = bytes; |
887 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | 811 | calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, |
812 | &max_bytes, &data_blocks, &ind_blocks); | ||
888 | al->al_requested = data_blocks + ind_blocks; | 813 | al->al_requested = data_blocks + ind_blocks; |
889 | 814 | ||
890 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | 815 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + |
891 | RES_RG_HDR + gfs2_rg_blocks(al); | 816 | RES_RG_HDR + gfs2_rg_blocks(ip); |
892 | if (gfs2_is_jdata(ip)) | 817 | if (gfs2_is_jdata(ip)) |
893 | rblocks += data_blocks ? data_blocks : 1; | 818 | rblocks += data_blocks ? data_blocks : 1; |
894 | 819 | ||
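The max_chunk_size cap introduced above exists because bh_map.b_size in the mapping loop is an unsigned int: each fallocate pass is clamped to the largest block-aligned value that still fits in 32 bits. The arithmetic, checked in isolation:

    #include <stdio.h>
    #include <limits.h>

    int main(void)
    {
        long long bsize = 4096;                  /* sb_bsize, for example */
        long long bsize_mask = ~(bsize - 1);
        long long max_chunk_size = UINT_MAX & bsize_mask;
        long long len = 8LL << 30;               /* an 8 GiB request */

        if (len > max_chunk_size)
            len = max_chunk_size;
        /* 4294963200 = UINT_MAX rounded down to a 4 KiB boundary */
        printf("chunk capped to %lld bytes\n", len);
        return 0;
    }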
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index da21ecaafcc2..78418b4fa857 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,40 +28,55 @@ | |||
28 | #include "trans.h" | 28 | #include "trans.h" |
29 | #include "dir.h" | 29 | #include "dir.h" |
30 | 30 | ||
31 | static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) | ||
32 | { | ||
33 | fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", | ||
34 | bh, (unsigned long long)bh->b_blocknr, bh->b_state, | ||
35 | bh->b_page->mapping, bh->b_page->flags); | ||
36 | fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", | ||
37 | gl->gl_name.ln_type, gl->gl_name.ln_number, | ||
38 | gfs2_glock2aspace(gl)); | ||
39 | gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); | ||
40 | } | ||
41 | |||
31 | /** | 42 | /** |
32 | * __gfs2_ail_flush - remove all buffers for a given lock from the AIL | 43 | * __gfs2_ail_flush - remove all buffers for a given lock from the AIL |
33 | * @gl: the glock | 44 | * @gl: the glock |
45 | * @fsync: set when called from fsync (not all buffers will be clean) | ||
34 | * | 46 | * |
35 | * None of the buffers should be dirty, locked, or pinned. | 47 | * None of the buffers should be dirty, locked, or pinned. |
36 | */ | 48 | */ |
37 | 49 | ||
38 | static void __gfs2_ail_flush(struct gfs2_glock *gl) | 50 | static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) |
39 | { | 51 | { |
40 | struct gfs2_sbd *sdp = gl->gl_sbd; | 52 | struct gfs2_sbd *sdp = gl->gl_sbd; |
41 | struct list_head *head = &gl->gl_ail_list; | 53 | struct list_head *head = &gl->gl_ail_list; |
42 | struct gfs2_bufdata *bd; | 54 | struct gfs2_bufdata *bd, *tmp; |
43 | struct buffer_head *bh; | 55 | struct buffer_head *bh; |
56 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); | ||
57 | sector_t blocknr; | ||
44 | 58 | ||
59 | gfs2_log_lock(sdp); | ||
45 | spin_lock(&sdp->sd_ail_lock); | 60 | spin_lock(&sdp->sd_ail_lock); |
46 | while (!list_empty(head)) { | 61 | list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { |
47 | bd = list_entry(head->next, struct gfs2_bufdata, | ||
48 | bd_ail_gl_list); | ||
49 | bh = bd->bd_bh; | 62 | bh = bd->bd_bh; |
50 | gfs2_remove_from_ail(bd); | 63 | if (bh->b_state & b_state) { |
51 | bd->bd_bh = NULL; | 64 | if (fsync) |
65 | continue; | ||
66 | gfs2_ail_error(gl, bh); | ||
67 | } | ||
68 | blocknr = bh->b_blocknr; | ||
52 | bh->b_private = NULL; | 69 | bh->b_private = NULL; |
53 | spin_unlock(&sdp->sd_ail_lock); | 70 | gfs2_remove_from_ail(bd); /* drops ref on bh */ |
54 | 71 | ||
55 | bd->bd_blkno = bh->b_blocknr; | 72 | bd->bd_bh = NULL; |
56 | gfs2_log_lock(sdp); | 73 | bd->bd_blkno = blocknr; |
57 | gfs2_assert_withdraw(sdp, !buffer_busy(bh)); | ||
58 | gfs2_trans_add_revoke(sdp, bd); | ||
59 | gfs2_log_unlock(sdp); | ||
60 | 74 | ||
61 | spin_lock(&sdp->sd_ail_lock); | 75 | gfs2_trans_add_revoke(sdp, bd); |
62 | } | 76 | } |
63 | gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); | 77 | BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); |
64 | spin_unlock(&sdp->sd_ail_lock); | 78 | spin_unlock(&sdp->sd_ail_lock); |
79 | gfs2_log_unlock(sdp); | ||
65 | } | 80 | } |
66 | 81 | ||
67 | 82 | ||
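__gfs2_ail_flush above switches to list_for_each_entry_safe so entries can be unlinked mid-walk, and in the fsync case it simply skips buffers that are still dirty, pinned, or locked instead of treating them as an error. A userspace sketch of the remove-while-iterating pattern with a hand-rolled singly linked list (the kernel macro caches the next pointer the same way before the current entry can be freed):

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        struct node *next;
        int busy;        /* stands in for dirty/pinned/locked */
    };

    int main(void)
    {
        struct node *head = NULL, *cur, *nxt, **link;
        for (int i = 0; i < 4; i++) {
            cur = malloc(sizeof(*cur));
            cur->busy = (i == 2);
            cur->next = head;
            head = cur;
        }

        int fsync_mode = 1;
        /* Cache nxt before possibly freeing cur - the _safe idiom. */
        for (link = &head, cur = head; cur; cur = nxt) {
            nxt = cur->next;
            if (fsync_mode && cur->busy) {   /* skip, revisit later */
                link = &cur->next;
                continue;
            }
            *link = nxt;                     /* unlink and release */
            free(cur);
        }
        printf("remaining busy nodes: %d\n", head ? 1 : 0);
        return 0;
    }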
@@ -84,13 +99,13 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
84 | BUG_ON(current->journal_info); | 99 | BUG_ON(current->journal_info); |
85 | current->journal_info = &tr; | 100 | current->journal_info = &tr; |
86 | 101 | ||
87 | __gfs2_ail_flush(gl); | 102 | __gfs2_ail_flush(gl, 0); |
88 | 103 | ||
89 | gfs2_trans_end(sdp); | 104 | gfs2_trans_end(sdp); |
90 | gfs2_log_flush(sdp, NULL); | 105 | gfs2_log_flush(sdp, NULL); |
91 | } | 106 | } |
92 | 107 | ||
93 | void gfs2_ail_flush(struct gfs2_glock *gl) | 108 | void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) |
94 | { | 109 | { |
95 | struct gfs2_sbd *sdp = gl->gl_sbd; | 110 | struct gfs2_sbd *sdp = gl->gl_sbd; |
96 | unsigned int revokes = atomic_read(&gl->gl_ail_count); | 111 | unsigned int revokes = atomic_read(&gl->gl_ail_count); |
@@ -102,7 +117,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl) | |||
102 | ret = gfs2_trans_begin(sdp, 0, revokes); | 117 | ret = gfs2_trans_begin(sdp, 0, revokes); |
103 | if (ret) | 118 | if (ret) |
104 | return; | 119 | return; |
105 | __gfs2_ail_flush(gl); | 120 | __gfs2_ail_flush(gl, fsync); |
106 | gfs2_trans_end(sdp); | 121 | gfs2_trans_end(sdp); |
107 | gfs2_log_flush(sdp, NULL); | 122 | gfs2_log_flush(sdp, NULL); |
108 | } | 123 | } |
@@ -119,6 +134,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl) | |||
119 | static void rgrp_go_sync(struct gfs2_glock *gl) | 134 | static void rgrp_go_sync(struct gfs2_glock *gl) |
120 | { | 135 | { |
121 | struct address_space *metamapping = gfs2_glock2aspace(gl); | 136 | struct address_space *metamapping = gfs2_glock2aspace(gl); |
137 | struct gfs2_rgrpd *rgd; | ||
122 | int error; | 138 | int error; |
123 | 139 | ||
124 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 140 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
@@ -130,6 +146,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
130 | error = filemap_fdatawait(metamapping); | 146 | error = filemap_fdatawait(metamapping); |
131 | mapping_set_error(metamapping, error); | 147 | mapping_set_error(metamapping, error); |
132 | gfs2_ail_empty_gl(gl); | 148 | gfs2_ail_empty_gl(gl); |
149 | |||
150 | spin_lock(&gl->gl_spin); | ||
151 | rgd = gl->gl_object; | ||
152 | if (rgd) | ||
153 | gfs2_free_clones(rgd); | ||
154 | spin_unlock(&gl->gl_spin); | ||
133 | } | 155 | } |
134 | 156 | ||
135 | /** | 157 | /** |
@@ -430,33 +452,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
430 | } | 452 | } |
431 | 453 | ||
432 | /** | 454 | /** |
433 | * rgrp_go_lock - operation done after an rgrp lock is locked by | ||
434 | * a first holder on this node. | ||
435 | * @gl: the glock | ||
436 | * @flags: | ||
437 | * | ||
438 | * Returns: errno | ||
439 | */ | ||
440 | |||
441 | static int rgrp_go_lock(struct gfs2_holder *gh) | ||
442 | { | ||
443 | return gfs2_rgrp_bh_get(gh->gh_gl->gl_object); | ||
444 | } | ||
445 | |||
446 | /** | ||
447 | * rgrp_go_unlock - operation done before an rgrp lock is unlocked by | ||
448 | * a last holder on this node. | ||
449 | * @gl: the glock | ||
450 | * @flags: | ||
451 | * | ||
452 | */ | ||
453 | |||
454 | static void rgrp_go_unlock(struct gfs2_holder *gh) | ||
455 | { | ||
456 | gfs2_rgrp_bh_put(gh->gh_gl->gl_object); | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * trans_go_sync - promote/demote the transaction glock | 455 | * trans_go_sync - promote/demote the transaction glock |
461 | * @gl: the glock | 456 | * @gl: the glock |
462 | * @state: the requested state | 457 | * @state: the requested state |
@@ -558,8 +553,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
558 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 553 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
559 | .go_xmote_th = rgrp_go_sync, | 554 | .go_xmote_th = rgrp_go_sync, |
560 | .go_inval = rgrp_go_inval, | 555 | .go_inval = rgrp_go_inval, |
561 | .go_lock = rgrp_go_lock, | 556 | .go_lock = gfs2_rgrp_go_lock, |
562 | .go_unlock = rgrp_go_unlock, | 557 | .go_unlock = gfs2_rgrp_go_unlock, |
563 | .go_dump = gfs2_rgrp_dump, | 558 | .go_dump = gfs2_rgrp_dump, |
564 | .go_type = LM_TYPE_RGRP, | 559 | .go_type = LM_TYPE_RGRP, |
565 | .go_flags = GLOF_ASPACE, | 560 | .go_flags = GLOF_ASPACE, |
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 6fce409b5a50..bf95a2dc1662 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,6 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; | |||
23 | extern const struct gfs2_glock_operations gfs2_journal_glops; | 23 | extern const struct gfs2_glock_operations gfs2_journal_glops; |
24 | extern const struct gfs2_glock_operations *gfs2_glops_list[]; | 24 | extern const struct gfs2_glock_operations *gfs2_glops_list[]; |
25 | 25 | ||
26 | extern void gfs2_ail_flush(struct gfs2_glock *gl); | 26 | extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); |
27 | 27 | ||
28 | #endif /* __GLOPS_DOT_H__ */ | 28 | #endif /* __GLOPS_DOT_H__ */ |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 892ac37de8ae..7389dfdcc9ef 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
19 | #include <linux/rculist_bl.h> | 19 | #include <linux/rculist_bl.h> |
20 | #include <linux/completion.h> | 20 | #include <linux/completion.h> |
21 | #include <linux/rbtree.h> | ||
21 | 22 | ||
22 | #define DIO_WAIT 0x00000010 | 23 | #define DIO_WAIT 0x00000010 |
23 | #define DIO_METADATA 0x00000020 | 24 | #define DIO_METADATA 0x00000020 |
@@ -78,8 +79,7 @@ struct gfs2_bitmap { | |||
78 | }; | 79 | }; |
79 | 80 | ||
80 | struct gfs2_rgrpd { | 81 | struct gfs2_rgrpd { |
81 | struct list_head rd_list; /* Link with superblock */ | 82 | struct rb_node rd_node; /* Link with superblock */ |
82 | struct list_head rd_list_mru; | ||
83 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ | 83 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ |
84 | u64 rd_addr; /* grp block disk address */ | 84 | u64 rd_addr; /* grp block disk address */ |
85 | u64 rd_data0; /* first data location */ | 85 | u64 rd_data0; /* first data location */ |
@@ -91,10 +91,7 @@ struct gfs2_rgrpd { | |||
91 | u32 rd_dinodes; | 91 | u32 rd_dinodes; |
92 | u64 rd_igeneration; | 92 | u64 rd_igeneration; |
93 | struct gfs2_bitmap *rd_bits; | 93 | struct gfs2_bitmap *rd_bits; |
94 | struct mutex rd_mutex; | ||
95 | struct gfs2_log_element rd_le; | ||
96 | struct gfs2_sbd *rd_sbd; | 94 | struct gfs2_sbd *rd_sbd; |
97 | unsigned int rd_bh_count; | ||
98 | u32 rd_last_alloc; | 95 | u32 rd_last_alloc; |
99 | u32 rd_flags; | 96 | u32 rd_flags; |
100 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ | 97 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ |
@@ -106,12 +103,15 @@ struct gfs2_rgrpd { | |||
106 | enum gfs2_state_bits { | 103 | enum gfs2_state_bits { |
107 | BH_Pinned = BH_PrivateStart, | 104 | BH_Pinned = BH_PrivateStart, |
108 | BH_Escaped = BH_PrivateStart + 1, | 105 | BH_Escaped = BH_PrivateStart + 1, |
106 | BH_Zeronew = BH_PrivateStart + 2, | ||
109 | }; | 107 | }; |
110 | 108 | ||
111 | BUFFER_FNS(Pinned, pinned) | 109 | BUFFER_FNS(Pinned, pinned) |
112 | TAS_BUFFER_FNS(Pinned, pinned) | 110 | TAS_BUFFER_FNS(Pinned, pinned) |
113 | BUFFER_FNS(Escaped, escaped) | 111 | BUFFER_FNS(Escaped, escaped) |
114 | TAS_BUFFER_FNS(Escaped, escaped) | 112 | TAS_BUFFER_FNS(Escaped, escaped) |
113 | BUFFER_FNS(Zeronew, zeronew) | ||
114 | TAS_BUFFER_FNS(Zeronew, zeronew) | ||
115 | 115 | ||
116 | struct gfs2_bufdata { | 116 | struct gfs2_bufdata { |
117 | struct buffer_head *bd_bh; | 117 | struct buffer_head *bd_bh; |
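BH_Zeronew above claims the next private buffer-state bit, and the BUFFER_FNS/TAS_BUFFER_FNS invocations generate set_buffer_zeronew(), clear_buffer_zeronew() and buffer_zeronew() (plus a test-and-set variant) over b_state. A simplified userspace rendering of the generator macro (non-atomic, unlike the kernel's bitops):

    #include <stdio.h>

    struct buf { unsigned long state; };

    /* Simplified BUFFER_FNS: expands into set/clear/test helpers for a bit. */
    #define BUF_FNS(bit, name)                                                  \
    static void set_buf_##name(struct buf *b)   { b->state |=  1UL << (bit); }  \
    static void clear_buf_##name(struct buf *b) { b->state &= ~(1UL << (bit)); }\
    static int  buf_##name(const struct buf *b) { return !!(b->state & (1UL << (bit))); }

    enum { BUF_ZERONEW = 2 };        /* analogue of BH_Zeronew */
    BUF_FNS(BUF_ZERONEW, zeronew)

    int main(void)
    {
        struct buf b = { 0 };
        set_buf_zeronew(&b);
        printf("zeronew=%d\n", buf_zeronew(&b));   /* 1 */
        clear_buf_zeronew(&b);
        printf("zeronew=%d\n", buf_zeronew(&b));   /* 0 */
        return 0;
    }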
@@ -246,7 +246,6 @@ struct gfs2_glock { | |||
246 | 246 | ||
247 | struct gfs2_alloc { | 247 | struct gfs2_alloc { |
248 | /* Quota stuff */ | 248 | /* Quota stuff */ |
249 | |||
250 | struct gfs2_quota_data *al_qd[2*MAXQUOTAS]; | 249 | struct gfs2_quota_data *al_qd[2*MAXQUOTAS]; |
251 | struct gfs2_holder al_qd_ghs[2*MAXQUOTAS]; | 250 | struct gfs2_holder al_qd_ghs[2*MAXQUOTAS]; |
252 | unsigned int al_qd_num; | 251 | unsigned int al_qd_num; |
@@ -255,18 +254,13 @@ struct gfs2_alloc { | |||
255 | u32 al_alloced; /* Filled in by gfs2_alloc_*() */ | 254 | u32 al_alloced; /* Filled in by gfs2_alloc_*() */ |
256 | 255 | ||
257 | /* Filled in by gfs2_inplace_reserve() */ | 256 | /* Filled in by gfs2_inplace_reserve() */ |
258 | |||
259 | unsigned int al_line; | ||
260 | char *al_file; | ||
261 | struct gfs2_holder al_ri_gh; | ||
262 | struct gfs2_holder al_rgd_gh; | 257 | struct gfs2_holder al_rgd_gh; |
263 | struct gfs2_rgrpd *al_rgd; | ||
264 | |||
265 | }; | 258 | }; |
266 | 259 | ||
267 | enum { | 260 | enum { |
268 | GIF_INVALID = 0, | 261 | GIF_INVALID = 0, |
269 | GIF_QD_LOCKED = 1, | 262 | GIF_QD_LOCKED = 1, |
263 | GIF_ALLOC_FAILED = 2, | ||
270 | GIF_SW_PAGED = 3, | 264 | GIF_SW_PAGED = 3, |
271 | }; | 265 | }; |
272 | 266 | ||
@@ -282,6 +276,7 @@ struct gfs2_inode { | |||
282 | struct gfs2_holder i_iopen_gh; | 276 | struct gfs2_holder i_iopen_gh; |
283 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ | 277 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ |
284 | struct gfs2_alloc *i_alloc; | 278 | struct gfs2_alloc *i_alloc; |
279 | struct gfs2_rgrpd *i_rgd; | ||
285 | u64 i_goal; /* goal block for allocations */ | 280 | u64 i_goal; /* goal block for allocations */ |
286 | struct rw_semaphore i_rw_mutex; | 281 | struct rw_semaphore i_rw_mutex; |
287 | struct list_head i_trunc_list; | 282 | struct list_head i_trunc_list; |
@@ -574,9 +569,7 @@ struct gfs2_sbd { | |||
574 | int sd_rindex_uptodate; | 569 | int sd_rindex_uptodate; |
575 | spinlock_t sd_rindex_spin; | 570 | spinlock_t sd_rindex_spin; |
576 | struct mutex sd_rindex_mutex; | 571 | struct mutex sd_rindex_mutex; |
577 | struct list_head sd_rindex_list; | 572 | struct rb_root sd_rindex_tree; |
578 | struct list_head sd_rindex_mru_list; | ||
579 | struct gfs2_rgrpd *sd_rindex_forward; | ||
580 | unsigned int sd_rgrps; | 573 | unsigned int sd_rgrps; |
581 | unsigned int sd_max_rg_data; | 574 | unsigned int sd_max_rg_data; |
582 | 575 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 900cf986aadc..cfd4959b218c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -583,7 +583,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
583 | goto fail_quota_locks; | 583 | goto fail_quota_locks; |
584 | 584 | ||
585 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 585 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
586 | al->al_rgd->rd_length + | 586 | dip->i_rgd->rd_length + |
587 | 2 * RES_DINODE + | 587 | 2 * RES_DINODE + |
588 | RES_STATFS + RES_QUOTA, 0); | 588 | RES_STATFS + RES_QUOTA, 0); |
589 | if (error) | 589 | if (error) |
@@ -613,8 +613,7 @@ fail_end_trans: | |||
613 | gfs2_trans_end(sdp); | 613 | gfs2_trans_end(sdp); |
614 | 614 | ||
615 | fail_ipreserv: | 615 | fail_ipreserv: |
616 | if (dip->i_alloc->al_rgd) | 616 | gfs2_inplace_release(dip); |
617 | gfs2_inplace_release(dip); | ||
618 | 617 | ||
619 | fail_quota_locks: | 618 | fail_quota_locks: |
620 | gfs2_quota_unlock(dip); | 619 | gfs2_quota_unlock(dip); |
@@ -624,31 +623,29 @@ fail: | |||
624 | return error; | 623 | return error; |
625 | } | 624 | } |
626 | 625 | ||
627 | static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | 626 | int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
628 | const struct qstr *qstr) | 627 | void *fs_info) |
629 | { | 628 | { |
630 | int err; | 629 | const struct xattr *xattr; |
631 | size_t len; | 630 | int err = 0; |
632 | void *value; | 631 | |
633 | char *name; | 632 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
634 | 633 | err = __gfs2_xattr_set(inode, xattr->name, xattr->value, | |
635 | err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, | 634 | xattr->value_len, 0, |
636 | &name, &value, &len); | 635 | GFS2_EATYPE_SECURITY); |
637 | 636 | if (err < 0) | |
638 | if (err) { | 637 | break; |
639 | if (err == -EOPNOTSUPP) | ||
640 | return 0; | ||
641 | return err; | ||
642 | } | 638 | } |
643 | |||
644 | err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0, | ||
645 | GFS2_EATYPE_SECURITY); | ||
646 | kfree(value); | ||
647 | kfree(name); | ||
648 | |||
649 | return err; | 639 | return err; |
650 | } | 640 | } |
651 | 641 | ||
642 | static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | ||
643 | const struct qstr *qstr) | ||
644 | { | ||
645 | return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, | ||
646 | &gfs2_initxattrs, NULL); | ||
647 | } | ||
648 | |||
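gfs2_security_init shrinks to a single call above because the security layer now hands the filesystem an initxattrs callback together with an array of xattrs whose end is marked by a NULL name, so the filesystem can store however many attributes the LSM produces. The iteration convention, sketched standalone (struct xattr and the setter here are stand-ins for the kernel types):

    #include <stdio.h>
    #include <stddef.h>

    struct xattr {
        const char *name;     /* NULL name terminates the array */
        const void *value;
        size_t value_len;
    };

    static int set_security_xattr(const struct xattr *x)
    {
        printf("storing %s (%zu bytes)\n", x->name, x->value_len);
        return 0;
    }

    /* Shape of the initxattrs callback: walk until the NULL-name sentinel. */
    static int initxattrs(const struct xattr *xattr_array)
    {
        int err = 0;
        for (const struct xattr *x = xattr_array; x->name != NULL; x++) {
            err = set_security_xattr(x);
            if (err < 0)
                break;
        }
        return err;
    }

    int main(void)
    {
        struct xattr attrs[] = {
            { "security.selinux", "ctx", 3 },
            { NULL, NULL, 0 },
        };
        return initxattrs(attrs);
    }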
652 | /** | 649 | /** |
653 | * gfs2_create_inode - Create a new inode | 650 | * gfs2_create_inode - Create a new inode |
654 | * @dir: The parent directory | 651 | * @dir: The parent directory |
@@ -663,7 +660,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
663 | 660 | ||
664 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | 661 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, |
665 | unsigned int mode, dev_t dev, const char *symname, | 662 | unsigned int mode, dev_t dev, const char *symname, |
666 | unsigned int size) | 663 | unsigned int size, int excl) |
667 | { | 664 | { |
668 | const struct qstr *name = &dentry->d_name; | 665 | const struct qstr *name = &dentry->d_name; |
669 | struct gfs2_holder ghs[2]; | 666 | struct gfs2_holder ghs[2]; |
@@ -683,6 +680,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
683 | goto fail; | 680 | goto fail; |
684 | 681 | ||
685 | error = create_ok(dip, name, mode); | 682 | error = create_ok(dip, name, mode); |
683 | if ((error == -EEXIST) && S_ISREG(mode) && !excl) { | ||
684 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); | ||
685 | gfs2_glock_dq_uninit(ghs); | ||
686 | d_instantiate(dentry, inode); | ||
687 | return IS_ERR(inode) ? PTR_ERR(inode) : 0; | ||
688 | } | ||
686 | if (error) | 689 | if (error) |
687 | goto fail_gunlock; | 690 | goto fail_gunlock; |
688 | 691 | ||
@@ -725,21 +728,22 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
725 | brelse(bh); | 728 | brelse(bh); |
726 | 729 | ||
727 | gfs2_trans_end(sdp); | 730 | gfs2_trans_end(sdp); |
728 | if (dip->i_alloc->al_rgd) | 731 | gfs2_inplace_release(dip); |
729 | gfs2_inplace_release(dip); | ||
730 | gfs2_quota_unlock(dip); | 732 | gfs2_quota_unlock(dip); |
731 | gfs2_alloc_put(dip); | 733 | gfs2_alloc_put(dip); |
732 | gfs2_glock_dq_uninit_m(2, ghs); | ||
733 | mark_inode_dirty(inode); | 734 | mark_inode_dirty(inode); |
735 | gfs2_glock_dq_uninit_m(2, ghs); | ||
734 | d_instantiate(dentry, inode); | 736 | d_instantiate(dentry, inode); |
735 | return 0; | 737 | return 0; |
736 | 738 | ||
737 | fail_gunlock2: | 739 | fail_gunlock2: |
738 | gfs2_glock_dq_uninit(ghs + 1); | 740 | gfs2_glock_dq_uninit(ghs + 1); |
739 | if (inode && !IS_ERR(inode)) | ||
740 | iput(inode); | ||
741 | fail_gunlock: | 741 | fail_gunlock: |
742 | gfs2_glock_dq_uninit(ghs); | 742 | gfs2_glock_dq_uninit(ghs); |
743 | if (inode && !IS_ERR(inode)) { | ||
744 | set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); | ||
745 | iput(inode); | ||
746 | } | ||
743 | fail: | 747 | fail: |
744 | if (bh) | 748 | if (bh) |
745 | brelse(bh); | 749 | brelse(bh); |
@@ -758,24 +762,10 @@ fail: | |||
758 | static int gfs2_create(struct inode *dir, struct dentry *dentry, | 762 | static int gfs2_create(struct inode *dir, struct dentry *dentry, |
759 | int mode, struct nameidata *nd) | 763 | int mode, struct nameidata *nd) |
760 | { | 764 | { |
761 | struct inode *inode; | 765 | int excl = 0; |
762 | int ret; | 766 | if (nd && (nd->flags & LOOKUP_EXCL)) |
763 | 767 | excl = 1; | |
764 | for (;;) { | 768 | return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); |
765 | ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0); | ||
766 | if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL))) | ||
767 | return ret; | ||
768 | |||
769 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); | ||
770 | if (inode) { | ||
771 | if (!IS_ERR(inode)) | ||
772 | break; | ||
773 | return PTR_ERR(inode); | ||
774 | } | ||
775 | } | ||
776 | |||
777 | d_instantiate(dentry, inode); | ||
778 | return 0; | ||
779 | } | 769 | } |
780 | 770 | ||
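gfs2_create above loses its retry loop: non-exclusive create is resolved inside gfs2_create_inode, where -EEXIST combined with !excl degrades into a lookup of the existing inode under the same directory lock, closing the create/lookup race the old loop papered over. A control-flow sketch of that create-or-open shape (try_create and lookup_existing are mock stand-ins):

    #include <stdio.h>
    #include <errno.h>

    static int file_exists = 1;

    static int try_create(const char *name)
    {
        (void)name;
        return file_exists ? -EEXIST : 0;
    }

    static int lookup_existing(const char *name)
    {
        printf("reusing existing \"%s\"\n", name);
        return 0;
    }

    /* EEXIST on a non-exclusive create becomes a plain lookup,
     * all under one lock - no retry loop in the caller. */
    static int create_or_open(const char *name, int excl)
    {
        int err = try_create(name);
        if (err == -EEXIST && !excl)
            return lookup_existing(name);
        return err;
    }

    int main(void)
    {
        return create_or_open("afile", 0) ? 1 : 0;
    }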
781 | /** | 771 | /** |
@@ -902,7 +892,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
902 | goto out_gunlock_q; | 892 | goto out_gunlock_q; |
903 | 893 | ||
904 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 894 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
905 | gfs2_rg_blocks(al) + | 895 | gfs2_rg_blocks(dip) + |
906 | 2 * RES_DINODE + RES_STATFS + | 896 | 2 * RES_DINODE + RES_STATFS + |
907 | RES_QUOTA, 0); | 897 | RES_QUOTA, 0); |
908 | if (error) | 898 | if (error) |
@@ -924,8 +914,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
924 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 914 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
925 | inc_nlink(&ip->i_inode); | 915 | inc_nlink(&ip->i_inode); |
926 | ip->i_inode.i_ctime = CURRENT_TIME; | 916 | ip->i_inode.i_ctime = CURRENT_TIME; |
927 | gfs2_dinode_out(ip, dibh->b_data); | 917 | ihold(inode); |
928 | mark_inode_dirty(&ip->i_inode); | 918 | d_instantiate(dentry, inode); |
919 | mark_inode_dirty(inode); | ||
929 | 920 | ||
930 | out_brelse: | 921 | out_brelse: |
931 | brelse(dibh); | 922 | brelse(dibh); |
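The gfs2_link change above moves d_instantiate inside the transaction and takes ihold first, so the dentry owns its own inode reference before anyone else can see or drop it. The grab-before-publish refcount pattern in miniature:

    #include <stdio.h>

    struct obj { int refcount; };

    static void hold(struct obj *o) { o->refcount++; }

    /* Publish o to a second owner: take the new reference *before*
     * the other owner can observe (and later drop) the pointer. */
    static void publish(struct obj *o, struct obj **slot)
    {
        hold(o);
        *slot = o;
    }

    int main(void)
    {
        struct obj inode = { .refcount = 1 };    /* caller's reference */
        struct obj *dentry_inode = NULL;
        publish(&inode, &dentry_inode);
        printf("refcount=%d\n", inode.refcount); /* 2: caller + dentry */
        return 0;
    }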
@@ -947,11 +938,6 @@ out_child: | |||
947 | out_parent: | 938 | out_parent: |
948 | gfs2_holder_uninit(ghs); | 939 | gfs2_holder_uninit(ghs); |
949 | gfs2_holder_uninit(ghs + 1); | 940 | gfs2_holder_uninit(ghs + 1); |
950 | if (!error) { | ||
951 | ihold(inode); | ||
952 | d_instantiate(dentry, inode); | ||
953 | mark_inode_dirty(inode); | ||
954 | } | ||
955 | return error; | 941 | return error; |
956 | } | 942 | } |
957 | 943 | ||
@@ -1024,8 +1010,6 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip, | |||
1024 | clear_nlink(inode); | 1010 | clear_nlink(inode); |
1025 | else | 1011 | else |
1026 | drop_nlink(inode); | 1012 | drop_nlink(inode); |
1027 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
1028 | gfs2_dinode_out(ip, bh->b_data); | ||
1029 | mark_inode_dirty(inode); | 1013 | mark_inode_dirty(inode); |
1030 | if (inode->i_nlink == 0) | 1014 | if (inode->i_nlink == 0) |
1031 | gfs2_unlink_di(inode); | 1015 | gfs2_unlink_di(inode); |
@@ -1053,13 +1037,8 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
1053 | struct buffer_head *bh; | 1037 | struct buffer_head *bh; |
1054 | struct gfs2_holder ghs[3]; | 1038 | struct gfs2_holder ghs[3]; |
1055 | struct gfs2_rgrpd *rgd; | 1039 | struct gfs2_rgrpd *rgd; |
1056 | struct gfs2_holder ri_gh; | ||
1057 | int error; | 1040 | int error; |
1058 | 1041 | ||
1059 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
1060 | if (error) | ||
1061 | return error; | ||
1062 | |||
1063 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 1042 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
1064 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 1043 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
1065 | 1044 | ||
@@ -1116,7 +1095,6 @@ out_child: | |||
1116 | gfs2_glock_dq(ghs); | 1095 | gfs2_glock_dq(ghs); |
1117 | out_parent: | 1096 | out_parent: |
1118 | gfs2_holder_uninit(ghs); | 1097 | gfs2_holder_uninit(ghs); |
1119 | gfs2_glock_dq_uninit(&ri_gh); | ||
1120 | return error; | 1098 | return error; |
1121 | } | 1099 | } |
1122 | 1100 | ||
@@ -1139,7 +1117,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
1139 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) | 1117 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) |
1140 | return -ENAMETOOLONG; | 1118 | return -ENAMETOOLONG; |
1141 | 1119 | ||
1142 | return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size); | 1120 | return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); |
1143 | } | 1121 | } |
1144 | 1122 | ||
1145 | /** | 1123 | /** |
@@ -1153,7 +1131,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
1153 | 1131 | ||
1154 | static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 1132 | static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
1155 | { | 1133 | { |
1156 | return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0); | 1134 | return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); |
1157 | } | 1135 | } |
1158 | 1136 | ||
1159 | /** | 1137 | /** |
@@ -1168,7 +1146,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1168 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | 1146 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, |
1169 | dev_t dev) | 1147 | dev_t dev) |
1170 | { | 1148 | { |
1171 | return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0); | 1149 | return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); |
1172 | } | 1150 | } |
1173 | 1151 | ||
1174 | /* | 1152 | /* |
@@ -1234,7 +1212,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1234 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 1212 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
1235 | struct gfs2_inode *nip = NULL; | 1213 | struct gfs2_inode *nip = NULL; |
1236 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 1214 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
1237 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; | 1215 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
1238 | struct gfs2_rgrpd *nrgd; | 1216 | struct gfs2_rgrpd *nrgd; |
1239 | unsigned int num_gh; | 1217 | unsigned int num_gh; |
1240 | int dir_rename = 0; | 1218 | int dir_rename = 0; |
@@ -1248,10 +1226,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1248 | return 0; | 1226 | return 0; |
1249 | } | 1227 | } |
1250 | 1228 | ||
1251 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
1252 | if (error) | ||
1253 | return error; | ||
1254 | |||
1255 | if (odip != ndip) { | 1229 | if (odip != ndip) { |
1256 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, | 1230 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
1257 | 0, &r_gh); | 1231 | 0, &r_gh); |
@@ -1388,12 +1362,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1388 | 1362 | ||
1389 | al->al_requested = sdp->sd_max_dirres; | 1363 | al->al_requested = sdp->sd_max_dirres; |
1390 | 1364 | ||
1391 | error = gfs2_inplace_reserve_ri(ndip); | 1365 | error = gfs2_inplace_reserve(ndip); |
1392 | if (error) | 1366 | if (error) |
1393 | goto out_gunlock_q; | 1367 | goto out_gunlock_q; |
1394 | 1368 | ||
1395 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 1369 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
1396 | gfs2_rg_blocks(al) + | 1370 | gfs2_rg_blocks(ndip) + |
1397 | 4 * RES_DINODE + 4 * RES_LEAF + | 1371 | 4 * RES_DINODE + 4 * RES_LEAF + |
1398 | RES_STATFS + RES_QUOTA + 4, 0); | 1372 | RES_STATFS + RES_QUOTA + 4, 0); |
1399 | if (error) | 1373 | if (error) |
@@ -1459,7 +1433,6 @@ out_gunlock_r: | |||
1459 | if (r_gh.gh_gl) | 1433 | if (r_gh.gh_gl) |
1460 | gfs2_glock_dq_uninit(&r_gh); | 1434 | gfs2_glock_dq_uninit(&r_gh); |
1461 | out: | 1435 | out: |
1462 | gfs2_glock_dq_uninit(&ri_gh); | ||
1463 | return error; | 1436 | return error; |
1464 | } | 1437 | } |
1465 | 1438 | ||
@@ -1563,21 +1536,10 @@ int gfs2_permission(struct inode *inode, int mask) | |||
1563 | return error; | 1536 | return error; |
1564 | } | 1537 | } |
1565 | 1538 | ||
1566 | static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1539 | static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr) |
1567 | { | 1540 | { |
1568 | struct inode *inode = &ip->i_inode; | ||
1569 | struct buffer_head *dibh; | ||
1570 | int error; | ||
1571 | |||
1572 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1573 | if (error) | ||
1574 | return error; | ||
1575 | |||
1576 | setattr_copy(inode, attr); | 1541 | setattr_copy(inode, attr); |
1577 | mark_inode_dirty(inode); | 1542 | mark_inode_dirty(inode); |
1578 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1579 | gfs2_dinode_out(ip, dibh->b_data); | ||
1580 | brelse(dibh); | ||
1581 | return 0; | 1543 | return 0; |
1582 | } | 1544 | } |
1583 | 1545 | ||
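__gfs2_setattr_simple above stops writing the dinode buffer by hand: it updates the in-core inode and marks it dirty, leaving the on-disk copy to the generic write_inode path. The mark-dirty-and-defer pattern, sketched (field names are illustrative):

    #include <stdio.h>

    struct inode { int mode; int dirty; };

    static void setattr(struct inode *i, int mode)
    {
        i->mode = mode;      /* update in-core state only */
        i->dirty = 1;        /* defer the disk write */
    }

    /* Called later by the writeback machinery, not by setattr itself. */
    static void write_inode(struct inode *i)
    {
        if (!i->dirty)
            return;
        printf("flushing inode, mode=%o\n", i->mode);
        i->dirty = 0;
    }

    int main(void)
    {
        struct inode i = { 0644, 0 };
        setattr(&i, 0600);
        write_inode(&i);     /* one write absorbs any number of setattrs */
        return 0;
    }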
@@ -1589,19 +1551,19 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | |||
1589 | * Returns: errno | 1551 | * Returns: errno |
1590 | */ | 1552 | */ |
1591 | 1553 | ||
1592 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1554 | int gfs2_setattr_simple(struct inode *inode, struct iattr *attr) |
1593 | { | 1555 | { |
1594 | int error; | 1556 | int error; |
1595 | 1557 | ||
1596 | if (current->journal_info) | 1558 | if (current->journal_info) |
1597 | return __gfs2_setattr_simple(ip, attr); | 1559 | return __gfs2_setattr_simple(inode, attr); |
1598 | 1560 | ||
1599 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); | 1561 | error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0); |
1600 | if (error) | 1562 | if (error) |
1601 | return error; | 1563 | return error; |
1602 | 1564 | ||
1603 | error = __gfs2_setattr_simple(ip, attr); | 1565 | error = __gfs2_setattr_simple(inode, attr); |
1604 | gfs2_trans_end(GFS2_SB(&ip->i_inode)); | 1566 | gfs2_trans_end(GFS2_SB(inode)); |
1605 | return error; | 1567 | return error; |
1606 | } | 1568 | } |
1607 | 1569 | ||
@@ -1639,7 +1601,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1639 | if (error) | 1601 | if (error) |
1640 | goto out_gunlock_q; | 1602 | goto out_gunlock_q; |
1641 | 1603 | ||
1642 | error = gfs2_setattr_simple(ip, attr); | 1604 | error = gfs2_setattr_simple(inode, attr); |
1643 | if (error) | 1605 | if (error) |
1644 | goto out_end_trans; | 1606 | goto out_end_trans; |
1645 | 1607 | ||
@@ -1695,12 +1657,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1695 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) | 1657 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) |
1696 | error = gfs2_acl_chmod(ip, attr); | 1658 | error = gfs2_acl_chmod(ip, attr); |
1697 | else | 1659 | else |
1698 | error = gfs2_setattr_simple(ip, attr); | 1660 | error = gfs2_setattr_simple(inode, attr); |
1699 | 1661 | ||
1700 | out: | 1662 | out: |
1701 | gfs2_glock_dq_uninit(&i_gh); | ||
1702 | if (!error) | 1663 | if (!error) |
1703 | mark_inode_dirty(inode); | 1664 | mark_inode_dirty(inode); |
1665 | gfs2_glock_dq_uninit(&i_gh); | ||
1704 | return error; | 1666 | return error; |
1705 | } | 1667 | } |
1706 | 1668 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 8d90e0c07672..276e7b52b658 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -109,7 +109,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip); | |||
109 | extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | 109 | extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, |
110 | int is_root); | 110 | int is_root); |
111 | extern int gfs2_permission(struct inode *inode, int mask); | 111 | extern int gfs2_permission(struct inode *inode, int mask); |
112 | extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 112 | extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); |
113 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 113 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
114 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 114 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
115 | 115 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 85c62923ee29..598646434362 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
624 | bh->b_end_io = end_buffer_write_sync; | 624 | bh->b_end_io = end_buffer_write_sync; |
625 | get_bh(bh); | 625 | get_bh(bh); |
626 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | 626 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
627 | submit_bh(WRITE_SYNC | REQ_META, bh); | 627 | submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); |
628 | else | 628 | else |
629 | submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); | 629 | submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); |
630 | wait_on_buffer(bh); | 630 | wait_on_buffer(bh); |
631 | 631 | ||
632 | if (!buffer_uptodate(bh)) | 632 | if (!buffer_uptodate(bh)) |
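The log-header writes above gain REQ_PRIO so the block layer treats journal metadata as high-priority I/O; the barrier decision (plain WRITE_SYNC versus WRITE_FLUSH_FUA) is unchanged. Composing and testing such flag masks, in a standalone sketch (the numeric bit values here are illustrative, not the kernel's):

    #include <stdio.h>

    enum {
        REQ_META  = 1 << 0,    /* illustrative values only */
        REQ_PRIO  = 1 << 1,
        REQ_FLUSH = 1 << 2,
        REQ_FUA   = 1 << 3,
        REQ_SYNC  = 1 << 4,
    };

    int main(void)
    {
        int nobarriers = 0;
        int flags = REQ_META | REQ_PRIO;   /* always tag journal metadata */

        if (nobarriers)
            flags |= REQ_SYNC;
        else
            flags |= REQ_SYNC | REQ_FLUSH | REQ_FUA;

        printf("fua=%d prio=%d\n", !!(flags & REQ_FUA), !!(flags & REQ_PRIO));
        return 0;
    }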
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 05bbb124699f..0301be655b12 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -60,6 +60,29 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) | |||
60 | trace_gfs2_pin(bd, 1); | 60 | trace_gfs2_pin(bd, 1); |
61 | } | 61 | } |
62 | 62 | ||
63 | static bool buffer_is_rgrp(const struct gfs2_bufdata *bd) | ||
64 | { | ||
65 | return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP; | ||
66 | } | ||
67 | |||
68 | static void maybe_release_space(struct gfs2_bufdata *bd) | ||
69 | { | ||
70 | struct gfs2_glock *gl = bd->bd_gl; | ||
71 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
72 | struct gfs2_rgrpd *rgd = gl->gl_object; | ||
73 | unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; | ||
74 | struct gfs2_bitmap *bi = rgd->rd_bits + index; | ||
75 | |||
76 | if (bi->bi_clone == 0) | ||
77 | return; | ||
78 | if (sdp->sd_args.ar_discard) | ||
79 | gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); | ||
80 | memcpy(bi->bi_clone + bi->bi_offset, | ||
81 | bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); | ||
82 | clear_bit(GBF_FULL, &bi->bi_flags); | ||
83 | rgd->rd_free_clone = rgd->rd_free; | ||
84 | } | ||
85 | |||
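maybe_release_space above runs when an rgrp bitmap buffer is unpinned: it copies the committed bitmap over the clone and resets rd_free_clone, so blocks freed in the last transaction become allocatable (and optionally discarded) at log-commit time instead of waiting for the rgrp glock to be released. The copy itself is a plain memcpy of the bitmap window, shown here with toy buffers:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* b_data: committed bitmap; bi_clone: stale copy that was
         * protecting freed-but-not-yet-committed blocks. */
        unsigned char b_data[16]   = { 0x00, 0x0f };
        unsigned char bi_clone[16] = { 0xff, 0xff };
        size_t bi_offset = 0, bi_len = 16;
        unsigned int rd_free = 100, rd_free_clone = 90;

        memcpy(bi_clone + bi_offset, b_data + bi_offset, bi_len);
        rd_free_clone = rd_free;    /* clone-protected space released */

        printf("free_clone=%u first byte=%#x\n", rd_free_clone, bi_clone[0]);
        return 0;
    }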
63 | /** | 86 | /** |
64 | * gfs2_unpin - Unpin a buffer | 87 | * gfs2_unpin - Unpin a buffer |
65 | * @sdp: the filesystem the buffer belongs to | 88 | * @sdp: the filesystem the buffer belongs to |
@@ -81,6 +104,9 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
81 | mark_buffer_dirty(bh); | 104 | mark_buffer_dirty(bh); |
82 | clear_buffer_pinned(bh); | 105 | clear_buffer_pinned(bh); |
83 | 106 | ||
107 | if (buffer_is_rgrp(bd)) | ||
108 | maybe_release_space(bd); | ||
109 | |||
84 | spin_lock(&sdp->sd_ail_lock); | 110 | spin_lock(&sdp->sd_ail_lock); |
85 | if (bd->bd_ail) { | 111 | if (bd->bd_ail) { |
86 | list_del(&bd->bd_ail_st_list); | 112 | list_del(&bd->bd_ail_st_list); |
@@ -469,42 +495,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
469 | gfs2_revoke_clean(sdp); | 495 | gfs2_revoke_clean(sdp); |
470 | } | 496 | } |
471 | 497 | ||
472 | static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
473 | { | ||
474 | struct gfs2_rgrpd *rgd; | ||
475 | struct gfs2_trans *tr = current->journal_info; | ||
476 | |||
477 | tr->tr_touched = 1; | ||
478 | |||
479 | rgd = container_of(le, struct gfs2_rgrpd, rd_le); | ||
480 | |||
481 | gfs2_log_lock(sdp); | ||
482 | if (!list_empty(&le->le_list)){ | ||
483 | gfs2_log_unlock(sdp); | ||
484 | return; | ||
485 | } | ||
486 | gfs2_rgrp_bh_hold(rgd); | ||
487 | sdp->sd_log_num_rg++; | ||
488 | list_add(&le->le_list, &sdp->sd_log_le_rg); | ||
489 | gfs2_log_unlock(sdp); | ||
490 | } | ||
491 | |||
492 | static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
493 | { | ||
494 | struct list_head *head = &sdp->sd_log_le_rg; | ||
495 | struct gfs2_rgrpd *rgd; | ||
496 | |||
497 | while (!list_empty(head)) { | ||
498 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list); | ||
499 | list_del_init(&rgd->rd_le.le_list); | ||
500 | sdp->sd_log_num_rg--; | ||
501 | |||
502 | gfs2_rgrp_repolish_clones(rgd); | ||
503 | gfs2_rgrp_bh_put(rgd); | ||
504 | } | ||
505 | gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); | ||
506 | } | ||
507 | |||
508 | /** | 498 | /** |
509 | * databuf_lo_add - Add a databuf to the transaction. | 499 | * databuf_lo_add - Add a databuf to the transaction. |
510 | * | 500 | * |
@@ -705,8 +695,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
705 | 695 | ||
706 | brelse(bh_log); | 696 | brelse(bh_log); |
707 | brelse(bh_ip); | 697 | brelse(bh_ip); |
708 | if (error) | ||
709 | break; | ||
710 | 698 | ||
711 | sdp->sd_replayed_blocks++; | 699 | sdp->sd_replayed_blocks++; |
712 | } | 700 | } |
@@ -771,8 +759,6 @@ const struct gfs2_log_operations gfs2_revoke_lops = { | |||
771 | }; | 759 | }; |
772 | 760 | ||
773 | const struct gfs2_log_operations gfs2_rg_lops = { | 761 | const struct gfs2_log_operations gfs2_rg_lops = { |
774 | .lo_add = rg_lo_add, | ||
775 | .lo_after_commit = rg_lo_after_commit, | ||
776 | .lo_name = "rg", | 762 | .lo_name = "rg", |
777 | }; | 763 | }; |
778 | 764 | ||
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 747238cd9f96..be29858900f6 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
37 | { | 37 | { |
38 | struct buffer_head *bh, *head; | 38 | struct buffer_head *bh, *head; |
39 | int nr_underway = 0; | 39 | int nr_underway = 0; |
40 | int write_op = REQ_META | | 40 | int write_op = REQ_META | REQ_PRIO | |
41 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | 41 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); |
42 | 42 | ||
43 | BUG_ON(!PageLocked(page)); | 43 | BUG_ON(!PageLocked(page)); |
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | |||
225 | } | 225 | } |
226 | bh->b_end_io = end_buffer_read_sync; | 226 | bh->b_end_io = end_buffer_read_sync; |
227 | get_bh(bh); | 227 | get_bh(bh); |
228 | submit_bh(READ_SYNC | REQ_META, bh); | 228 | submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); |
229 | if (!(flags & DIO_WAIT)) | 229 | if (!(flags & DIO_WAIT)) |
230 | return 0; | 230 | return 0; |
231 | 231 | ||
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
435 | if (buffer_uptodate(first_bh)) | 435 | if (buffer_uptodate(first_bh)) |
436 | goto out; | 436 | goto out; |
437 | if (!buffer_locked(first_bh)) | 437 | if (!buffer_locked(first_bh)) |
438 | ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); | 438 | ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); |
439 | 439 | ||
440 | dblock++; | 440 | dblock++; |
441 | extlen--; | 441 | extlen--; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3bc073a4cf82..7e823bbd2453 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -77,8 +77,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
77 | 77 | ||
78 | spin_lock_init(&sdp->sd_rindex_spin); | 78 | spin_lock_init(&sdp->sd_rindex_spin); |
79 | mutex_init(&sdp->sd_rindex_mutex); | 79 | mutex_init(&sdp->sd_rindex_mutex); |
80 | INIT_LIST_HEAD(&sdp->sd_rindex_list); | 80 | sdp->sd_rindex_tree.rb_node = NULL; |
81 | INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); | ||
82 | 81 | ||
83 | INIT_LIST_HEAD(&sdp->sd_jindex_list); | 82 | INIT_LIST_HEAD(&sdp->sd_jindex_list); |
84 | spin_lock_init(&sdp->sd_jindex_spin); | 83 | spin_lock_init(&sdp->sd_jindex_spin); |
@@ -224,7 +223,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) | |||
224 | 223 | ||
225 | bio->bi_end_io = end_bio_io_page; | 224 | bio->bi_end_io = end_bio_io_page; |
226 | bio->bi_private = page; | 225 | bio->bi_private = page; |
227 | submit_bio(READ_SYNC | REQ_META, bio); | 226 | submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); |
228 | wait_on_page_locked(page); | 227 | wait_on_page_locked(page); |
229 | bio_put(bio); | 228 | bio_put(bio); |
230 | if (!PageUptodate(page)) { | 229 | if (!PageUptodate(page)) { |
@@ -652,7 +651,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
652 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 651 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
653 | return PTR_ERR(sdp->sd_jindex); | 652 | return PTR_ERR(sdp->sd_jindex); |
654 | } | 653 | } |
655 | ip = GFS2_I(sdp->sd_jindex); | ||
656 | 654 | ||
657 | /* Load in the journal index special file */ | 655 | /* Load in the journal index special file */ |
658 | 656 | ||
@@ -764,7 +762,6 @@ fail: | |||
764 | static int init_inodes(struct gfs2_sbd *sdp, int undo) | 762 | static int init_inodes(struct gfs2_sbd *sdp, int undo) |
765 | { | 763 | { |
766 | int error = 0; | 764 | int error = 0; |
767 | struct gfs2_inode *ip; | ||
768 | struct inode *master = sdp->sd_master_dir->d_inode; | 765 | struct inode *master = sdp->sd_master_dir->d_inode; |
769 | 766 | ||
770 | if (undo) | 767 | if (undo) |
@@ -789,7 +786,6 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
789 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 786 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
790 | goto fail_statfs; | 787 | goto fail_statfs; |
791 | } | 788 | } |
792 | ip = GFS2_I(sdp->sd_rindex); | ||
793 | sdp->sd_rindex_uptodate = 0; | 789 | sdp->sd_rindex_uptodate = 0; |
794 | 790 | ||
795 | /* Read in the quota inode */ | 791 | /* Read in the quota inode */ |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 42e8d23bc047..7e528dc14f85 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -638,15 +638,18 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
638 | unsigned long index = loc >> PAGE_CACHE_SHIFT; | 638 | unsigned long index = loc >> PAGE_CACHE_SHIFT; |
639 | unsigned offset = loc & (PAGE_CACHE_SIZE - 1); | 639 | unsigned offset = loc & (PAGE_CACHE_SIZE - 1); |
640 | unsigned blocksize, iblock, pos; | 640 | unsigned blocksize, iblock, pos; |
641 | struct buffer_head *bh, *dibh; | 641 | struct buffer_head *bh; |
642 | struct page *page; | 642 | struct page *page; |
643 | void *kaddr, *ptr; | 643 | void *kaddr, *ptr; |
644 | struct gfs2_quota q, *qp; | 644 | struct gfs2_quota q, *qp; |
645 | int err, nbytes; | 645 | int err, nbytes; |
646 | u64 size; | 646 | u64 size; |
647 | 647 | ||
648 | if (gfs2_is_stuffed(ip)) | 648 | if (gfs2_is_stuffed(ip)) { |
649 | gfs2_unstuff_dinode(ip, NULL); | 649 | err = gfs2_unstuff_dinode(ip, NULL); |
650 | if (err) | ||
651 | return err; | ||
652 | } | ||
650 | 653 | ||
651 | memset(&q, 0, sizeof(struct gfs2_quota)); | 654 | memset(&q, 0, sizeof(struct gfs2_quota)); |
652 | err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q)); | 655 | err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q)); |
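gfs2_adjust_quota above used to ignore a failing gfs2_unstuff_dinode and press on against a still-stuffed inode; the error now short-circuits the update (gfs2_quota_lock gets the same treatment for gfs2_quota_hold later in this file). The guard pattern, isolated with mock names:

    #include <stdio.h>
    #include <errno.h>

    static int unstuff(int *stuffed)
    {
        (void)stuffed;
        return -ENOSPC;      /* pretend the conversion failed */
    }

    static int adjust(int *stuffed)
    {
        if (*stuffed) {
            int err = unstuff(stuffed);
            if (err)
                return err;  /* propagate; do not press on */
        }
        /* ... the write would only run against an unstuffed inode ... */
        return 0;
    }

    int main(void)
    {
        int stuffed = 1;
        printf("adjust() = %d\n", adjust(&stuffed));  /* negative errno */
        return 0;
    }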
@@ -709,7 +712,7 @@ get_a_page: | |||
709 | set_buffer_uptodate(bh); | 712 | set_buffer_uptodate(bh); |
710 | 713 | ||
711 | if (!buffer_uptodate(bh)) { | 714 | if (!buffer_uptodate(bh)) { |
712 | ll_rw_block(READ_META, 1, &bh); | 715 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
713 | wait_on_buffer(bh); | 716 | wait_on_buffer(bh); |
714 | if (!buffer_uptodate(bh)) | 717 | if (!buffer_uptodate(bh)) |
715 | goto unlock_out; | 718 | goto unlock_out; |
@@ -736,22 +739,13 @@ get_a_page: | |||
736 | goto get_a_page; | 739 | goto get_a_page; |
737 | } | 740 | } |
738 | 741 | ||
739 | /* Update the disk inode timestamp and size (if extended) */ | ||
740 | err = gfs2_meta_inode_buffer(ip, &dibh); | ||
741 | if (err) | ||
742 | goto out; | ||
743 | |||
744 | size = loc + sizeof(struct gfs2_quota); | 742 | size = loc + sizeof(struct gfs2_quota); |
745 | if (size > inode->i_size) | 743 | if (size > inode->i_size) |
746 | i_size_write(inode, size); | 744 | i_size_write(inode, size); |
747 | inode->i_mtime = inode->i_atime = CURRENT_TIME; | 745 | inode->i_mtime = inode->i_atime = CURRENT_TIME; |
748 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
749 | gfs2_dinode_out(ip, dibh->b_data); | ||
750 | brelse(dibh); | ||
751 | mark_inode_dirty(inode); | 746 | mark_inode_dirty(inode); |
752 | |||
753 | out: | ||
754 | return err; | 747 | return err; |
748 | |||
755 | unlock_out: | 749 | unlock_out: |
756 | unlock_page(page); | 750 | unlock_page(page); |
757 | page_cache_release(page); | 751 | page_cache_release(page); |
@@ -822,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
822 | goto out_alloc; | 816 | goto out_alloc; |
823 | 817 | ||
824 | if (nalloc) | 818 | if (nalloc) |
825 | blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; | 819 | blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; |
826 | 820 | ||
827 | error = gfs2_trans_begin(sdp, blocks, 0); | 821 | error = gfs2_trans_begin(sdp, blocks, 0); |
828 | if (error) | 822 | if (error) |
@@ -936,7 +930,9 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
936 | unsigned int x; | 930 | unsigned int x; |
937 | int error = 0; | 931 | int error = 0; |
938 | 932 | ||
939 | gfs2_quota_hold(ip, uid, gid); | 933 | error = gfs2_quota_hold(ip, uid, gid); |
934 | if (error) | ||
935 | return error; | ||
940 | 936 | ||
941 | if (capable(CAP_SYS_RESOURCE) || | 937 | if (capable(CAP_SYS_RESOURCE) || |
942 | sdp->sd_args.ar_quota != GFS2_QUOTA_ON) | 938 | sdp->sd_args.ar_quota != GFS2_QUOTA_ON) |
@@ -1607,7 +1603,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1607 | error = gfs2_inplace_reserve(ip); | 1603 | error = gfs2_inplace_reserve(ip); |
1608 | if (error) | 1604 | if (error) |
1609 | goto out_alloc; | 1605 | goto out_alloc; |
1610 | blocks += gfs2_rg_blocks(al); | 1606 | blocks += gfs2_rg_blocks(ip); |
1611 | } | 1607 | } |
1612 | 1608 | ||
1613 | /* Some quotas span block boundaries and can update two blocks, | 1609 | /* Some quotas span block boundaries and can update two blocks, |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7f8af1eb02de..96bd6d759f29 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
16 | #include <linux/prefetch.h> | 16 | #include <linux/prefetch.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/rbtree.h> | ||
18 | 19 | ||
19 | #include "gfs2.h" | 20 | #include "gfs2.h" |
20 | #include "incore.h" | 21 | #include "incore.h" |
@@ -328,18 +329,22 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) | |||
328 | 329 | ||
329 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) | 330 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) |
330 | { | 331 | { |
331 | struct gfs2_rgrpd *rgd; | 332 | struct rb_node **newn; |
333 | struct gfs2_rgrpd *cur; | ||
332 | 334 | ||
333 | spin_lock(&sdp->sd_rindex_spin); | 335 | spin_lock(&sdp->sd_rindex_spin); |
334 | 336 | newn = &sdp->sd_rindex_tree.rb_node; | |
335 | list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { | 337 | while (*newn) { |
336 | if (rgrp_contains_block(rgd, blk)) { | 338 | cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); |
337 | list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | 339 | if (blk < cur->rd_addr) |
340 | newn = &((*newn)->rb_left); | ||
341 | else if (blk >= cur->rd_data0 + cur->rd_data) | ||
342 | newn = &((*newn)->rb_right); | ||
343 | else { | ||
338 | spin_unlock(&sdp->sd_rindex_spin); | 344 | spin_unlock(&sdp->sd_rindex_spin); |
339 | return rgd; | 345 | return cur; |
340 | } | 346 | } |
341 | } | 347 | } |
342 | |||
343 | spin_unlock(&sdp->sd_rindex_spin); | 348 | spin_unlock(&sdp->sd_rindex_spin); |
344 | 349 | ||
345 | return NULL; | 350 | return NULL; |
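gfs2_blk2rgrpd above replaces the MRU-list scan with an O(log n) descent of the rgrp tree: go left when the block precedes the rgrp's start address, right when it lies at or past rd_data0 + rd_data, otherwise the block falls inside this rgrp. The same range search over a plain binary search tree (simplified to one start/length pair per node; the rgrp version distinguishes the header address rd_addr from the data window at rd_data0, and embeds struct rb_node in the rgrp):

    #include <stdio.h>
    #include <stddef.h>

    struct range_node {
        unsigned long long start, len;        /* covers [start, start+len) */
        struct range_node *left, *right;
    };

    static struct range_node *range_lookup(struct range_node *n,
                                           unsigned long long blk)
    {
        while (n) {
            if (blk < n->start)
                n = n->left;                  /* before this range */
            else if (blk >= n->start + n->len)
                n = n->right;                 /* past this range */
            else
                return n;                     /* inside it */
        }
        return NULL;
    }

    int main(void)
    {
        struct range_node a    = { 0,   100, NULL, NULL };
        struct range_node c    = { 200, 100, NULL, NULL };
        struct range_node root = { 100, 100, &a, &c };

        struct range_node *hit = range_lookup(&root, 250);
        printf("block 250 -> range starting at %llu\n",
               hit ? hit->start : 0);
        return 0;
    }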
@@ -354,8 +359,15 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) | |||
354 | 359 | ||
355 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) | 360 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) |
356 | { | 361 | { |
357 | gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list)); | 362 | const struct rb_node *n; |
358 | return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list); | 363 | struct gfs2_rgrpd *rgd; |
364 | |||
365 | spin_lock(&sdp->sd_rindex_spin); | ||
366 | n = rb_first(&sdp->sd_rindex_tree); | ||
367 | rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); | ||
368 | spin_unlock(&sdp->sd_rindex_spin); | ||
369 | |||
370 | return rgd; | ||
359 | } | 371 | } |
360 | 372 | ||
361 | /** | 373 | /** |
@@ -367,47 +379,60 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) | |||
367 | 379 | ||
368 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) | 380 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) |
369 | { | 381 | { |
370 | if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list) | 382 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
383 | const struct rb_node *n; | ||
384 | |||
385 | spin_lock(&sdp->sd_rindex_spin); | ||
386 | n = rb_next(&rgd->rd_node); | ||
387 | if (n == NULL) | ||
388 | n = rb_first(&sdp->sd_rindex_tree); | ||
389 | |||
390 | if (unlikely(&rgd->rd_node == n)) { | ||
391 | spin_unlock(&sdp->sd_rindex_spin); | ||
371 | return NULL; | 392 | return NULL; |
372 | return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list); | 393 | } |
394 | rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); | ||
395 | spin_unlock(&sdp->sd_rindex_spin); | ||
396 | return rgd; | ||
373 | } | 397 | } |
374 | 398 | ||
375 | static void clear_rgrpdi(struct gfs2_sbd *sdp) | 399 | void gfs2_free_clones(struct gfs2_rgrpd *rgd) |
376 | { | 400 | { |
377 | struct list_head *head; | 401 | int x; |
402 | |||
403 | for (x = 0; x < rgd->rd_length; x++) { | ||
404 | struct gfs2_bitmap *bi = rgd->rd_bits + x; | ||
405 | kfree(bi->bi_clone); | ||
406 | bi->bi_clone = NULL; | ||
407 | } | ||
408 | } | ||
409 | |||
410 | void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | ||
411 | { | ||
412 | struct rb_node *n; | ||
378 | struct gfs2_rgrpd *rgd; | 413 | struct gfs2_rgrpd *rgd; |
379 | struct gfs2_glock *gl; | 414 | struct gfs2_glock *gl; |
380 | 415 | ||
381 | spin_lock(&sdp->sd_rindex_spin); | 416 | while ((n = rb_first(&sdp->sd_rindex_tree))) { |
382 | sdp->sd_rindex_forward = NULL; | 417 | rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); |
383 | spin_unlock(&sdp->sd_rindex_spin); | ||
384 | |||
385 | head = &sdp->sd_rindex_list; | ||
386 | while (!list_empty(head)) { | ||
387 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list); | ||
388 | gl = rgd->rd_gl; | 418 | gl = rgd->rd_gl; |
389 | 419 | ||
390 | list_del(&rgd->rd_list); | 420 | rb_erase(n, &sdp->sd_rindex_tree); |
391 | list_del(&rgd->rd_list_mru); | ||
392 | 421 | ||
393 | if (gl) { | 422 | if (gl) { |
423 | spin_lock(&gl->gl_spin); | ||
394 | gl->gl_object = NULL; | 424 | gl->gl_object = NULL; |
425 | spin_unlock(&gl->gl_spin); | ||
395 | gfs2_glock_add_to_lru(gl); | 426 | gfs2_glock_add_to_lru(gl); |
396 | gfs2_glock_put(gl); | 427 | gfs2_glock_put(gl); |
397 | } | 428 | } |
398 | 429 | ||
430 | gfs2_free_clones(rgd); | ||
399 | kfree(rgd->rd_bits); | 431 | kfree(rgd->rd_bits); |
400 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); | 432 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); |
401 | } | 433 | } |
402 | } | 434 | } |
403 | 435 | ||
404 | void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | ||
405 | { | ||
406 | mutex_lock(&sdp->sd_rindex_mutex); | ||
407 | clear_rgrpdi(sdp); | ||
408 | mutex_unlock(&sdp->sd_rindex_mutex); | ||
409 | } | ||
410 | |||
411 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) | 436 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) |
412 | { | 437 | { |
413 | printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); | 438 | printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); |
@@ -524,22 +549,34 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
524 | return total_data; | 549 | return total_data; |
525 | } | 550 | } |
526 | 551 | ||
527 | static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf) | 552 | static void rgd_insert(struct gfs2_rgrpd *rgd) |
528 | { | 553 | { |
529 | const struct gfs2_rindex *str = buf; | 554 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
555 | struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; | ||
556 | |||
557 | /* Figure out where to put new node */ | ||
558 | while (*newn) { | ||
559 | struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, | ||
560 | rd_node); | ||
561 | |||
562 | parent = *newn; | ||
563 | if (rgd->rd_addr < cur->rd_addr) | ||
564 | newn = &((*newn)->rb_left); | ||
565 | else if (rgd->rd_addr > cur->rd_addr) | ||
566 | newn = &((*newn)->rb_right); | ||
567 | else | ||
568 | return; | ||
569 | } | ||
530 | 570 | ||
531 | rgd->rd_addr = be64_to_cpu(str->ri_addr); | 571 | rb_link_node(&rgd->rd_node, parent, newn); |
532 | rgd->rd_length = be32_to_cpu(str->ri_length); | 572 | rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); |
533 | rgd->rd_data0 = be64_to_cpu(str->ri_data0); | ||
534 | rgd->rd_data = be32_to_cpu(str->ri_data); | ||
535 | rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes); | ||
536 | } | 573 | } |
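rgd_insert() above is the stock rbtree insertion idiom: walk down recording the parent and the link pointer to patch, bail out if the key already exists, then let rb_link_node() and rb_insert_color() splice the node in and rebalance. A sketch of the same walk over the rgrp_node type from the lookup sketch earlier, minus the red-black rebalancing that the kernel helpers supply:

/* Unbalanced-BST version of rgd_insert(), keyed on addr. */
static void rgrp_insert(struct rgrp_node **root, struct rgrp_node *rgd)
{
    struct rgrp_node **newn = root;

    while (*newn) {
        struct rgrp_node *cur = *newn;

        if (rgd->addr < cur->addr)
            newn = &cur->left;
        else if (rgd->addr > cur->addr)
            newn = &cur->right;
        else
            return;                 /* duplicate key: already indexed */
    }
    rgd->left = rgd->right = NULL;
    *newn = rgd;                    /* rb_link_node() equivalent, minus rebalance */
}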
537 | 574 | ||
538 | /** | 575 | /** |
539 | * read_rindex_entry - Pull in a new resource index entry from the disk | 576 | * read_rindex_entry - Pull in a new resource index entry from the disk |
540 | * @gl: The glock covering the rindex inode | 577 | * @gl: The glock covering the rindex inode |
541 | * | 578 | * |
542 | * Returns: 0 on success, error code otherwise | 579 | * Returns: 0 on success, > 0 on EOF, error code otherwise |
543 | */ | 580 | */ |
544 | 581 | ||
545 | static int read_rindex_entry(struct gfs2_inode *ip, | 582 | static int read_rindex_entry(struct gfs2_inode *ip, |
@@ -547,44 +584,53 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
547 | { | 584 | { |
548 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 585 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
549 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); | 586 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); |
550 | char buf[sizeof(struct gfs2_rindex)]; | 587 | struct gfs2_rindex buf; |
551 | int error; | 588 | int error; |
552 | struct gfs2_rgrpd *rgd; | 589 | struct gfs2_rgrpd *rgd; |
553 | 590 | ||
554 | error = gfs2_internal_read(ip, ra_state, buf, &pos, | 591 | if (pos >= i_size_read(&ip->i_inode)) |
592 | return 1; | ||
593 | |||
594 | error = gfs2_internal_read(ip, ra_state, (char *)&buf, &pos, | ||
555 | sizeof(struct gfs2_rindex)); | 595 | sizeof(struct gfs2_rindex)); |
556 | if (!error) | 596 | |
557 | return 0; | 597 | if (error != sizeof(struct gfs2_rindex)) |
558 | if (error != sizeof(struct gfs2_rindex)) { | 598 | return (error == 0) ? 1 : error; |
559 | if (error > 0) | ||
560 | error = -EIO; | ||
561 | return error; | ||
562 | } | ||
563 | 599 | ||
564 | rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); | 600 | rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); |
565 | error = -ENOMEM; | 601 | error = -ENOMEM; |
566 | if (!rgd) | 602 | if (!rgd) |
567 | return error; | 603 | return error; |
568 | 604 | ||
569 | mutex_init(&rgd->rd_mutex); | ||
570 | lops_init_le(&rgd->rd_le, &gfs2_rg_lops); | ||
571 | rgd->rd_sbd = sdp; | 605 | rgd->rd_sbd = sdp; |
606 | rgd->rd_addr = be64_to_cpu(buf.ri_addr); | ||
607 | rgd->rd_length = be32_to_cpu(buf.ri_length); | ||
608 | rgd->rd_data0 = be64_to_cpu(buf.ri_data0); | ||
609 | rgd->rd_data = be32_to_cpu(buf.ri_data); | ||
610 | rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); | ||
572 | 611 | ||
573 | list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list); | ||
574 | list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | ||
575 | |||
576 | gfs2_rindex_in(rgd, buf); | ||
577 | error = compute_bitstructs(rgd); | 612 | error = compute_bitstructs(rgd); |
578 | if (error) | 613 | if (error) |
579 | return error; | 614 | goto fail; |
580 | 615 | ||
581 | error = gfs2_glock_get(sdp, rgd->rd_addr, | 616 | error = gfs2_glock_get(sdp, rgd->rd_addr, |
582 | &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); | 617 | &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); |
583 | if (error) | 618 | if (error) |
584 | return error; | 619 | goto fail; |
585 | 620 | ||
586 | rgd->rd_gl->gl_object = rgd; | 621 | rgd->rd_gl->gl_object = rgd; |
587 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 622 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
623 | if (rgd->rd_data > sdp->sd_max_rg_data) | ||
624 | sdp->sd_max_rg_data = rgd->rd_data; | ||
625 | spin_lock(&sdp->sd_rindex_spin); | ||
626 | rgd_insert(rgd); | ||
627 | sdp->sd_rgrps++; | ||
628 | spin_unlock(&sdp->sd_rindex_spin); | ||
629 | return error; | ||
630 | |||
631 | fail: | ||
632 | kfree(rgd->rd_bits); | ||
633 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); | ||
588 | return error; | 634 | return error; |
589 | } | 635 | } |
590 | 636 | ||
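read_rindex_entry() now reads straight into a struct gfs2_rindex on the stack, converts the big-endian fields in place, and signals a clean EOF by returning 1 once the offset reaches the inode size. A userspace sketch of the same fixed-size-record reader and its loop-until-EOF consumer; the layout is abbreviated to the fields used here (the real struct gfs2_rindex also carries padding and reserved bytes), and be64toh/be32toh are the glibc byte-order helpers:

#include <endian.h>     /* be64toh/be32toh (glibc) */
#include <stdint.h>
#include <stdio.h>

/* Abbreviated model of the on-disk rindex record (all big-endian). */
struct rindex_disk {
    uint64_t ri_addr;
    uint32_t ri_length;
    uint64_t ri_data0;
    uint32_t ri_data;
    uint32_t ri_bitbytes;
} __attribute__((packed));

/* Same contract as read_rindex_entry(): 0 = entry consumed,
 * 1 = clean EOF, negative = error. */
static int read_entry(FILE *f)
{
    struct rindex_disk buf;
    size_t n = fread(&buf, 1, sizeof(buf), f);

    if (n == 0 && feof(f))
        return 1;                   /* EOF on a record boundary */
    if (n != sizeof(buf))
        return -1;                  /* short or failed read */

    printf("rgrp at %llu: %u data blocks\n",
           (unsigned long long)be64toh(buf.ri_addr),
           (unsigned)be32toh(buf.ri_data));
    return 0;
}

/* Consumer loops exactly like gfs2_ri_update(): stop on EOF or error. */
static int read_all(FILE *f)
{
    int error;

    do {
        error = read_entry(f);
    } while (error == 0);
    return error < 0 ? error : 0;
}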
@@ -595,40 +641,28 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
595 | * Returns: 0 on successful update, error code otherwise | 641 | * Returns: 0 on successful update, error code otherwise |
596 | */ | 642 | */ |
597 | 643 | ||
598 | int gfs2_ri_update(struct gfs2_inode *ip) | 644 | static int gfs2_ri_update(struct gfs2_inode *ip) |
599 | { | 645 | { |
600 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 646 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
601 | struct inode *inode = &ip->i_inode; | 647 | struct inode *inode = &ip->i_inode; |
602 | struct file_ra_state ra_state; | 648 | struct file_ra_state ra_state; |
603 | u64 rgrp_count = i_size_read(inode); | ||
604 | struct gfs2_rgrpd *rgd; | ||
605 | unsigned int max_data = 0; | ||
606 | int error; | 649 | int error; |
607 | 650 | ||
608 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); | ||
609 | clear_rgrpdi(sdp); | ||
610 | |||
611 | file_ra_state_init(&ra_state, inode->i_mapping); | 651 | file_ra_state_init(&ra_state, inode->i_mapping); |
612 | for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) { | 652 | do { |
613 | error = read_rindex_entry(ip, &ra_state); | 653 | error = read_rindex_entry(ip, &ra_state); |
614 | if (error) { | 654 | } while (error == 0); |
615 | clear_rgrpdi(sdp); | 655 | |
616 | return error; | 656 | if (error < 0) |
617 | } | 657 | return error; |
618 | } | ||
619 | 658 | ||
620 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
621 | if (rgd->rd_data > max_data) | ||
622 | max_data = rgd->rd_data; | ||
623 | sdp->sd_max_rg_data = max_data; | ||
624 | sdp->sd_rindex_uptodate = 1; | 659 | sdp->sd_rindex_uptodate = 1; |
625 | return 0; | 660 | return 0; |
626 | } | 661 | } |
627 | 662 | ||
628 | /** | 663 | /** |
629 | * gfs2_rindex_hold - Grab a lock on the rindex | 664 | * gfs2_rindex_update - Update the rindex if required |
630 | * @sdp: The GFS2 superblock | 665 | * @sdp: The GFS2 superblock |
631 | * @ri_gh: the glock holder | ||
632 | * | 666 | * |
633 | * We grab a lock on the rindex inode to make sure that it doesn't | 667 | * We grab a lock on the rindex inode to make sure that it doesn't |
634 | * change whilst we are performing an operation. We keep this lock | 668 | * change whilst we are performing an operation. We keep this lock |
@@ -640,30 +674,29 @@ int gfs2_ri_update(struct gfs2_inode *ip) | |||
640 | * special file, which might have been updated if someone expanded the | 674 | * special file, which might have been updated if someone expanded the |
641 | * filesystem (via gfs2_grow utility), which adds new resource groups. | 675 | * filesystem (via gfs2_grow utility), which adds new resource groups. |
642 | * | 676 | * |
643 | * Returns: 0 on success, error code otherwise | 677 | * Returns: 0 on success, error code otherwise |
644 | */ | 678 | */ |
645 | 679 | ||
646 | int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) | 680 | int gfs2_rindex_update(struct gfs2_sbd *sdp) |
647 | { | 681 | { |
648 | struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); | 682 | struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); |
649 | struct gfs2_glock *gl = ip->i_gl; | 683 | struct gfs2_glock *gl = ip->i_gl; |
650 | int error; | 684 | struct gfs2_holder ri_gh; |
651 | 685 | int error = 0; | |
652 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh); | ||
653 | if (error) | ||
654 | return error; | ||
655 | 686 | ||
656 | /* Read new copy from disk if we don't have the latest */ | 687 | /* Read new copy from disk if we don't have the latest */ |
657 | if (!sdp->sd_rindex_uptodate) { | 688 | if (!sdp->sd_rindex_uptodate) { |
658 | mutex_lock(&sdp->sd_rindex_mutex); | 689 | mutex_lock(&sdp->sd_rindex_mutex); |
659 | if (!sdp->sd_rindex_uptodate) { | 690 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); |
691 | if (error) | ||
692 | return error; | ||
693 | if (!sdp->sd_rindex_uptodate) | ||
660 | error = gfs2_ri_update(ip); | 694 | error = gfs2_ri_update(ip); |
661 | if (error) | 695 | gfs2_glock_dq_uninit(&ri_gh); |
662 | gfs2_glock_dq_uninit(ri_gh); | ||
663 | } | ||
664 | mutex_unlock(&sdp->sd_rindex_mutex); | 696 | mutex_unlock(&sdp->sd_rindex_mutex); |
665 | } | 697 | } |
666 | 698 | ||
699 | |||
667 | return error; | 700 | return error; |
668 | } | 701 | } |
669 | 702 | ||
@@ -694,7 +727,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) | |||
694 | } | 727 | } |
695 | 728 | ||
696 | /** | 729 | /** |
697 | * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps | 730 | * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps |
698 | * @rgd: the struct gfs2_rgrpd describing the RG to read in | 731 | * @gh: the glock holder for the resource group |
699 | * | 732 | * |
700 | * Read in all of a Resource Group's header and bitmap blocks. | 733 | * Read in all of a Resource Group's header and bitmap blocks. |
@@ -703,8 +736,9 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) | |||
703 | * Returns: errno | 736 | * Returns: errno |
704 | */ | 737 | */ |
705 | 738 | ||
706 | int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | 739 | int gfs2_rgrp_go_lock(struct gfs2_holder *gh) |
707 | { | 740 | { |
741 | struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; | ||
708 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 742 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
709 | struct gfs2_glock *gl = rgd->rd_gl; | 743 | struct gfs2_glock *gl = rgd->rd_gl; |
710 | unsigned int length = rgd->rd_length; | 744 | unsigned int length = rgd->rd_length; |
@@ -712,17 +746,6 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
712 | unsigned int x, y; | 746 | unsigned int x, y; |
713 | int error; | 747 | int error; |
714 | 748 | ||
715 | mutex_lock(&rgd->rd_mutex); | ||
716 | |||
717 | spin_lock(&sdp->sd_rindex_spin); | ||
718 | if (rgd->rd_bh_count) { | ||
719 | rgd->rd_bh_count++; | ||
720 | spin_unlock(&sdp->sd_rindex_spin); | ||
721 | mutex_unlock(&rgd->rd_mutex); | ||
722 | return 0; | ||
723 | } | ||
724 | spin_unlock(&sdp->sd_rindex_spin); | ||
725 | |||
726 | for (x = 0; x < length; x++) { | 749 | for (x = 0; x < length; x++) { |
727 | bi = rgd->rd_bits + x; | 750 | bi = rgd->rd_bits + x; |
728 | error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); | 751 | error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); |
@@ -747,15 +770,9 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
747 | clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); | 770 | clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); |
748 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); | 771 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
749 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 772 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
773 | rgd->rd_free_clone = rgd->rd_free; | ||
750 | } | 774 | } |
751 | 775 | ||
752 | spin_lock(&sdp->sd_rindex_spin); | ||
753 | rgd->rd_free_clone = rgd->rd_free; | ||
754 | rgd->rd_bh_count++; | ||
755 | spin_unlock(&sdp->sd_rindex_spin); | ||
756 | |||
757 | mutex_unlock(&rgd->rd_mutex); | ||
758 | |||
759 | return 0; | 776 | return 0; |
760 | 777 | ||
761 | fail: | 778 | fail: |
@@ -765,52 +782,32 @@ fail: | |||
765 | bi->bi_bh = NULL; | 782 | bi->bi_bh = NULL; |
766 | gfs2_assert_warn(sdp, !bi->bi_clone); | 783 | gfs2_assert_warn(sdp, !bi->bi_clone); |
767 | } | 784 | } |
768 | mutex_unlock(&rgd->rd_mutex); | ||
769 | 785 | ||
770 | return error; | 786 | return error; |
771 | } | 787 | } |
772 | 788 | ||
773 | void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd) | ||
774 | { | ||
775 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
776 | |||
777 | spin_lock(&sdp->sd_rindex_spin); | ||
778 | gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); | ||
779 | rgd->rd_bh_count++; | ||
780 | spin_unlock(&sdp->sd_rindex_spin); | ||
781 | } | ||
782 | |||
783 | /** | 789 | /** |
784 | * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get() | 790 | * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_go_lock() |
785 | * @rgd: the struct gfs2_rgrpd describing the RG to read in | 791 | * @gh: the glock holder for the resource group |
786 | * | 792 | * |
787 | */ | 793 | */ |
788 | 794 | ||
789 | void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) | 795 | void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) |
790 | { | 796 | { |
791 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 797 | struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; |
792 | int x, length = rgd->rd_length; | 798 | int x, length = rgd->rd_length; |
793 | 799 | ||
794 | spin_lock(&sdp->sd_rindex_spin); | ||
795 | gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); | ||
796 | if (--rgd->rd_bh_count) { | ||
797 | spin_unlock(&sdp->sd_rindex_spin); | ||
798 | return; | ||
799 | } | ||
800 | |||
801 | for (x = 0; x < length; x++) { | 800 | for (x = 0; x < length; x++) { |
802 | struct gfs2_bitmap *bi = rgd->rd_bits + x; | 801 | struct gfs2_bitmap *bi = rgd->rd_bits + x; |
803 | kfree(bi->bi_clone); | ||
804 | bi->bi_clone = NULL; | ||
805 | brelse(bi->bi_bh); | 802 | brelse(bi->bi_bh); |
806 | bi->bi_bh = NULL; | 803 | bi->bi_bh = NULL; |
807 | } | 804 | } |
808 | 805 | ||
809 | spin_unlock(&sdp->sd_rindex_spin); | ||
810 | } | 806 | } |
811 | 807 | ||
812 | static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | 808 | void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, |
813 | const struct gfs2_bitmap *bi) | 809 | struct buffer_head *bh, |
810 | const struct gfs2_bitmap *bi) | ||
814 | { | 811 | { |
815 | struct super_block *sb = sdp->sd_vfs; | 812 | struct super_block *sb = sdp->sd_vfs; |
816 | struct block_device *bdev = sb->s_bdev; | 813 | struct block_device *bdev = sb->s_bdev; |
@@ -823,7 +820,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
823 | unsigned int x; | 820 | unsigned int x; |
824 | 821 | ||
825 | for (x = 0; x < bi->bi_len; x++) { | 822 | for (x = 0; x < bi->bi_len; x++) { |
826 | const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x; | 823 | const u8 *orig = bh->b_data + bi->bi_offset + x; |
827 | const u8 *clone = bi->bi_clone + bi->bi_offset + x; | 824 | const u8 *clone = bi->bi_clone + bi->bi_offset + x; |
828 | u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); | 825 | u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); |
829 | diff &= 0x55; | 826 | diff &= 0x55; |
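The bitmap arithmetic above is worth unpacking: GFS2 packs four blocks per bitmap byte at two bits each, with 0 meaning free, so (b | b >> 1) & 0x55 collapses each two-bit pair into a single in-use bit. ANDing "free in the on-disk bitmap" with "used in the clone" therefore flags exactly the blocks freed since the clone was taken, i.e. the candidates for a discard request. A standalone demonstration:

#include <stdint.h>
#include <stdio.h>

/* Leaves the low bit of each 2-bit pair set iff the pair is non-zero,
 * i.e. the block is in some in-use state. */
static uint8_t in_use_mask(uint8_t b)
{
    return (b | (b >> 1)) & 0x55;
}

int main(void)
{
    uint8_t orig  = 0x00;   /* all four blocks now free on disk */
    uint8_t clone = 0x44;   /* pairs 1 and 3 were in use in the clone */

    /* free now AND used in the snapshot => freed since the clone */
    uint8_t diff = ~in_use_mask(orig) & in_use_mask(clone) & 0x55;

    printf("discard candidate mask: 0x%02x\n", diff);   /* prints 0x44 */
    return 0;
}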
@@ -862,28 +859,6 @@ fail: | |||
862 | sdp->sd_args.ar_discard = 0; | 859 | sdp->sd_args.ar_discard = 0; |
863 | } | 860 | } |
864 | 861 | ||
865 | void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | ||
866 | { | ||
867 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
868 | unsigned int length = rgd->rd_length; | ||
869 | unsigned int x; | ||
870 | |||
871 | for (x = 0; x < length; x++) { | ||
872 | struct gfs2_bitmap *bi = rgd->rd_bits + x; | ||
873 | if (!bi->bi_clone) | ||
874 | continue; | ||
875 | if (sdp->sd_args.ar_discard) | ||
876 | gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); | ||
877 | clear_bit(GBF_FULL, &bi->bi_flags); | ||
878 | memcpy(bi->bi_clone + bi->bi_offset, | ||
879 | bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); | ||
880 | } | ||
881 | |||
882 | spin_lock(&sdp->sd_rindex_spin); | ||
883 | rgd->rd_free_clone = rgd->rd_free; | ||
884 | spin_unlock(&sdp->sd_rindex_spin); | ||
885 | } | ||
886 | |||
887 | /** | 862 | /** |
888 | * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode | 863 | * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode |
889 | * @ip: the incore GFS2 inode structure | 864 | * @ip: the incore GFS2 inode structure |
@@ -893,38 +868,35 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | |||
893 | 868 | ||
894 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) | 869 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) |
895 | { | 870 | { |
871 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
872 | int error; | ||
896 | BUG_ON(ip->i_alloc != NULL); | 873 | BUG_ON(ip->i_alloc != NULL); |
897 | ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); | 874 | ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); |
875 | error = gfs2_rindex_update(sdp); | ||
876 | if (error) | ||
877 | fs_warn(sdp, "rindex update returns %d\n", error); | ||
898 | return ip->i_alloc; | 878 | return ip->i_alloc; |
899 | } | 879 | } |
900 | 880 | ||
901 | /** | 881 | /** |
902 | * try_rgrp_fit - See if a given reservation will fit in a given RG | 882 | * try_rgrp_fit - See if a given reservation will fit in a given RG |
903 | * @rgd: the RG data | 883 | * @rgd: the RG data |
904 | * @al: the struct gfs2_alloc structure describing the reservation | 884 | * @ip: the inode |
905 | * | 885 | * |
906 | * If there's room for the requested blocks to be allocated from the RG: | 886 | * If there's room for the requested blocks to be allocated from the RG: |
907 | * Sets the $al_rgd field in @al. | ||
908 | * | 887 | * |
909 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | 888 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) |
910 | */ | 889 | */ |
911 | 890 | ||
912 | static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | 891 | static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) |
913 | { | 892 | { |
914 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 893 | const struct gfs2_alloc *al = ip->i_alloc; |
915 | int ret = 0; | ||
916 | 894 | ||
917 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) | 895 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) |
918 | return 0; | 896 | return 0; |
919 | 897 | if (rgd->rd_free_clone >= al->al_requested) | |
920 | spin_lock(&sdp->sd_rindex_spin); | 898 | return 1; |
921 | if (rgd->rd_free_clone >= al->al_requested) { | 899 | return 0; |
922 | al->al_rgd = rgd; | ||
923 | ret = 1; | ||
924 | } | ||
925 | spin_unlock(&sdp->sd_rindex_spin); | ||
926 | |||
927 | return ret; | ||
928 | } | 900 | } |
929 | 901 | ||
930 | /** | 902 | /** |
@@ -992,76 +964,6 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
992 | } | 964 | } |
993 | 965 | ||
994 | /** | 966 | /** |
995 | * recent_rgrp_next - get next RG from "recent" list | ||
996 | * @cur_rgd: current rgrp | ||
997 | * | ||
998 | * Returns: The next rgrp in the recent list | ||
999 | */ | ||
1000 | |||
1001 | static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) | ||
1002 | { | ||
1003 | struct gfs2_sbd *sdp = cur_rgd->rd_sbd; | ||
1004 | struct list_head *head; | ||
1005 | struct gfs2_rgrpd *rgd; | ||
1006 | |||
1007 | spin_lock(&sdp->sd_rindex_spin); | ||
1008 | head = &sdp->sd_rindex_mru_list; | ||
1009 | if (unlikely(cur_rgd->rd_list_mru.next == head)) { | ||
1010 | spin_unlock(&sdp->sd_rindex_spin); | ||
1011 | return NULL; | ||
1012 | } | ||
1013 | rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); | ||
1014 | spin_unlock(&sdp->sd_rindex_spin); | ||
1015 | return rgd; | ||
1016 | } | ||
1017 | |||
1018 | /** | ||
1019 | * forward_rgrp_get - get an rgrp to try next from full list | ||
1020 | * @sdp: The GFS2 superblock | ||
1021 | * | ||
1022 | * Returns: The rgrp to try next | ||
1023 | */ | ||
1024 | |||
1025 | static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp) | ||
1026 | { | ||
1027 | struct gfs2_rgrpd *rgd; | ||
1028 | unsigned int journals = gfs2_jindex_size(sdp); | ||
1029 | unsigned int rg = 0, x; | ||
1030 | |||
1031 | spin_lock(&sdp->sd_rindex_spin); | ||
1032 | |||
1033 | rgd = sdp->sd_rindex_forward; | ||
1034 | if (!rgd) { | ||
1035 | if (sdp->sd_rgrps >= journals) | ||
1036 | rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals; | ||
1037 | |||
1038 | for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg; | ||
1039 | x++, rgd = gfs2_rgrpd_get_next(rgd)) | ||
1040 | /* Do Nothing */; | ||
1041 | |||
1042 | sdp->sd_rindex_forward = rgd; | ||
1043 | } | ||
1044 | |||
1045 | spin_unlock(&sdp->sd_rindex_spin); | ||
1046 | |||
1047 | return rgd; | ||
1048 | } | ||
1049 | |||
1050 | /** | ||
1051 | * forward_rgrp_set - set the forward rgrp pointer | ||
1052 | * @sdp: the filesystem | ||
1053 | * @rgd: The new forward rgrp | ||
1054 | * | ||
1055 | */ | ||
1056 | |||
1057 | static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) | ||
1058 | { | ||
1059 | spin_lock(&sdp->sd_rindex_spin); | ||
1060 | sdp->sd_rindex_forward = rgd; | ||
1061 | spin_unlock(&sdp->sd_rindex_spin); | ||
1062 | } | ||
1063 | |||
1064 | /** | ||
1065 | * get_local_rgrp - Choose and lock a rgrp for allocation | 967 | * get_local_rgrp - Choose and lock a rgrp for allocation |
1066 | * @ip: the inode to reserve space for | 968 | * @ip: the inode to reserve space for |
1067 | * @rgp: the chosen and locked rgrp | 969 | * @rgp: the chosen and locked rgrp |
@@ -1076,14 +978,18 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1076 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 978 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1077 | struct gfs2_rgrpd *rgd, *begin = NULL; | 979 | struct gfs2_rgrpd *rgd, *begin = NULL; |
1078 | struct gfs2_alloc *al = ip->i_alloc; | 980 | struct gfs2_alloc *al = ip->i_alloc; |
1079 | int flags = LM_FLAG_TRY; | ||
1080 | int skipped = 0; | ||
1081 | int loops = 0; | ||
1082 | int error, rg_locked; | 981 | int error, rg_locked; |
982 | int loops = 0; | ||
983 | |||
984 | if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) | ||
985 | rgd = begin = ip->i_rgd; | ||
986 | else | ||
987 | rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); | ||
1083 | 988 | ||
1084 | rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); | 989 | if (rgd == NULL) |
990 | return -EBADSLT; | ||
1085 | 991 | ||
1086 | while (rgd) { | 992 | while (loops < 3) { |
1087 | rg_locked = 0; | 993 | rg_locked = 0; |
1088 | 994 | ||
1089 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | 995 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { |
@@ -1095,92 +1001,36 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1095 | } | 1001 | } |
1096 | switch (error) { | 1002 | switch (error) { |
1097 | case 0: | 1003 | case 0: |
1098 | if (try_rgrp_fit(rgd, al)) | 1004 | if (try_rgrp_fit(rgd, ip)) { |
1099 | goto out; | 1005 | ip->i_rgd = rgd; |
1006 | return 0; | ||
1007 | } | ||
1100 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1008 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1101 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); | 1009 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); |
1102 | if (!rg_locked) | 1010 | if (!rg_locked) |
1103 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1011 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1104 | /* fall through */ | 1012 | /* fall through */ |
1105 | case GLR_TRYFAILED: | 1013 | case GLR_TRYFAILED: |
1106 | rgd = recent_rgrp_next(rgd); | 1014 | rgd = gfs2_rgrpd_get_next(rgd); |
1107 | break; | 1015 | if (rgd == begin) |
1108 | 1016 | loops++; | |
1109 | default: | ||
1110 | return error; | ||
1111 | } | ||
1112 | } | ||
1113 | |||
1114 | /* Go through full list of rgrps */ | ||
1115 | |||
1116 | begin = rgd = forward_rgrp_get(sdp); | ||
1117 | |||
1118 | for (;;) { | ||
1119 | rg_locked = 0; | ||
1120 | |||
1121 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | ||
1122 | rg_locked = 1; | ||
1123 | error = 0; | ||
1124 | } else { | ||
1125 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, | ||
1126 | &al->al_rgd_gh); | ||
1127 | } | ||
1128 | switch (error) { | ||
1129 | case 0: | ||
1130 | if (try_rgrp_fit(rgd, al)) | ||
1131 | goto out; | ||
1132 | if (rgd->rd_flags & GFS2_RDF_CHECK) | ||
1133 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); | ||
1134 | if (!rg_locked) | ||
1135 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1136 | break; | ||
1137 | |||
1138 | case GLR_TRYFAILED: | ||
1139 | skipped++; | ||
1140 | break; | 1017 | break; |
1141 | |||
1142 | default: | 1018 | default: |
1143 | return error; | 1019 | return error; |
1144 | } | 1020 | } |
1145 | |||
1146 | rgd = gfs2_rgrpd_get_next(rgd); | ||
1147 | if (!rgd) | ||
1148 | rgd = gfs2_rgrpd_get_first(sdp); | ||
1149 | |||
1150 | if (rgd == begin) { | ||
1151 | if (++loops >= 3) | ||
1152 | return -ENOSPC; | ||
1153 | if (!skipped) | ||
1154 | loops++; | ||
1155 | flags = 0; | ||
1156 | if (loops == 2) | ||
1157 | gfs2_log_flush(sdp, NULL); | ||
1158 | } | ||
1159 | } | 1021 | } |
1160 | 1022 | ||
1161 | out: | 1023 | return -ENOSPC; |
1162 | if (begin) { | ||
1163 | spin_lock(&sdp->sd_rindex_spin); | ||
1164 | list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | ||
1165 | spin_unlock(&sdp->sd_rindex_spin); | ||
1166 | rgd = gfs2_rgrpd_get_next(rgd); | ||
1167 | if (!rgd) | ||
1168 | rgd = gfs2_rgrpd_get_first(sdp); | ||
1169 | forward_rgrp_set(sdp, rgd); | ||
1170 | } | ||
1171 | |||
1172 | return 0; | ||
1173 | } | 1024 | } |
1174 | 1025 | ||
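With the MRU list and the forward pointer gone, get_local_rgrp() above reduces to one cyclic scan: start from the cached rgrp (or the one derived from i_goal), step forward with wraparound, and return -ENOSPC after three full passes, with -EBADSLT reserved for the case where no rgrp covers the goal block at all. The same control flow over a plain array, where slots stand in for resource groups and the modulo step plays the role of the now-wrapping gfs2_rgrpd_get_next():

#include <errno.h>

/* Cyclic scan: give up once the cursor has passed `start' three times. */
static int pick_slot(const unsigned *free_blocks, unsigned nslots,
                     unsigned start, unsigned need)
{
    unsigned i = start;
    int loops = 0;

    while (loops < 3) {
        if (free_blocks[i] >= need)
            return (int)i;              /* this slot fits */
        i = (i + 1) % nslots;           /* wrap, like get_next() now does */
        if (i == start)
            loops++;
    }
    return -ENOSPC;
}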
1175 | /** | 1026 | /** |
1176 | * gfs2_inplace_reserve_i - Reserve space in the filesystem | 1027 | * gfs2_inplace_reserve - Reserve space in the filesystem |
1177 | * @ip: the inode to reserve space for | 1028 | * @ip: the inode to reserve space for |
1178 | * | 1029 | * |
1179 | * Returns: errno | 1030 | * Returns: errno |
1180 | */ | 1031 | */ |
1181 | 1032 | ||
1182 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | 1033 | int gfs2_inplace_reserve(struct gfs2_inode *ip) |
1183 | char *file, unsigned int line) | ||
1184 | { | 1034 | { |
1185 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1035 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1186 | struct gfs2_alloc *al = ip->i_alloc; | 1036 | struct gfs2_alloc *al = ip->i_alloc; |
@@ -1191,45 +1041,22 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | |||
1191 | if (gfs2_assert_warn(sdp, al->al_requested)) | 1041 | if (gfs2_assert_warn(sdp, al->al_requested)) |
1192 | return -EINVAL; | 1042 | return -EINVAL; |
1193 | 1043 | ||
1194 | if (hold_rindex) { | ||
1195 | /* We need to hold the rindex unless the inode we're using is | ||
1196 | the rindex itself, in which case it's already held. */ | ||
1197 | if (ip != GFS2_I(sdp->sd_rindex)) | ||
1198 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | ||
1199 | else if (!sdp->sd_rgrps) /* We may not have the rindex read | ||
1200 | in, so: */ | ||
1201 | error = gfs2_ri_update(ip); | ||
1202 | if (error) | ||
1203 | return error; | ||
1204 | } | ||
1205 | |||
1206 | try_again: | ||
1207 | do { | 1044 | do { |
1208 | error = get_local_rgrp(ip, &last_unlinked); | 1045 | error = get_local_rgrp(ip, &last_unlinked); |
1209 | /* If there is no space, flushing the log may release some */ | 1046 | if (error != -ENOSPC) |
1210 | if (error) { | 1047 | break; |
1211 | if (ip == GFS2_I(sdp->sd_rindex) && | 1048 | /* Check that fs hasn't grown if writing to rindex */ |
1212 | !sdp->sd_rindex_uptodate) { | 1049 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { |
1213 | error = gfs2_ri_update(ip); | 1050 | error = gfs2_ri_update(ip); |
1214 | if (error) | 1051 | if (error) |
1215 | return error; | 1052 | break; |
1216 | goto try_again; | 1053 | continue; |
1217 | } | ||
1218 | gfs2_log_flush(sdp, NULL); | ||
1219 | } | 1054 | } |
1220 | } while (error && tries++ < 3); | 1055 | /* Flushing the log may release space */ |
1221 | 1056 | gfs2_log_flush(sdp, NULL); | |
1222 | if (error) { | 1057 | } while (tries++ < 3); |
1223 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) | ||
1224 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
1225 | return error; | ||
1226 | } | ||
1227 | |||
1228 | /* no error, so we have the rgrp set in the inode's allocation. */ | ||
1229 | al->al_file = file; | ||
1230 | al->al_line = line; | ||
1231 | 1058 | ||
1232 | return 0; | 1059 | return error; |
1233 | } | 1060 | } |
1234 | 1061 | ||
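gfs2_inplace_reserve() then wraps that scan in a bounded retry: -ENOSPC may only mean the free space is still tied up in the journal, so flushing the log and retrying can succeed where the first pass failed. A toy model of the loop; try_allocate() and flush_log() are invented stand-ins for get_local_rgrp() and gfs2_log_flush():

#include <errno.h>

static unsigned pinned_blocks = 64;     /* freed but held by the journal */
static unsigned available_blocks;

static int try_allocate(unsigned need)
{
    return available_blocks >= need ? 0 : -ENOSPC;
}

static void flush_log(void)
{
    available_blocks += pinned_blocks;  /* flushing releases pinned space */
    pinned_blocks = 0;
}

static int reserve(unsigned need)
{
    int tries = 0, error;

    do {
        error = try_allocate(need);
        if (error != -ENOSPC)
            break;
        flush_log();                    /* may turn ENOSPC into success */
    } while (tries++ < 3);

    return error;
}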
1235 | /** | 1062 | /** |
@@ -1241,20 +1068,10 @@ try_again: | |||
1241 | 1068 | ||
1242 | void gfs2_inplace_release(struct gfs2_inode *ip) | 1069 | void gfs2_inplace_release(struct gfs2_inode *ip) |
1243 | { | 1070 | { |
1244 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1245 | struct gfs2_alloc *al = ip->i_alloc; | 1071 | struct gfs2_alloc *al = ip->i_alloc; |
1246 | 1072 | ||
1247 | if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) | ||
1248 | fs_warn(sdp, "al_alloced = %u, al_requested = %u " | ||
1249 | "al_file = %s, al_line = %u\n", | ||
1250 | al->al_alloced, al->al_requested, al->al_file, | ||
1251 | al->al_line); | ||
1252 | |||
1253 | al->al_rgd = NULL; | ||
1254 | if (al->al_rgd_gh.gh_gl) | 1073 | if (al->al_rgd_gh.gh_gl) |
1255 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1074 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1256 | if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) | ||
1257 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
1258 | } | 1075 | } |
1259 | 1076 | ||
1260 | /** | 1077 | /** |
@@ -1352,6 +1169,7 @@ do_search: | |||
1352 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1169 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
1353 | bitmaps, so we must search the originals for that. */ | 1170 | bitmaps, so we must search the originals for that. */ |
1354 | buffer = bi->bi_bh->b_data + bi->bi_offset; | 1171 | buffer = bi->bi_bh->b_data + bi->bi_offset; |
1172 | WARN_ON(!buffer_uptodate(bi->bi_bh)); | ||
1355 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1173 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
1356 | buffer = bi->bi_clone + bi->bi_offset; | 1174 | buffer = bi->bi_clone + bi->bi_offset; |
1357 | 1175 | ||
@@ -1371,6 +1189,7 @@ skip: | |||
1371 | 1189 | ||
1372 | if (blk == BFITNOENT) | 1190 | if (blk == BFITNOENT) |
1373 | return blk; | 1191 | return blk; |
1192 | |||
1374 | *n = 1; | 1193 | *n = 1; |
1375 | if (old_state == new_state) | 1194 | if (old_state == new_state) |
1376 | goto out; | 1195 | goto out; |
@@ -1503,7 +1322,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1503 | if (al == NULL) | 1322 | if (al == NULL) |
1504 | return -ECANCELED; | 1323 | return -ECANCELED; |
1505 | 1324 | ||
1506 | rgd = al->al_rgd; | 1325 | rgd = ip->i_rgd; |
1507 | 1326 | ||
1508 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1327 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1509 | goal = ip->i_goal - rgd->rd_data0; | 1328 | goal = ip->i_goal - rgd->rd_data0; |
@@ -1518,7 +1337,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1518 | 1337 | ||
1519 | rgd->rd_last_alloc = blk; | 1338 | rgd->rd_last_alloc = blk; |
1520 | block = rgd->rd_data0 + blk; | 1339 | block = rgd->rd_data0 + blk; |
1521 | ip->i_goal = block; | 1340 | ip->i_goal = block + *n - 1; |
1522 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1341 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1523 | if (error == 0) { | 1342 | if (error == 0) { |
1524 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | 1343 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; |
@@ -1539,9 +1358,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1539 | gfs2_statfs_change(sdp, 0, -(s64)*n, 0); | 1358 | gfs2_statfs_change(sdp, 0, -(s64)*n, 0); |
1540 | gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1359 | gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); |
1541 | 1360 | ||
1542 | spin_lock(&sdp->sd_rindex_spin); | ||
1543 | rgd->rd_free_clone -= *n; | 1361 | rgd->rd_free_clone -= *n; |
1544 | spin_unlock(&sdp->sd_rindex_spin); | ||
1545 | trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); | 1362 | trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); |
1546 | *bn = block; | 1363 | *bn = block; |
1547 | return 0; | 1364 | return 0; |
@@ -1564,7 +1381,7 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) | |||
1564 | { | 1381 | { |
1565 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 1382 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
1566 | struct gfs2_alloc *al = dip->i_alloc; | 1383 | struct gfs2_alloc *al = dip->i_alloc; |
1567 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1384 | struct gfs2_rgrpd *rgd = dip->i_rgd; |
1568 | u32 blk; | 1385 | u32 blk; |
1569 | u64 block; | 1386 | u64 block; |
1570 | unsigned int n = 1; | 1387 | unsigned int n = 1; |
@@ -1594,9 +1411,7 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) | |||
1594 | gfs2_statfs_change(sdp, 0, -1, +1); | 1411 | gfs2_statfs_change(sdp, 0, -1, +1); |
1595 | gfs2_trans_add_unrevoke(sdp, block, 1); | 1412 | gfs2_trans_add_unrevoke(sdp, block, 1); |
1596 | 1413 | ||
1597 | spin_lock(&sdp->sd_rindex_spin); | ||
1598 | rgd->rd_free_clone--; | 1414 | rgd->rd_free_clone--; |
1599 | spin_unlock(&sdp->sd_rindex_spin); | ||
1600 | trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); | 1415 | trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); |
1601 | *bn = block; | 1416 | *bn = block; |
1602 | return 0; | 1417 | return 0; |
@@ -1629,8 +1444,6 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) | |||
1629 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1444 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1630 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1445 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1631 | 1446 | ||
1632 | gfs2_trans_add_rg(rgd); | ||
1633 | |||
1634 | /* Directories keep their data in the metadata address space */ | 1447 | /* Directories keep their data in the metadata address space */ |
1635 | if (meta || ip->i_depth) | 1448 | if (meta || ip->i_depth) |
1636 | gfs2_meta_wipe(ip, bstart, blen); | 1449 | gfs2_meta_wipe(ip, bstart, blen); |
@@ -1666,7 +1479,6 @@ void gfs2_unlink_di(struct inode *inode) | |||
1666 | trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); | 1479 | trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); |
1667 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1480 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1668 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1481 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1669 | gfs2_trans_add_rg(rgd); | ||
1670 | } | 1482 | } |
1671 | 1483 | ||
1672 | static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | 1484 | static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) |
@@ -1688,7 +1500,6 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | |||
1688 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1500 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1689 | 1501 | ||
1690 | gfs2_statfs_change(sdp, 0, +1, -1); | 1502 | gfs2_statfs_change(sdp, 0, +1, -1); |
1691 | gfs2_trans_add_rg(rgd); | ||
1692 | } | 1503 | } |
1693 | 1504 | ||
1694 | 1505 | ||
@@ -1714,41 +1525,33 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | |||
1714 | int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) | 1525 | int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) |
1715 | { | 1526 | { |
1716 | struct gfs2_rgrpd *rgd; | 1527 | struct gfs2_rgrpd *rgd; |
1717 | struct gfs2_holder ri_gh, rgd_gh; | 1528 | struct gfs2_holder rgd_gh; |
1718 | struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); | ||
1719 | int ri_locked = 0; | ||
1720 | int error; | 1529 | int error; |
1721 | 1530 | ||
1722 | if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { | 1531 | error = gfs2_rindex_update(sdp); |
1723 | error = gfs2_rindex_hold(sdp, &ri_gh); | 1532 | if (error) |
1724 | if (error) | 1533 | return error; |
1725 | goto fail; | ||
1726 | ri_locked = 1; | ||
1727 | } | ||
1728 | 1534 | ||
1729 | error = -EINVAL; | 1535 | error = -EINVAL; |
1730 | rgd = gfs2_blk2rgrpd(sdp, no_addr); | 1536 | rgd = gfs2_blk2rgrpd(sdp, no_addr); |
1731 | if (!rgd) | 1537 | if (!rgd) |
1732 | goto fail_rindex; | 1538 | goto fail; |
1733 | 1539 | ||
1734 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); | 1540 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); |
1735 | if (error) | 1541 | if (error) |
1736 | goto fail_rindex; | 1542 | goto fail; |
1737 | 1543 | ||
1738 | if (gfs2_get_block_type(rgd, no_addr) != type) | 1544 | if (gfs2_get_block_type(rgd, no_addr) != type) |
1739 | error = -ESTALE; | 1545 | error = -ESTALE; |
1740 | 1546 | ||
1741 | gfs2_glock_dq_uninit(&rgd_gh); | 1547 | gfs2_glock_dq_uninit(&rgd_gh); |
1742 | fail_rindex: | ||
1743 | if (ri_locked) | ||
1744 | gfs2_glock_dq_uninit(&ri_gh); | ||
1745 | fail: | 1548 | fail: |
1746 | return error; | 1549 | return error; |
1747 | } | 1550 | } |
1748 | 1551 | ||
1749 | /** | 1552 | /** |
1750 | * gfs2_rlist_add - add a RG to a list of RGs | 1553 | * gfs2_rlist_add - add a RG to a list of RGs |
1751 | * @sdp: the filesystem | 1554 | * @ip: the inode |
1752 | * @rlist: the list of resource groups | 1555 | * @rlist: the list of resource groups |
1753 | * @block: the block | 1556 | * @block: the block |
1754 | * | 1557 | * |
@@ -1758,9 +1561,10 @@ fail: | |||
1758 | * | 1561 | * |
1759 | */ | 1562 | */ |
1760 | 1563 | ||
1761 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | 1564 | void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, |
1762 | u64 block) | 1565 | u64 block) |
1763 | { | 1566 | { |
1567 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1764 | struct gfs2_rgrpd *rgd; | 1568 | struct gfs2_rgrpd *rgd; |
1765 | struct gfs2_rgrpd **tmp; | 1569 | struct gfs2_rgrpd **tmp; |
1766 | unsigned int new_space; | 1570 | unsigned int new_space; |
@@ -1769,12 +1573,15 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | |||
1769 | if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) | 1573 | if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) |
1770 | return; | 1574 | return; |
1771 | 1575 | ||
1772 | rgd = gfs2_blk2rgrpd(sdp, block); | 1576 | if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) |
1577 | rgd = ip->i_rgd; | ||
1578 | else | ||
1579 | rgd = gfs2_blk2rgrpd(sdp, block); | ||
1773 | if (!rgd) { | 1580 | if (!rgd) { |
1774 | if (gfs2_consist(sdp)) | 1581 | fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); |
1775 | fs_err(sdp, "block = %llu\n", (unsigned long long)block); | ||
1776 | return; | 1582 | return; |
1777 | } | 1583 | } |
1584 | ip->i_rgd = rgd; | ||
1778 | 1585 | ||
1779 | for (x = 0; x < rlist->rl_rgrps; x++) | 1586 | for (x = 0; x < rlist->rl_rgrps; x++) |
1780 | if (rlist->rl_rgd[x] == rgd) | 1587 | if (rlist->rl_rgd[x] == rgd) |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index d253f9a8c70e..cf5c50180192 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -18,18 +18,15 @@ struct gfs2_holder; | |||
18 | 18 | ||
19 | extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); | 19 | extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); |
20 | 20 | ||
21 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); | 21 | extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); |
22 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); | 22 | extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); |
23 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); | 23 | extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); |
24 | 24 | ||
25 | extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); | 25 | extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); |
26 | extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); | 26 | extern int gfs2_rindex_update(struct gfs2_sbd *sdp); |
27 | 27 | extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); | |
28 | extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); | 28 | extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); |
29 | extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); | 29 | extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); |
30 | extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); | ||
31 | |||
32 | extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); | ||
33 | 30 | ||
34 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 31 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
35 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) | 32 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) |
@@ -39,16 +36,9 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
39 | ip->i_alloc = NULL; | 36 | ip->i_alloc = NULL; |
40 | } | 37 | } |
41 | 38 | ||
42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | 39 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip); |
43 | char *file, unsigned int line); | ||
44 | #define gfs2_inplace_reserve(ip) \ | ||
45 | gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) | ||
46 | #define gfs2_inplace_reserve_ri(ip) \ | ||
47 | gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) | ||
48 | |||
49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 40 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
50 | 41 | ||
51 | extern int gfs2_ri_update(struct gfs2_inode *ip); | ||
52 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); | 42 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); |
53 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); | 43 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); |
54 | 44 | ||
@@ -66,11 +56,14 @@ struct gfs2_rgrp_list { | |||
66 | struct gfs2_holder *rl_ghs; | 56 | struct gfs2_holder *rl_ghs; |
67 | }; | 57 | }; |
68 | 58 | ||
69 | extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | 59 | extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, |
70 | u64 block); | 60 | u64 block); |
71 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); | 61 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
72 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 62 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
73 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 63 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
74 | extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); | 64 | extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); |
65 | extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | ||
66 | struct buffer_head *bh, | ||
67 | const struct gfs2_bitmap *bi); | ||
75 | 68 | ||
76 | #endif /* __RGRP_DOT_H__ */ | 69 | #endif /* __RGRP_DOT_H__ */ |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b7beadd9ba4c..71e420989f77 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -752,51 +752,77 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
752 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 752 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
753 | struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); | 753 | struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); |
754 | struct backing_dev_info *bdi = metamapping->backing_dev_info; | 754 | struct backing_dev_info *bdi = metamapping->backing_dev_info; |
755 | struct gfs2_holder gh; | 755 | int ret = 0; |
756 | |||
757 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
758 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
759 | if (bdi->dirty_exceeded) | ||
760 | gfs2_ail1_flush(sdp, wbc); | ||
761 | else | ||
762 | filemap_fdatawrite(metamapping); | ||
763 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
764 | ret = filemap_fdatawait(metamapping); | ||
765 | if (ret) | ||
766 | mark_inode_dirty_sync(inode); | ||
767 | return ret; | ||
768 | } | ||
769 | |||
770 | /** | ||
771 | * gfs2_dirty_inode - check for atime updates | ||
772 | * @inode: The inode in question | ||
773 | * @flags: The type of dirty | ||
774 | * | ||
775 | * Unfortunately it can be called under any combination of inode | ||
776 | * glock and transaction lock, so we have to check carefully. | ||
777 | * | ||
778 | * At the moment this deals only with atime - it should be possible | ||
779 | * to expand that role in future, once a review of the locking has | ||
780 | * been carried out. | ||
781 | */ | ||
782 | |||
783 | static void gfs2_dirty_inode(struct inode *inode, int flags) | ||
784 | { | ||
785 | struct gfs2_inode *ip = GFS2_I(inode); | ||
786 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
756 | struct buffer_head *bh; | 787 | struct buffer_head *bh; |
757 | struct timespec atime; | 788 | struct gfs2_holder gh; |
758 | struct gfs2_dinode *di; | 789 | int need_unlock = 0; |
759 | int ret = -EAGAIN; | 790 | int need_endtrans = 0; |
760 | int unlock_required = 0; | 791 | int ret; |
761 | 792 | ||
762 | /* Skip timestamp update, if this is from a memalloc */ | 793 | if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC))) |
763 | if (current->flags & PF_MEMALLOC) | 794 | return; |
764 | goto do_flush; | 795 | |
765 | if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { | 796 | if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { |
766 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 797 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
767 | if (ret) | 798 | if (ret) { |
768 | goto do_flush; | 799 | fs_err(sdp, "dirty_inode: glock %d\n", ret); |
769 | unlock_required = 1; | 800 | return; |
801 | } | ||
802 | need_unlock = 1; | ||
770 | } | 803 | } |
771 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | 804 | |
772 | if (ret) | 805 | if (current->journal_info == NULL) { |
773 | goto do_unlock; | 806 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); |
807 | if (ret) { | ||
808 | fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret); | ||
809 | goto out; | ||
810 | } | ||
811 | need_endtrans = 1; | ||
812 | } | ||
813 | |||
774 | ret = gfs2_meta_inode_buffer(ip, &bh); | 814 | ret = gfs2_meta_inode_buffer(ip, &bh); |
775 | if (ret == 0) { | 815 | if (ret == 0) { |
776 | di = (struct gfs2_dinode *)bh->b_data; | 816 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
777 | atime.tv_sec = be64_to_cpu(di->di_atime); | 817 | gfs2_dinode_out(ip, bh->b_data); |
778 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
779 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
780 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
781 | gfs2_dinode_out(ip, bh->b_data); | ||
782 | } | ||
783 | brelse(bh); | 818 | brelse(bh); |
784 | } | 819 | } |
785 | gfs2_trans_end(sdp); | 820 | |
786 | do_unlock: | 821 | if (need_endtrans) |
787 | if (unlock_required) | 822 | gfs2_trans_end(sdp); |
823 | out: | ||
824 | if (need_unlock) | ||
788 | gfs2_glock_dq_uninit(&gh); | 825 | gfs2_glock_dq_uninit(&gh); |
789 | do_flush: | ||
790 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
791 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
792 | filemap_fdatawrite(metamapping); | ||
793 | if (bdi->dirty_exceeded) | ||
794 | gfs2_ail1_flush(sdp, wbc); | ||
795 | if (!ret && (wbc->sync_mode == WB_SYNC_ALL)) | ||
796 | ret = filemap_fdatawait(metamapping); | ||
797 | if (ret) | ||
798 | mark_inode_dirty_sync(inode); | ||
799 | return ret; | ||
800 | } | 826 | } |
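The new gfs2_dirty_inode() must work whether or not the caller already holds the inode glock or an open transaction, so it acquires each conditionally, records what it took, and unwinds only that on exit. A skeleton of the pattern, with trivially stubbed state standing in for glocks and the current->journal_info test:

#include <stdbool.h>

struct ctx { bool locked, in_trans; };

static bool lock_held(struct ctx *c)      { return c->locked; }
static void lock_take(struct ctx *c)      { c->locked = true; }
static void lock_drop(struct ctx *c)      { c->locked = false; }
static bool in_transaction(struct ctx *c) { return c->in_trans; }
static void begin_trans(struct ctx *c)    { c->in_trans = true; }
static void end_trans(struct ctx *c)      { c->in_trans = false; }
static void write_dinode(struct ctx *c)   { (void)c; /* the real work */ }

static void dirty_inode(struct ctx *c)
{
    bool need_unlock = false, need_endtrans = false;

    if (!lock_held(c)) {
        lock_take(c);
        need_unlock = true;
    }
    if (!in_transaction(c)) {
        begin_trans(c);
        need_endtrans = true;
    }

    write_dinode(c);

    if (need_endtrans)                  /* release only what we took */
        end_trans(c);
    if (need_unlock)
        lock_drop(c);
}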
801 | 827 | ||
802 | /** | 828 | /** |
@@ -1011,7 +1037,6 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, | |||
1011 | 1037 | ||
1012 | static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) | 1038 | static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) |
1013 | { | 1039 | { |
1014 | struct gfs2_holder ri_gh; | ||
1015 | struct gfs2_rgrpd *rgd_next; | 1040 | struct gfs2_rgrpd *rgd_next; |
1016 | struct gfs2_holder *gha, *gh; | 1041 | struct gfs2_holder *gha, *gh; |
1017 | unsigned int slots = 64; | 1042 | unsigned int slots = 64; |
@@ -1024,10 +1049,6 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host | |||
1024 | if (!gha) | 1049 | if (!gha) |
1025 | return -ENOMEM; | 1050 | return -ENOMEM; |
1026 | 1051 | ||
1027 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
1028 | if (error) | ||
1029 | goto out; | ||
1030 | |||
1031 | rgd_next = gfs2_rgrpd_get_first(sdp); | 1052 | rgd_next = gfs2_rgrpd_get_first(sdp); |
1032 | 1053 | ||
1033 | for (;;) { | 1054 | for (;;) { |
@@ -1070,9 +1091,6 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host | |||
1070 | yield(); | 1091 | yield(); |
1071 | } | 1092 | } |
1072 | 1093 | ||
1073 | gfs2_glock_dq_uninit(&ri_gh); | ||
1074 | |||
1075 | out: | ||
1076 | kfree(gha); | 1094 | kfree(gha); |
1077 | return error; | 1095 | return error; |
1078 | } | 1096 | } |
@@ -1124,6 +1142,10 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1124 | struct gfs2_statfs_change_host sc; | 1142 | struct gfs2_statfs_change_host sc; |
1125 | int error; | 1143 | int error; |
1126 | 1144 | ||
1145 | error = gfs2_rindex_update(sdp); | ||
1146 | if (error) | ||
1147 | return error; | ||
1148 | |||
1127 | if (gfs2_tune_get(sdp, gt_statfs_slow)) | 1149 | if (gfs2_tune_get(sdp, gt_statfs_slow)) |
1128 | error = gfs2_statfs_slow(sdp, &sc); | 1150 | error = gfs2_statfs_slow(sdp, &sc); |
1129 | else | 1151 | else |
@@ -1394,21 +1416,17 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
1394 | if (error) | 1416 | if (error) |
1395 | goto out; | 1417 | goto out; |
1396 | 1418 | ||
1397 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | ||
1398 | if (error) | ||
1399 | goto out_qs; | ||
1400 | |||
1401 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 1419 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
1402 | if (!rgd) { | 1420 | if (!rgd) { |
1403 | gfs2_consist_inode(ip); | 1421 | gfs2_consist_inode(ip); |
1404 | error = -EIO; | 1422 | error = -EIO; |
1405 | goto out_rindex_relse; | 1423 | goto out_qs; |
1406 | } | 1424 | } |
1407 | 1425 | ||
1408 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, | 1426 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, |
1409 | &al->al_rgd_gh); | 1427 | &al->al_rgd_gh); |
1410 | if (error) | 1428 | if (error) |
1411 | goto out_rindex_relse; | 1429 | goto out_qs; |
1412 | 1430 | ||
1413 | error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, | 1431 | error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, |
1414 | sdp->sd_jdesc->jd_blocks); | 1432 | sdp->sd_jdesc->jd_blocks); |
@@ -1423,8 +1441,6 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
1423 | 1441 | ||
1424 | out_rg_gunlock: | 1442 | out_rg_gunlock: |
1425 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1443 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1426 | out_rindex_relse: | ||
1427 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
1428 | out_qs: | 1444 | out_qs: |
1429 | gfs2_quota_unhold(ip); | 1445 | gfs2_quota_unhold(ip); |
1430 | out: | 1446 | out: |
@@ -1471,9 +1487,11 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1471 | goto out; | 1487 | goto out; |
1472 | } | 1488 | } |
1473 | 1489 | ||
1474 | error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); | 1490 | if (!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) { |
1475 | if (error) | 1491 | error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); |
1476 | goto out_truncate; | 1492 | if (error) |
1493 | goto out_truncate; | ||
1494 | } | ||
1477 | 1495 | ||
1478 | if (test_bit(GIF_INVALID, &ip->i_flags)) { | 1496 | if (test_bit(GIF_INVALID, &ip->i_flags)) { |
1479 | error = gfs2_inode_refresh(ip); | 1497 | error = gfs2_inode_refresh(ip); |
@@ -1513,6 +1531,10 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1513 | goto out_unlock; | 1531 | goto out_unlock; |
1514 | 1532 | ||
1515 | out_truncate: | 1533 | out_truncate: |
1534 | gfs2_log_flush(sdp, ip->i_gl); | ||
1535 | write_inode_now(inode, 1); | ||
1536 | gfs2_ail_flush(ip->i_gl, 0); | ||
1537 | |||
1516 | /* Case 2 starts here */ | 1538 | /* Case 2 starts here */ |
1517 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | 1539 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); |
1518 | if (error) | 1540 | if (error) |
@@ -1552,6 +1574,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) | |||
1552 | if (ip) { | 1574 | if (ip) { |
1553 | ip->i_flags = 0; | 1575 | ip->i_flags = 0; |
1554 | ip->i_gl = NULL; | 1576 | ip->i_gl = NULL; |
1577 | ip->i_rgd = NULL; | ||
1555 | } | 1578 | } |
1556 | return &ip->i_inode; | 1579 | return &ip->i_inode; |
1557 | } | 1580 | } |
@@ -1572,6 +1595,7 @@ const struct super_operations gfs2_super_ops = { | |||
1572 | .alloc_inode = gfs2_alloc_inode, | 1595 | .alloc_inode = gfs2_alloc_inode, |
1573 | .destroy_inode = gfs2_destroy_inode, | 1596 | .destroy_inode = gfs2_destroy_inode, |
1574 | .write_inode = gfs2_write_inode, | 1597 | .write_inode = gfs2_write_inode, |
1598 | .dirty_inode = gfs2_dirty_inode, | ||
1575 | .evict_inode = gfs2_evict_inode, | 1599 | .evict_inode = gfs2_evict_inode, |
1576 | .put_super = gfs2_put_super, | 1600 | .put_super = gfs2_put_super, |
1577 | .sync_fs = gfs2_sync_fs, | 1601 | .sync_fs = gfs2_sync_fs, |
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 9ec73a854111..86ac75d99d31 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -185,8 +185,3 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) | |||
185 | gfs2_log_unlock(sdp); | 185 | gfs2_log_unlock(sdp); |
186 | } | 186 | } |
187 | 187 | ||
188 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) | ||
189 | { | ||
190 | lops_add(rgd->rd_sbd, &rgd->rd_le); | ||
191 | } | ||
192 | |||
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index fb56b783e028..f8f101ef600c 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,20 +28,20 @@ struct gfs2_glock; | |||
28 | 28 | ||
29 | /* reserve either the number of blocks to be allocated plus the rg header | 29 | /* reserve either the number of blocks to be allocated plus the rg header |
30 | * block, or all of the blocks in the rg, whichever is smaller */ | 30 | * block, or all of the blocks in the rg, whichever is smaller */ |
31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) | 31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) |
32 | { | 32 | { |
33 | return (al->al_requested < al->al_rgd->rd_length)? | 33 | const struct gfs2_alloc *al = ip->i_alloc; |
34 | al->al_requested + 1 : al->al_rgd->rd_length; | 34 | if (al->al_requested < ip->i_rgd->rd_length) |
35 | return al->al_requested + 1; | ||
36 | return ip->i_rgd->rd_length; | ||
35 | } | 37 | } |
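The reworked gfs2_rg_blocks() above reads as: reserve the requested blocks plus one for the rgrp header, capped at the rgrp's total length. For example, requesting 4 blocks against a 10-block rgrp reserves 5, while requesting 20 caps out at 10. As a standalone helper:

/* min(requested + 1 header block, whole rgrp), as in gfs2_rg_blocks(). */
static inline unsigned int rg_blocks(unsigned int requested,
                                     unsigned int rd_length)
{
    return requested < rd_length ? requested + 1 : rd_length;
}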
36 | 38 | ||
37 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | 39 | extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, |
38 | unsigned int revokes); | 40 | unsigned int revokes); |
39 | 41 | ||
40 | void gfs2_trans_end(struct gfs2_sbd *sdp); | 42 | extern void gfs2_trans_end(struct gfs2_sbd *sdp); |
41 | 43 | extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | |
42 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | 44 | extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); |
43 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | 45 | extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); |
44 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); | ||
45 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); | ||
46 | 46 | ||
47 | #endif /* __TRANS_DOT_H__ */ | 47 | #endif /* __TRANS_DOT_H__ */ |
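For readers tracing the reworked gfs2_rg_blocks() above, a standalone sketch of the reservation math, with the gfs2 structs reduced to the two fields the helper actually reads; the numbers are illustrative, not from any real resource group:

	/* Standalone sketch of the reservation calculation above. */
	#include <stdio.h>

	struct rgrpd_sketch { unsigned int rd_length; };
	struct alloc_sketch { unsigned int al_requested; };
	struct inode_sketch {
		const struct alloc_sketch *i_alloc;
		const struct rgrpd_sketch *i_rgd;
	};

	static unsigned int rg_blocks_sketch(const struct inode_sketch *ip)
	{
		const struct alloc_sketch *al = ip->i_alloc;

		/* requested blocks plus one rg header block, capped at rg size */
		if (al->al_requested < ip->i_rgd->rd_length)
			return al->al_requested + 1;
		return ip->i_rgd->rd_length;
	}

	int main(void)
	{
		struct rgrpd_sketch rgd = { .rd_length = 512 };
		struct alloc_sketch al = { .al_requested = 8 };
		struct inode_sketch ip = { .i_alloc = &al, .i_rgd = &rgd };

		printf("%u\n", rg_blocks_sketch(&ip));	/* 9: 8 blocks + header */
		al.al_requested = 4096;
		printf("%u\n", rg_blocks_sketch(&ip));	/* 512: capped at rg length */
		return 0;
	}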
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 439b61c03262..71d7bf830c09 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -332,15 +332,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
332 | if (error) | 332 | if (error) |
333 | goto out_alloc; | 333 | goto out_alloc; |
334 | 334 | ||
335 | error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh); | ||
336 | if (error) | ||
337 | goto out_quota; | ||
338 | |||
339 | error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL); | 335 | error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL); |
340 | 336 | ||
341 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
342 | |||
343 | out_quota: | ||
344 | gfs2_quota_unhold(ip); | 337 | gfs2_quota_unhold(ip); |
345 | out_alloc: | 338 | out_alloc: |
346 | gfs2_alloc_put(ip); | 339 | gfs2_alloc_put(ip); |
@@ -734,7 +727,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
734 | goto out_gunlock_q; | 727 | goto out_gunlock_q; |
735 | 728 | ||
736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 729 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
737 | blks + gfs2_rg_blocks(al) + | 730 | blks + gfs2_rg_blocks(ip) + |
738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 731 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
739 | if (error) | 732 | if (error) |
740 | goto out_ipres; | 733 | goto out_ipres; |
@@ -1296,7 +1289,8 @@ fail: | |||
1296 | 1289 | ||
1297 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | 1290 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) |
1298 | { | 1291 | { |
1299 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1292 | struct inode *inode = &ip->i_inode; |
1293 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1300 | struct gfs2_ea_location el; | 1294 | struct gfs2_ea_location el; |
1301 | int error; | 1295 | int error; |
1302 | 1296 | ||
@@ -1319,7 +1313,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | |||
1319 | if (error) | 1313 | if (error) |
1320 | return error; | 1314 | return error; |
1321 | 1315 | ||
1322 | error = gfs2_setattr_simple(ip, attr); | 1316 | error = gfs2_setattr_simple(inode, attr); |
1323 | gfs2_trans_end(sdp); | 1317 | gfs2_trans_end(sdp); |
1324 | return error; | 1318 | return error; |
1325 | } | 1319 | } |
@@ -1362,14 +1356,14 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) | |||
1362 | blen++; | 1356 | blen++; |
1363 | else { | 1357 | else { |
1364 | if (bstart) | 1358 | if (bstart) |
1365 | gfs2_rlist_add(sdp, &rlist, bstart); | 1359 | gfs2_rlist_add(ip, &rlist, bstart); |
1366 | bstart = bn; | 1360 | bstart = bn; |
1367 | blen = 1; | 1361 | blen = 1; |
1368 | } | 1362 | } |
1369 | blks++; | 1363 | blks++; |
1370 | } | 1364 | } |
1371 | if (bstart) | 1365 | if (bstart) |
1372 | gfs2_rlist_add(sdp, &rlist, bstart); | 1366 | gfs2_rlist_add(ip, &rlist, bstart); |
1373 | else | 1367 | else |
1374 | goto out; | 1368 | goto out; |
1375 | 1369 | ||
@@ -1501,24 +1495,18 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) | |||
1501 | if (error) | 1495 | if (error) |
1502 | goto out_alloc; | 1496 | goto out_alloc; |
1503 | 1497 | ||
1504 | error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh); | ||
1505 | if (error) | ||
1506 | goto out_quota; | ||
1507 | |||
1508 | error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); | 1498 | error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); |
1509 | if (error) | 1499 | if (error) |
1510 | goto out_rindex; | 1500 | goto out_quota; |
1511 | 1501 | ||
1512 | if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { | 1502 | if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { |
1513 | error = ea_dealloc_indirect(ip); | 1503 | error = ea_dealloc_indirect(ip); |
1514 | if (error) | 1504 | if (error) |
1515 | goto out_rindex; | 1505 | goto out_quota; |
1516 | } | 1506 | } |
1517 | 1507 | ||
1518 | error = ea_dealloc_block(ip); | 1508 | error = ea_dealloc_block(ip); |
1519 | 1509 | ||
1520 | out_rindex: | ||
1521 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
1522 | out_quota: | 1510 | out_quota: |
1523 | gfs2_quota_unhold(ip); | 1511 | gfs2_quota_unhold(ip); |
1524 | out_alloc: | 1512 | out_alloc: |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c106ca22e812..d24a9b666a23 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -344,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
344 | struct inode *root, *inode; | 344 | struct inode *root, *inode; |
345 | struct qstr str; | 345 | struct qstr str; |
346 | struct nls_table *nls = NULL; | 346 | struct nls_table *nls = NULL; |
347 | u64 last_fs_block, last_fs_page; | ||
347 | int err; | 348 | int err; |
348 | 349 | ||
349 | err = -EINVAL; | 350 | err = -EINVAL; |
@@ -399,9 +400,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
399 | if (!sbi->rsrc_clump_blocks) | 400 | if (!sbi->rsrc_clump_blocks) |
400 | sbi->rsrc_clump_blocks = 1; | 401 | sbi->rsrc_clump_blocks = 1; |
401 | 402 | ||
402 | err = generic_check_addressable(sbi->alloc_blksz_shift, | 403 | err = -EFBIG; |
403 | sbi->total_blocks); | 404 | last_fs_block = sbi->total_blocks - 1; |
404 | if (err) { | 405 | last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> |
406 | PAGE_CACHE_SHIFT; | ||
407 | |||
408 | if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || | ||
409 | (last_fs_page > (pgoff_t)(~0ULL))) { | ||
405 | printk(KERN_ERR "hfs: filesystem size too large.\n"); | 410 | printk(KERN_ERR "hfs: filesystem size too large.\n"); |
406 | goto out_free_vhdr; | 411 | goto out_free_vhdr; |
407 | } | 412 | } |
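The open-coded check above replaces generic_check_addressable() so that both the sector_t limit and the page-cache index limit are applied to the last block. A userspace sketch of the same arithmetic, assuming 4 KiB allocation blocks and pages (shift 12) and parameterizing the index widths to show why a 32-bit sector_t rejects a 16 TiB volume:

	#include <stdint.h>
	#include <stdio.h>

	static int fs_too_large(uint64_t total_blocks, unsigned blksz_shift,
				unsigned page_shift, unsigned sector_bits,
				unsigned pgoff_bits)
	{
		uint64_t last_fs_block = total_blocks - 1;
		uint64_t last_fs_page = (last_fs_block << blksz_shift) >> page_shift;
		uint64_t max_sector = (sector_bits == 64) ? UINT64_MAX
					: (UINT64_C(1) << sector_bits) - 1;
		uint64_t max_pgoff = (pgoff_bits == 64) ? UINT64_MAX
					: (UINT64_C(1) << pgoff_bits) - 1;

		/* last block in 512-byte sectors must fit in sector_t, and
		 * the last page index must fit in pgoff_t */
		return last_fs_block > (max_sector >> (blksz_shift - 9)) ||
		       last_fs_page > max_pgoff;
	}

	int main(void)
	{
		/* 2^32 blocks of 4 KiB (16 TiB): fine with 64-bit indexes... */
		printf("%d\n", fs_too_large(UINT64_C(1) << 32, 12, 12, 64, 64));
		/* ...too large when sector_t and pgoff_t are 32-bit */
		printf("%d\n", fs_too_large(UINT64_C(1) << 32, 12, 12, 32, 32));
		return 0;
	}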
@@ -525,8 +530,8 @@ out_close_cat_tree: | |||
525 | out_close_ext_tree: | 530 | out_close_ext_tree: |
526 | hfs_btree_close(sbi->ext_tree); | 531 | hfs_btree_close(sbi->ext_tree); |
527 | out_free_vhdr: | 532 | out_free_vhdr: |
528 | kfree(sbi->s_vhdr); | 533 | kfree(sbi->s_vhdr_buf); |
529 | kfree(sbi->s_backup_vhdr); | 534 | kfree(sbi->s_backup_vhdr_buf); |
530 | out_unload_nls: | 535 | out_unload_nls: |
531 | unload_nls(sbi->nls); | 536 | unload_nls(sbi->nls); |
532 | unload_nls(nls); | 537 | unload_nls(nls); |
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 10e515a0d452..7daf4b852d1c 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
@@ -272,9 +272,9 @@ reread: | |||
272 | return 0; | 272 | return 0; |
273 | 273 | ||
274 | out_free_backup_vhdr: | 274 | out_free_backup_vhdr: |
275 | kfree(sbi->s_backup_vhdr); | 275 | kfree(sbi->s_backup_vhdr_buf); |
276 | out_free_vhdr: | 276 | out_free_vhdr: |
277 | kfree(sbi->s_vhdr); | 277 | kfree(sbi->s_vhdr_buf); |
278 | out: | 278 | out: |
279 | return error; | 279 | return error; |
280 | } | 280 | } |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 87b6e0421c12..ec889538e5a6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | |||
491 | inode->i_op = &page_symlink_inode_operations; | 491 | inode->i_op = &page_symlink_inode_operations; |
492 | break; | 492 | break; |
493 | } | 493 | } |
494 | lockdep_annotate_inode_mutex_key(inode); | ||
494 | } | 495 | } |
495 | return inode; | 496 | return inode; |
496 | } | 497 | } |
diff --git a/fs/inode.c b/fs/inode.c index 73920d555c88..ecbb68dc7e2a 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -634,7 +634,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) | |||
634 | * inode to the back of the list so we don't spin on it. | 634 | * inode to the back of the list so we don't spin on it. |
635 | */ | 635 | */ |
636 | if (!spin_trylock(&inode->i_lock)) { | 636 | if (!spin_trylock(&inode->i_lock)) { |
637 | list_move(&inode->i_lru, &sb->s_inode_lru); | 637 | list_move_tail(&inode->i_lru, &sb->s_inode_lru); |
638 | continue; | 638 | continue; |
639 | } | 639 | } |
640 | 640 | ||
@@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb) | |||
848 | } | 848 | } |
849 | EXPORT_SYMBOL(new_inode); | 849 | EXPORT_SYMBOL(new_inode); |
850 | 850 | ||
851 | /** | ||
852 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
853 | * @inode: new inode to unlock | ||
854 | * | ||
855 | * Called when the inode is fully initialised to clear the new state of the | ||
856 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
857 | */ | ||
858 | void unlock_new_inode(struct inode *inode) | ||
859 | { | ||
860 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 851 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
852 | void lockdep_annotate_inode_mutex_key(struct inode *inode) | ||
853 | { | ||
861 | if (S_ISDIR(inode->i_mode)) { | 854 | if (S_ISDIR(inode->i_mode)) { |
862 | struct file_system_type *type = inode->i_sb->s_type; | 855 | struct file_system_type *type = inode->i_sb->s_type; |
863 | 856 | ||
@@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode) | |||
873 | &type->i_mutex_dir_key); | 866 | &type->i_mutex_dir_key); |
874 | } | 867 | } |
875 | } | 868 | } |
869 | } | ||
870 | EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); | ||
876 | #endif | 871 | #endif |
872 | |||
873 | /** | ||
874 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
875 | * @inode: new inode to unlock | ||
876 | * | ||
877 | * Called when the inode is fully initialised to clear the new state of the | ||
878 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
879 | */ | ||
880 | void unlock_new_inode(struct inode *inode) | ||
881 | { | ||
882 | lockdep_annotate_inode_mutex_key(inode); | ||
877 | spin_lock(&inode->i_lock); | 883 | spin_lock(&inode->i_lock); |
878 | WARN_ON(!(inode->i_state & I_NEW)); | 884 | WARN_ON(!(inode->i_state & I_NEW)); |
879 | inode->i_state &= ~I_NEW; | 885 | inode->i_state &= ~I_NEW; |
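The refactoring above splits the lockdep i_mutex class annotation out of unlock_new_inode() into lockdep_annotate_inode_mutex_key(), so filesystems such as hugetlbfs that never mark inodes I_NEW can annotate directly. A hedged kernel-style sketch of that call pattern; sketchfs_fill_inode() is a hypothetical stand-in for filesystem-specific setup:

	/* Sketch: a filesystem that hands out inodes without I_NEW, so
	 * unlock_new_inode() is never called, annotates i_mutex itself. */
	static struct inode *sketchfs_get_inode(struct super_block *sb,
						umode_t mode)
	{
		struct inode *inode = new_inode(sb);

		if (inode) {
			inode->i_ino = get_next_ino();
			inode->i_mode = mode;
			sketchfs_fill_inode(inode);	/* hypothetical fs setup */
			/* pick the dir vs. non-dir i_mutex lockdep class, as
			 * unlock_new_inode() would otherwise have done */
			lockdep_annotate_inode_mutex_key(inode);
		}
		return inode;
	}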
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index cfeb7164b085..0f20208df602 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c | |||
@@ -22,26 +22,29 @@ | |||
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include "nodelist.h" | 23 | #include "nodelist.h" |
24 | 24 | ||
25 | /* ---- Initial Security Label Attachment -------------- */ | 25 | /* ---- Initial Security Label(s) Attachment callback --- */ |
26 | int jffs2_init_security(struct inode *inode, struct inode *dir, | 26 | int jffs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
27 | const struct qstr *qstr) | 27 | void *fs_info) |
28 | { | 28 | { |
29 | int rc; | 29 | const struct xattr *xattr; |
30 | size_t len; | 30 | int err = 0; |
31 | void *value; | ||
32 | char *name; | ||
33 | 31 | ||
34 | rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len); | 32 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
35 | if (rc) { | 33 | err = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, |
36 | if (rc == -EOPNOTSUPP) | 34 | xattr->name, xattr->value, |
37 | return 0; | 35 | xattr->value_len, 0); |
38 | return rc; | 36 | if (err < 0) |
37 | break; | ||
39 | } | 38 | } |
40 | rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0); | 39 | return err; |
40 | } | ||
41 | 41 | ||
42 | kfree(name); | 42 | /* ---- Initial Security Label(s) Attachment ----------- */ |
43 | kfree(value); | 43 | int jffs2_init_security(struct inode *inode, struct inode *dir, |
44 | return rc; | 44 | const struct qstr *qstr) |
45 | { | ||
46 | return security_inode_init_security(inode, dir, qstr, | ||
47 | &jffs2_initxattrs, NULL); | ||
45 | } | 48 | } |
46 | 49 | ||
47 | /* ---- XATTR Handler for "security.*" ----------------- */ | 50 | /* ---- XATTR Handler for "security.*" ----------------- */ |
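The rewritten helper reflects the new security_inode_init_security() contract: the LSM now assembles a NULL-name-terminated array of xattrs and hands it to a per-filesystem callback in one call, instead of returning a single name/value/len triple for the caller to persist and free. A minimal sketch of that contract for a hypothetical filesystem, mirroring the jffs2 code above; sketchfs_setxattr() stands in for the real setter:

	static int sketchfs_initxattrs(struct inode *inode,
				       const struct xattr *xattr_array,
				       void *fs_info)
	{
		const struct xattr *xattr;
		int err = 0;

		/* array is terminated by an entry with a NULL name */
		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
			err = sketchfs_setxattr(inode, xattr->name,
						xattr->value, xattr->value_len);
			if (err < 0)
				break;
		}
		return err;
	}

	static int sketchfs_init_security(struct inode *inode, struct inode *dir,
					  const struct qstr *qstr)
	{
		/* the last argument is an opaque cookie passed back as
		 * fs_info; jfs below uses it to carry its transaction id */
		return security_inode_init_security(inode, dir, qstr,
						    &sketchfs_initxattrs, NULL);
	}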
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d3b603..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) | |||
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 1); | 71 | jfs_flush_journal(log, 2); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * close fileset inode allocation map (aka fileset inode) | 74 | * close fileset inode allocation map (aka fileset inode) |
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
149 | jfs_flush_journal(log, 1); | 149 | jfs_flush_journal(log, 2); |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Make sure all metadata makes it to disk | 152 | * Make sure all metadata makes it to disk |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index e87fedef23db..26683e15b3ac 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -1089,38 +1089,37 @@ int jfs_removexattr(struct dentry *dentry, const char *name) | |||
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | #ifdef CONFIG_JFS_SECURITY | 1091 | #ifdef CONFIG_JFS_SECURITY |
1092 | int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, | 1092 | int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
1093 | const struct qstr *qstr) | 1093 | void *fs_info) |
1094 | { | 1094 | { |
1095 | int rc; | 1095 | const struct xattr *xattr; |
1096 | size_t len; | 1096 | tid_t *tid = fs_info; |
1097 | void *value; | ||
1098 | char *suffix; | ||
1099 | char *name; | 1097 | char *name; |
1100 | 1098 | int err = 0; | |
1101 | rc = security_inode_init_security(inode, dir, qstr, &suffix, &value, | 1099 | |
1102 | &len); | 1100 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
1103 | if (rc) { | 1101 | name = kmalloc(XATTR_SECURITY_PREFIX_LEN + |
1104 | if (rc == -EOPNOTSUPP) | 1102 | strlen(xattr->name) + 1, GFP_NOFS); |
1105 | return 0; | 1103 | if (!name) { |
1106 | return rc; | 1104 | err = -ENOMEM; |
1107 | } | 1105 | break; |
1108 | name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix), | 1106 | } |
1109 | GFP_NOFS); | 1107 | strcpy(name, XATTR_SECURITY_PREFIX); |
1110 | if (!name) { | 1108 | strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); |
1111 | rc = -ENOMEM; | 1109 | |
1112 | goto kmalloc_failed; | 1110 | err = __jfs_setxattr(*tid, inode, name, |
1111 | xattr->value, xattr->value_len, 0); | ||
1112 | kfree(name); | ||
1113 | if (err < 0) | ||
1114 | break; | ||
1113 | } | 1115 | } |
1114 | strcpy(name, XATTR_SECURITY_PREFIX); | 1116 | return err; |
1115 | strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); | 1117 | } |
1116 | |||
1117 | rc = __jfs_setxattr(tid, inode, name, value, len, 0); | ||
1118 | |||
1119 | kfree(name); | ||
1120 | kmalloc_failed: | ||
1121 | kfree(suffix); | ||
1122 | kfree(value); | ||
1123 | 1118 | ||
1124 | return rc; | 1119 | int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, |
1120 | const struct qstr *qstr) | ||
1121 | { | ||
1122 | return security_inode_init_security(inode, dir, qstr, | ||
1123 | &jfs_initxattrs, &tid); | ||
1125 | } | 1124 | } |
1126 | #endif | 1125 | #endif |
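The jfs callback differs from jffs2's only in that __jfs_setxattr() wants the full "security." name, so each bare suffix from the LSM must be re-prefixed. A small userspace sketch of that name assembly (the "selinux" suffix is just an example); the buffer is sized prefix + suffix + 1 for the terminating NUL:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define XATTR_SECURITY_PREFIX "security."
	#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)

	int main(void)
	{
		const char *suffix = "selinux";	/* example suffix from the LSM */
		char *name = malloc(XATTR_SECURITY_PREFIX_LEN +
				    strlen(suffix) + 1);

		if (!name)
			return 1;
		strcpy(name, XATTR_SECURITY_PREFIX);
		strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
		printf("%s\n", name);		/* security.selinux */
		free(name);
		return 0;
	}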
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index b7c99bfb3da6..6f29836ec0cb 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -316,14 +316,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, | |||
316 | struct hlist_node *pos; | 316 | struct hlist_node *pos; |
317 | struct nlm_host *host = NULL; | 317 | struct nlm_host *host = NULL; |
318 | struct nsm_handle *nsm = NULL; | 318 | struct nsm_handle *nsm = NULL; |
319 | struct sockaddr_in sin = { | 319 | struct sockaddr *src_sap = svc_daddr(rqstp); |
320 | .sin_family = AF_INET, | 320 | size_t src_len = rqstp->rq_daddrlen; |
321 | }; | ||
322 | struct sockaddr_in6 sin6 = { | ||
323 | .sin6_family = AF_INET6, | ||
324 | }; | ||
325 | struct sockaddr *src_sap; | ||
326 | size_t src_len = rqstp->rq_addrlen; | ||
327 | struct nlm_lookup_host_info ni = { | 321 | struct nlm_lookup_host_info ni = { |
328 | .server = 1, | 322 | .server = 1, |
329 | .sap = svc_addr(rqstp), | 323 | .sap = svc_addr(rqstp), |
@@ -340,21 +334,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, | |||
340 | 334 | ||
341 | mutex_lock(&nlm_host_mutex); | 335 | mutex_lock(&nlm_host_mutex); |
342 | 336 | ||
343 | switch (ni.sap->sa_family) { | ||
344 | case AF_INET: | ||
345 | sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; | ||
346 | src_sap = (struct sockaddr *)&sin; | ||
347 | break; | ||
348 | case AF_INET6: | ||
349 | ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); | ||
350 | src_sap = (struct sockaddr *)&sin6; | ||
351 | break; | ||
352 | default: | ||
353 | dprintk("lockd: %s failed; unrecognized address family\n", | ||
354 | __func__); | ||
355 | goto out; | ||
356 | } | ||
357 | |||
358 | if (time_after_eq(jiffies, next_gc)) | 337 | if (time_after_eq(jiffies, next_gc)) |
359 | nlm_gc_hosts(); | 338 | nlm_gc_hosts(); |
360 | 339 | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index abfff9d7979d..c061b9aa7ddb 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -282,7 +282,7 @@ int lockd_up(void) | |||
282 | /* | 282 | /* |
283 | * Create the kernel thread and wait for it to start. | 283 | * Create the kernel thread and wait for it to start. |
284 | */ | 284 | */ |
285 | nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); | 285 | nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); |
286 | if (IS_ERR(nlmsvc_rqst)) { | 286 | if (IS_ERR(nlmsvc_rqst)) { |
287 | error = PTR_ERR(nlmsvc_rqst); | 287 | error = PTR_ERR(nlmsvc_rqst); |
288 | nlmsvc_rqst = NULL; | 288 | nlmsvc_rqst = NULL; |
diff --git a/fs/locks.c b/fs/locks.c index 703f545097de..3b0d05dcd7c1 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -60,7 +60,7 @@ | |||
60 | * | 60 | * |
61 | * Initial implementation of mandatory locks. SunOS turned out to be | 61 | * Initial implementation of mandatory locks. SunOS turned out to be |
62 | * a rotten model, so I implemented the "obvious" semantics. | 62 | * a rotten model, so I implemented the "obvious" semantics. |
63 | * See 'Documentation/mandatory.txt' for details. | 63 | * See 'Documentation/filesystems/mandatory-locking.txt' for details. |
64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. | 64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. |
65 | * | 65 | * |
66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to | 66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to |
@@ -133,6 +133,20 @@ | |||
133 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 133 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
134 | #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) | 134 | #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) |
135 | 135 | ||
136 | static bool lease_breaking(struct file_lock *fl) | ||
137 | { | ||
138 | return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); | ||
139 | } | ||
140 | |||
141 | static int target_leasetype(struct file_lock *fl) | ||
142 | { | ||
143 | if (fl->fl_flags & FL_UNLOCK_PENDING) | ||
144 | return F_UNLCK; | ||
145 | if (fl->fl_flags & FL_DOWNGRADE_PENDING) | ||
146 | return F_RDLCK; | ||
147 | return fl->fl_type; | ||
148 | } | ||
149 | |||
136 | int leases_enable = 1; | 150 | int leases_enable = 1; |
137 | int lease_break_time = 45; | 151 | int lease_break_time = 45; |
138 | 152 | ||
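These helpers replace the old trick of OR-ing F_INPROGRESS into fl_type with two explicit pending flags, so a lease can record whether it is being broken all the way to F_UNLCK or merely downgraded to F_RDLCK. A standalone sketch of the mapping; the flag values are illustrative, not the kernel's:

	#include <stdio.h>
	#include <fcntl.h>		/* F_RDLCK, F_WRLCK, F_UNLCK */

	#define FL_DOWNGRADE_PENDING	0x100	/* illustrative values */
	#define FL_UNLOCK_PENDING	0x200

	struct fl_sketch { int fl_flags; int fl_type; };

	static int target_leasetype_sketch(const struct fl_sketch *fl)
	{
		if (fl->fl_flags & FL_UNLOCK_PENDING)
			return F_UNLCK;
		if (fl->fl_flags & FL_DOWNGRADE_PENDING)
			return F_RDLCK;
		return fl->fl_type;	/* no break in progress */
	}

	int main(void)
	{
		struct fl_sketch fl = { .fl_flags = 0, .fl_type = F_WRLCK };

		printf("%d\n", target_leasetype_sketch(&fl)); /* F_WRLCK: stable */
		fl.fl_flags |= FL_DOWNGRADE_PENDING;
		printf("%d\n", target_leasetype_sketch(&fl)); /* F_RDLCK */
		fl.fl_flags |= FL_UNLOCK_PENDING;  /* unlock wins over downgrade */
		printf("%d\n", target_leasetype_sketch(&fl)); /* F_UNLCK */
		return 0;
	}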
@@ -1119,6 +1133,17 @@ int locks_mandatory_area(int read_write, struct inode *inode, | |||
1119 | 1133 | ||
1120 | EXPORT_SYMBOL(locks_mandatory_area); | 1134 | EXPORT_SYMBOL(locks_mandatory_area); |
1121 | 1135 | ||
1136 | static void lease_clear_pending(struct file_lock *fl, int arg) | ||
1137 | { | ||
1138 | switch (arg) { | ||
1139 | case F_UNLCK: | ||
1140 | fl->fl_flags &= ~FL_UNLOCK_PENDING; | ||
1141 | /* fall through: */ | ||
1142 | case F_RDLCK: | ||
1143 | fl->fl_flags &= ~FL_DOWNGRADE_PENDING; | ||
1144 | } | ||
1145 | } | ||
1146 | |||
1122 | /* We already had a lease on this file; just change its type */ | 1147 | /* We already had a lease on this file; just change its type */ |
1123 | int lease_modify(struct file_lock **before, int arg) | 1148 | int lease_modify(struct file_lock **before, int arg) |
1124 | { | 1149 | { |
@@ -1127,6 +1152,7 @@ int lease_modify(struct file_lock **before, int arg) | |||
1127 | 1152 | ||
1128 | if (error) | 1153 | if (error) |
1129 | return error; | 1154 | return error; |
1155 | lease_clear_pending(fl, arg); | ||
1130 | locks_wake_up_blocks(fl); | 1156 | locks_wake_up_blocks(fl); |
1131 | if (arg == F_UNLCK) | 1157 | if (arg == F_UNLCK) |
1132 | locks_delete_lock(before); | 1158 | locks_delete_lock(before); |
@@ -1135,19 +1161,25 @@ int lease_modify(struct file_lock **before, int arg) | |||
1135 | 1161 | ||
1136 | EXPORT_SYMBOL(lease_modify); | 1162 | EXPORT_SYMBOL(lease_modify); |
1137 | 1163 | ||
1164 | static bool past_time(unsigned long then) | ||
1165 | { | ||
1166 | if (!then) | ||
1167 | /* 0 is a special value meaning "this never expires": */ | ||
1168 | return false; | ||
1169 | return time_after(jiffies, then); | ||
1170 | } | ||
1171 | |||
1138 | static void time_out_leases(struct inode *inode) | 1172 | static void time_out_leases(struct inode *inode) |
1139 | { | 1173 | { |
1140 | struct file_lock **before; | 1174 | struct file_lock **before; |
1141 | struct file_lock *fl; | 1175 | struct file_lock *fl; |
1142 | 1176 | ||
1143 | before = &inode->i_flock; | 1177 | before = &inode->i_flock; |
1144 | while ((fl = *before) && IS_LEASE(fl) && (fl->fl_type & F_INPROGRESS)) { | 1178 | while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { |
1145 | if ((fl->fl_break_time == 0) | 1179 | if (past_time(fl->fl_downgrade_time)) |
1146 | || time_before(jiffies, fl->fl_break_time)) { | 1180 | lease_modify(before, F_RDLCK); |
1147 | before = &fl->fl_next; | 1181 | if (past_time(fl->fl_break_time)) |
1148 | continue; | 1182 | lease_modify(before, F_UNLCK); |
1149 | } | ||
1150 | lease_modify(before, fl->fl_type & ~F_INPROGRESS); | ||
1151 | if (fl == *before) /* lease_modify may have freed fl */ | 1183 | if (fl == *before) /* lease_modify may have freed fl */ |
1152 | before = &fl->fl_next; | 1184 | before = &fl->fl_next; |
1153 | } | 1185 | } |
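past_time() gives time_out_leases() its two-deadline behaviour: a zero timestamp means that transition never expires, anything else is compared against jiffies. A userspace sketch with jiffies replaced by a plain counter; the signed-difference compare approximates the kernel's wraparound-safe time_after():

	#include <stdbool.h>
	#include <stdio.h>

	static unsigned long jiffies_sketch = 1000;

	static bool past_time_sketch(unsigned long then)
	{
		if (!then)
			return false;	/* 0 means "this never expires" */
		return (long)(jiffies_sketch - then) > 0;  /* ~time_after() */
	}

	int main(void)
	{
		printf("%d\n", past_time_sketch(0));	/* 0: never expires */
		printf("%d\n", past_time_sketch(900));	/* 1: deadline passed */
		printf("%d\n", past_time_sketch(2000));	/* 0: still in future */
		return 0;
	}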
@@ -1165,7 +1197,7 @@ static void time_out_leases(struct inode *inode) | |||
1165 | */ | 1197 | */ |
1166 | int __break_lease(struct inode *inode, unsigned int mode) | 1198 | int __break_lease(struct inode *inode, unsigned int mode) |
1167 | { | 1199 | { |
1168 | int error = 0, future; | 1200 | int error = 0; |
1169 | struct file_lock *new_fl, *flock; | 1201 | struct file_lock *new_fl, *flock; |
1170 | struct file_lock *fl; | 1202 | struct file_lock *fl; |
1171 | unsigned long break_time; | 1203 | unsigned long break_time; |
@@ -1182,24 +1214,13 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
1182 | if ((flock == NULL) || !IS_LEASE(flock)) | 1214 | if ((flock == NULL) || !IS_LEASE(flock)) |
1183 | goto out; | 1215 | goto out; |
1184 | 1216 | ||
1217 | if (!locks_conflict(flock, new_fl)) | ||
1218 | goto out; | ||
1219 | |||
1185 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) | 1220 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) |
1186 | if (fl->fl_owner == current->files) | 1221 | if (fl->fl_owner == current->files) |
1187 | i_have_this_lease = 1; | 1222 | i_have_this_lease = 1; |
1188 | 1223 | ||
1189 | if (want_write) { | ||
1190 | /* If we want write access, we have to revoke any lease. */ | ||
1191 | future = F_UNLCK | F_INPROGRESS; | ||
1192 | } else if (flock->fl_type & F_INPROGRESS) { | ||
1193 | /* If the lease is already being broken, we just leave it */ | ||
1194 | future = flock->fl_type; | ||
1195 | } else if (flock->fl_type & F_WRLCK) { | ||
1196 | /* Downgrade the exclusive lease to a read-only lease. */ | ||
1197 | future = F_RDLCK | F_INPROGRESS; | ||
1198 | } else { | ||
1199 | /* the existing lease was read-only, so we can read too. */ | ||
1200 | goto out; | ||
1201 | } | ||
1202 | |||
1203 | if (IS_ERR(new_fl) && !i_have_this_lease | 1224 | if (IS_ERR(new_fl) && !i_have_this_lease |
1204 | && ((mode & O_NONBLOCK) == 0)) { | 1225 | && ((mode & O_NONBLOCK) == 0)) { |
1205 | error = PTR_ERR(new_fl); | 1226 | error = PTR_ERR(new_fl); |
@@ -1214,12 +1235,18 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
1214 | } | 1235 | } |
1215 | 1236 | ||
1216 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { | 1237 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { |
1217 | if (fl->fl_type != future) { | 1238 | if (want_write) { |
1218 | fl->fl_type = future; | 1239 | if (fl->fl_flags & FL_UNLOCK_PENDING) |
1240 | continue; | ||
1241 | fl->fl_flags |= FL_UNLOCK_PENDING; | ||
1219 | fl->fl_break_time = break_time; | 1242 | fl->fl_break_time = break_time; |
1220 | /* lease must have lmops break callback */ | 1243 | } else { |
1221 | fl->fl_lmops->lm_break(fl); | 1244 | if (lease_breaking(flock)) |
1245 | continue; | ||
1246 | fl->fl_flags |= FL_DOWNGRADE_PENDING; | ||
1247 | fl->fl_downgrade_time = break_time; | ||
1222 | } | 1248 | } |
1249 | fl->fl_lmops->lm_break(fl); | ||
1223 | } | 1250 | } |
1224 | 1251 | ||
1225 | if (i_have_this_lease || (mode & O_NONBLOCK)) { | 1252 | if (i_have_this_lease || (mode & O_NONBLOCK)) { |
@@ -1243,10 +1270,13 @@ restart: | |||
1243 | if (error >= 0) { | 1270 | if (error >= 0) { |
1244 | if (error == 0) | 1271 | if (error == 0) |
1245 | time_out_leases(inode); | 1272 | time_out_leases(inode); |
1246 | /* Wait for the next lease that has not been broken yet */ | 1273 | /* |
1274 | * Wait for the next conflicting lease that has not been | ||
1275 | * broken yet | ||
1276 | */ | ||
1247 | for (flock = inode->i_flock; flock && IS_LEASE(flock); | 1277 | for (flock = inode->i_flock; flock && IS_LEASE(flock); |
1248 | flock = flock->fl_next) { | 1278 | flock = flock->fl_next) { |
1249 | if (flock->fl_type & F_INPROGRESS) | 1279 | if (locks_conflict(new_fl, flock)) |
1250 | goto restart; | 1280 | goto restart; |
1251 | } | 1281 | } |
1252 | error = 0; | 1282 | error = 0; |
@@ -1314,7 +1344,7 @@ int fcntl_getlease(struct file *filp) | |||
1314 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); | 1344 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); |
1315 | fl = fl->fl_next) { | 1345 | fl = fl->fl_next) { |
1316 | if (fl->fl_file == filp) { | 1346 | if (fl->fl_file == filp) { |
1317 | type = fl->fl_type & ~F_INPROGRESS; | 1347 | type = target_leasetype(fl); |
1318 | break; | 1348 | break; |
1319 | } | 1349 | } |
1320 | } | 1350 | } |
@@ -1322,50 +1352,23 @@ int fcntl_getlease(struct file *filp) | |||
1322 | return type; | 1352 | return type; |
1323 | } | 1353 | } |
1324 | 1354 | ||
1325 | /** | 1355 | int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
1326 | * generic_setlease - sets a lease on an open file | ||
1327 | * @filp: file pointer | ||
1328 | * @arg: type of lease to obtain | ||
1329 | * @flp: input - file_lock to use, output - file_lock inserted | ||
1330 | * | ||
1331 | * The (input) flp->fl_lmops->lm_break function is required | ||
1332 | * by break_lease(). | ||
1333 | * | ||
1334 | * Called with file_lock_lock held. | ||
1335 | */ | ||
1336 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | ||
1337 | { | 1356 | { |
1338 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1357 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
1339 | struct dentry *dentry = filp->f_path.dentry; | 1358 | struct dentry *dentry = filp->f_path.dentry; |
1340 | struct inode *inode = dentry->d_inode; | 1359 | struct inode *inode = dentry->d_inode; |
1341 | int error, rdlease_count = 0, wrlease_count = 0; | 1360 | int error; |
1342 | 1361 | ||
1343 | lease = *flp; | 1362 | lease = *flp; |
1344 | 1363 | ||
1345 | error = -EACCES; | 1364 | error = -EAGAIN; |
1346 | if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) | 1365 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1347 | goto out; | ||
1348 | error = -EINVAL; | ||
1349 | if (!S_ISREG(inode->i_mode)) | ||
1350 | goto out; | 1366 | goto out; |
1351 | error = security_file_lock(filp, arg); | 1367 | if ((arg == F_WRLCK) |
1352 | if (error) | 1368 | && ((dentry->d_count > 1) |
1369 | || (atomic_read(&inode->i_count) > 1))) | ||
1353 | goto out; | 1370 | goto out; |
1354 | 1371 | ||
1355 | time_out_leases(inode); | ||
1356 | |||
1357 | BUG_ON(!(*flp)->fl_lmops->lm_break); | ||
1358 | |||
1359 | if (arg != F_UNLCK) { | ||
1360 | error = -EAGAIN; | ||
1361 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | ||
1362 | goto out; | ||
1363 | if ((arg == F_WRLCK) | ||
1364 | && ((dentry->d_count > 1) | ||
1365 | || (atomic_read(&inode->i_count) > 1))) | ||
1366 | goto out; | ||
1367 | } | ||
1368 | |||
1369 | /* | 1372 | /* |
1370 | * At this point, we know that if there is an exclusive | 1373 | * At this point, we know that if there is an exclusive |
1371 | * lease on this file, then we hold it on this filp | 1374 | * lease on this file, then we hold it on this filp |
@@ -1374,27 +1377,28 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1374 | * then the file is not open by anyone (including us) | 1377 | * then the file is not open by anyone (including us) |
1375 | * except for this filp. | 1378 | * except for this filp. |
1376 | */ | 1379 | */ |
1380 | error = -EAGAIN; | ||
1377 | for (before = &inode->i_flock; | 1381 | for (before = &inode->i_flock; |
1378 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1382 | ((fl = *before) != NULL) && IS_LEASE(fl); |
1379 | before = &fl->fl_next) { | 1383 | before = &fl->fl_next) { |
1380 | if (fl->fl_file == filp) | 1384 | if (fl->fl_file == filp) { |
1381 | my_before = before; | 1385 | my_before = before; |
1382 | else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) | 1386 | continue; |
1383 | /* | 1387 | } |
1384 | * Someone is in the process of opening this | 1388 | /* |
1385 | * file for writing so we may not take an | 1389 | * No exclusive leases if someone else has a lease on |
1386 | * exclusive lease on it. | 1390 | * this file: |
1387 | */ | 1391 | */ |
1388 | wrlease_count++; | 1392 | if (arg == F_WRLCK) |
1389 | else | 1393 | goto out; |
1390 | rdlease_count++; | 1394 | /* |
1395 | * Modifying our existing lease is OK, but not getting a | ||
1396 | * new lease if someone else is opening for write: | ||
1397 | */ | ||
1398 | if (fl->fl_flags & FL_UNLOCK_PENDING) | ||
1399 | goto out; | ||
1391 | } | 1400 | } |
1392 | 1401 | ||
1393 | error = -EAGAIN; | ||
1394 | if ((arg == F_RDLCK && (wrlease_count > 0)) || | ||
1395 | (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0))) | ||
1396 | goto out; | ||
1397 | |||
1398 | if (my_before != NULL) { | 1402 | if (my_before != NULL) { |
1399 | error = lease->fl_lmops->lm_change(my_before, arg); | 1403 | error = lease->fl_lmops->lm_change(my_before, arg); |
1400 | if (!error) | 1404 | if (!error) |
@@ -1402,9 +1406,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1402 | goto out; | 1406 | goto out; |
1403 | } | 1407 | } |
1404 | 1408 | ||
1405 | if (arg == F_UNLCK) | ||
1406 | goto out; | ||
1407 | |||
1408 | error = -EINVAL; | 1409 | error = -EINVAL; |
1409 | if (!leases_enable) | 1410 | if (!leases_enable) |
1410 | goto out; | 1411 | goto out; |
@@ -1415,6 +1416,62 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1415 | out: | 1416 | out: |
1416 | return error; | 1417 | return error; |
1417 | } | 1418 | } |
1419 | |||
1420 | int generic_delete_lease(struct file *filp, struct file_lock **flp) | ||
1421 | { | ||
1422 | struct file_lock *fl, **before; | ||
1423 | struct dentry *dentry = filp->f_path.dentry; | ||
1424 | struct inode *inode = dentry->d_inode; | ||
1425 | |||
1426 | for (before = &inode->i_flock; | ||
1427 | ((fl = *before) != NULL) && IS_LEASE(fl); | ||
1428 | before = &fl->fl_next) { | ||
1429 | if (fl->fl_file != filp) | ||
1430 | continue; | ||
1431 | return (*flp)->fl_lmops->lm_change(before, F_UNLCK); | ||
1432 | } | ||
1433 | return -EAGAIN; | ||
1434 | } | ||
1435 | |||
1436 | /** | ||
1437 | * generic_setlease - sets a lease on an open file | ||
1438 | * @filp: file pointer | ||
1439 | * @arg: type of lease to obtain | ||
1440 | * @flp: input - file_lock to use, output - file_lock inserted | ||
1441 | * | ||
1442 | * The (input) flp->fl_lmops->lm_break function is required | ||
1443 | * by break_lease(). | ||
1444 | * | ||
1445 | * Called with file_lock_lock held. | ||
1446 | */ | ||
1447 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | ||
1448 | { | ||
1449 | struct dentry *dentry = filp->f_path.dentry; | ||
1450 | struct inode *inode = dentry->d_inode; | ||
1451 | int error; | ||
1452 | |||
1453 | if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) | ||
1454 | return -EACCES; | ||
1455 | if (!S_ISREG(inode->i_mode)) | ||
1456 | return -EINVAL; | ||
1457 | error = security_file_lock(filp, arg); | ||
1458 | if (error) | ||
1459 | return error; | ||
1460 | |||
1461 | time_out_leases(inode); | ||
1462 | |||
1463 | BUG_ON(!(*flp)->fl_lmops->lm_break); | ||
1464 | |||
1465 | switch (arg) { | ||
1466 | case F_UNLCK: | ||
1467 | return generic_delete_lease(filp, flp); | ||
1468 | case F_RDLCK: | ||
1469 | case F_WRLCK: | ||
1470 | return generic_add_lease(filp, arg, flp); | ||
1471 | default: | ||
1472 | BUG(); | ||
1473 | } | ||
1474 | } | ||
1418 | EXPORT_SYMBOL(generic_setlease); | 1475 | EXPORT_SYMBOL(generic_setlease); |
1419 | 1476 | ||
1420 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1477 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
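From userspace the new dispatch is invisible: F_SETLEASE with F_RDLCK or F_WRLCK ends up in generic_add_lease(), while F_UNLCK goes to generic_delete_lease(). A minimal usage sketch; the path is arbitrary, error handling is abbreviated, and leases still require owning the file or holding CAP_LEASE:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/leasefile", O_RDONLY | O_CREAT, 0600);

		if (fd < 0)
			return 1;
		if (fcntl(fd, F_SETLEASE, F_RDLCK) == -1)  /* generic_add_lease() */
			perror("F_SETLEASE F_RDLCK");
		printf("lease now: %d\n", fcntl(fd, F_GETLEASE));
		if (fcntl(fd, F_SETLEASE, F_UNLCK) == -1)  /* generic_delete_lease() */
			perror("F_SETLEASE F_UNLCK");
		close(fd);
		return 0;
	}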
@@ -2126,7 +2183,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2126 | } | 2183 | } |
2127 | } else if (IS_LEASE(fl)) { | 2184 | } else if (IS_LEASE(fl)) { |
2128 | seq_printf(f, "LEASE "); | 2185 | seq_printf(f, "LEASE "); |
2129 | if (fl->fl_type & F_INPROGRESS) | 2186 | if (lease_breaking(fl)) |
2130 | seq_printf(f, "BREAKING "); | 2187 | seq_printf(f, "BREAKING "); |
2131 | else if (fl->fl_file) | 2188 | else if (fl->fl_file) |
2132 | seq_printf(f, "ACTIVE "); | 2189 | seq_printf(f, "ACTIVE "); |
@@ -2142,7 +2199,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2142 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); | 2199 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); |
2143 | } else { | 2200 | } else { |
2144 | seq_printf(f, "%s ", | 2201 | seq_printf(f, "%s ", |
2145 | (fl->fl_type & F_INPROGRESS) | 2202 | (lease_breaking(fl)) |
2146 | ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " | 2203 | ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " |
2147 | : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); | 2204 | : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); |
2148 | } | 2205 | } |
diff --git a/fs/namei.c b/fs/namei.c index 2826db35dc25..7657be4352bf 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -221,14 +221,12 @@ static int check_acl(struct inode *inode, int mask) | |||
221 | } | 221 | } |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * This does basic POSIX ACL permission checking | 224 | * This does the basic permission checking |
225 | */ | 225 | */ |
226 | static int acl_permission_check(struct inode *inode, int mask) | 226 | static int acl_permission_check(struct inode *inode, int mask) |
227 | { | 227 | { |
228 | unsigned int mode = inode->i_mode; | 228 | unsigned int mode = inode->i_mode; |
229 | 229 | ||
230 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK; | ||
231 | |||
232 | if (current_user_ns() != inode_userns(inode)) | 230 | if (current_user_ns() != inode_userns(inode)) |
233 | goto other_perms; | 231 | goto other_perms; |
234 | 232 | ||
@@ -257,7 +255,7 @@ other_perms: | |||
257 | /** | 255 | /** |
258 | * generic_permission - check for access rights on a Posix-like filesystem | 256 | * generic_permission - check for access rights on a Posix-like filesystem |
259 | * @inode: inode to check access rights for | 257 | * @inode: inode to check access rights for |
260 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | 258 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) |
261 | * | 259 | * |
262 | * Used to check for read/write/execute permissions on a file. | 260 | * Used to check for read/write/execute permissions on a file. |
263 | * We use "fsuid" for this, letting us set arbitrary permissions | 261 | * We use "fsuid" for this, letting us set arbitrary permissions |
@@ -273,7 +271,7 @@ int generic_permission(struct inode *inode, int mask) | |||
273 | int ret; | 271 | int ret; |
274 | 272 | ||
275 | /* | 273 | /* |
276 | * Do the basic POSIX ACL permission checks. | 274 | * Do the basic permission checks. |
277 | */ | 275 | */ |
278 | ret = acl_permission_check(inode, mask); | 276 | ret = acl_permission_check(inode, mask); |
279 | if (ret != -EACCES) | 277 | if (ret != -EACCES) |
@@ -331,12 +329,14 @@ static inline int do_inode_permission(struct inode *inode, int mask) | |||
331 | /** | 329 | /** |
332 | * inode_permission - check for access rights to a given inode | 330 | * inode_permission - check for access rights to a given inode |
333 | * @inode: inode to check permission on | 331 | * @inode: inode to check permission on |
334 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | 332 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) |
335 | * | 333 | * |
336 | * Used to check for read/write/execute permissions on an inode. | 334 | * Used to check for read/write/execute permissions on an inode. |
337 | * We use "fsuid" for this, letting us set arbitrary permissions | 335 | * We use "fsuid" for this, letting us set arbitrary permissions |
338 | * for filesystem access without changing the "normal" uids which | 336 | * for filesystem access without changing the "normal" uids which |
339 | * are used for other things. | 337 | * are used for other things. |
338 | * | ||
339 | * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. | ||
340 | */ | 340 | */ |
341 | int inode_permission(struct inode *inode, int mask) | 341 | int inode_permission(struct inode *inode, int mask) |
342 | { | 342 | { |
@@ -721,31 +721,22 @@ static int follow_automount(struct path *path, unsigned flags, | |||
721 | if (!path->dentry->d_op || !path->dentry->d_op->d_automount) | 721 | if (!path->dentry->d_op || !path->dentry->d_op->d_automount) |
722 | return -EREMOTE; | 722 | return -EREMOTE; |
723 | 723 | ||
724 | /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT | 724 | /* We don't want to mount if someone's just doing a stat - |
725 | * and this is the terminal part of the path. | 725 | * unless they're stat'ing a directory and appended a '/' to |
726 | * the name. | ||
727 | * | ||
728 | * We do, however, want to mount if someone wants to open or | ||
729 | * create a file of any type under the mountpoint, wants to | ||
730 | * traverse through the mountpoint or wants to open the | ||
731 | * mounted directory. Also, autofs may mark negative dentries | ||
732 | * as being automount points. These will need the attentions | ||
733 | * of the daemon to instantiate them before they can be used. | ||
726 | */ | 734 | */ |
727 | if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) | 735 | if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | |
728 | return -EISDIR; /* we actually want to stop here */ | 736 | LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && |
737 | path->dentry->d_inode) | ||
738 | return -EISDIR; | ||
729 | 739 | ||
730 | /* | ||
731 | * We don't want to mount if someone's just doing a stat and they've | ||
732 | * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and | ||
733 | * appended a '/' to the name. | ||
734 | */ | ||
735 | if (!(flags & LOOKUP_FOLLOW)) { | ||
736 | /* We do, however, want to mount if someone wants to open or | ||
737 | * create a file of any type under the mountpoint, wants to | ||
738 | * traverse through the mountpoint or wants to open the mounted | ||
739 | * directory. | ||
740 | * Also, autofs may mark negative dentries as being automount | ||
741 | * points. These will need the attentions of the daemon to | ||
742 | * instantiate them before they can be used. | ||
743 | */ | ||
744 | if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | | ||
745 | LOOKUP_OPEN | LOOKUP_CREATE)) && | ||
746 | path->dentry->d_inode) | ||
747 | return -EISDIR; | ||
748 | } | ||
749 | current->total_link_count++; | 740 | current->total_link_count++; |
750 | if (current->total_link_count >= 40) | 741 | if (current->total_link_count >= 40) |
751 | return -ELOOP; | 742 | return -ELOOP; |
@@ -2044,10 +2035,7 @@ static int may_open(struct path *path, int acc_mode, int flag) | |||
2044 | if (flag & O_NOATIME && !inode_owner_or_capable(inode)) | 2035 | if (flag & O_NOATIME && !inode_owner_or_capable(inode)) |
2045 | return -EPERM; | 2036 | return -EPERM; |
2046 | 2037 | ||
2047 | /* | 2038 | return 0; |
2048 | * Ensure there are no outstanding leases on the file. | ||
2049 | */ | ||
2050 | return break_lease(inode, flag); | ||
2051 | } | 2039 | } |
2052 | 2040 | ||
2053 | static int handle_truncate(struct file *filp) | 2041 | static int handle_truncate(struct file *filp) |
@@ -2619,6 +2607,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2619 | if (!dir->i_op->rmdir) | 2607 | if (!dir->i_op->rmdir) |
2620 | return -EPERM; | 2608 | return -EPERM; |
2621 | 2609 | ||
2610 | dget(dentry); | ||
2622 | mutex_lock(&dentry->d_inode->i_mutex); | 2611 | mutex_lock(&dentry->d_inode->i_mutex); |
2623 | 2612 | ||
2624 | error = -EBUSY; | 2613 | error = -EBUSY; |
@@ -2639,6 +2628,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2639 | 2628 | ||
2640 | out: | 2629 | out: |
2641 | mutex_unlock(&dentry->d_inode->i_mutex); | 2630 | mutex_unlock(&dentry->d_inode->i_mutex); |
2631 | dput(dentry); | ||
2642 | if (!error) | 2632 | if (!error) |
2643 | d_delete(dentry); | 2633 | d_delete(dentry); |
2644 | return error; | 2634 | return error; |
@@ -3028,6 +3018,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3028 | if (error) | 3018 | if (error) |
3029 | return error; | 3019 | return error; |
3030 | 3020 | ||
3021 | dget(new_dentry); | ||
3031 | if (target) | 3022 | if (target) |
3032 | mutex_lock(&target->i_mutex); | 3023 | mutex_lock(&target->i_mutex); |
3033 | 3024 | ||
@@ -3048,6 +3039,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3048 | out: | 3039 | out: |
3049 | if (target) | 3040 | if (target) |
3050 | mutex_unlock(&target->i_mutex); | 3041 | mutex_unlock(&target->i_mutex); |
3042 | dput(new_dentry); | ||
3051 | if (!error) | 3043 | if (!error) |
3052 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 3044 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
3053 | d_move(old_dentry,new_dentry); | 3045 | d_move(old_dentry,new_dentry); |
diff --git a/fs/namespace.c b/fs/namespace.c index 22bfe8273c68..e5e1c7d1839b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1109,6 +1109,7 @@ static int show_vfsstat(struct seq_file *m, void *v) | |||
1109 | 1109 | ||
1110 | /* device */ | 1110 | /* device */ |
1111 | if (mnt->mnt_sb->s_op->show_devname) { | 1111 | if (mnt->mnt_sb->s_op->show_devname) { |
1112 | seq_puts(m, "device "); | ||
1112 | err = mnt->mnt_sb->s_op->show_devname(m, mnt); | 1113 | err = mnt->mnt_sb->s_op->show_devname(m, mnt); |
1113 | } else { | 1114 | } else { |
1114 | if (mnt->mnt_devname) { | 1115 | if (mnt->mnt_devname) { |
@@ -1757,7 +1758,7 @@ static int do_loopback(struct path *path, char *old_name, | |||
1757 | return err; | 1758 | return err; |
1758 | if (!old_name || !*old_name) | 1759 | if (!old_name || !*old_name) |
1759 | return -EINVAL; | 1760 | return -EINVAL; |
1760 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); | 1761 | err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); |
1761 | if (err) | 1762 | if (err) |
1762 | return err; | 1763 | return err; |
1763 | 1764 | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index be020771c6b4..dbcd82126aed 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -79,12 +79,9 @@ config NFS_V4_1 | |||
79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL | 79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
80 | select SUNRPC_BACKCHANNEL | 80 | select SUNRPC_BACKCHANNEL |
81 | select PNFS_FILE_LAYOUT | 81 | select PNFS_FILE_LAYOUT |
82 | select PNFS_BLOCK | ||
83 | select MD | ||
84 | select BLK_DEV_DM | ||
85 | help | 82 | help |
86 | This option enables support for minor version 1 of the NFSv4 protocol | 83 | This option enables support for minor version 1 of the NFSv4 protocol |
87 | (RFC 5661 and RFC 5663) in the kernel's NFS client. | 84 | (RFC 5661) in the kernel's NFS client. |
88 | 85 | ||
89 | If unsure, say N. | 86 | If unsure, say N. |
90 | 87 | ||
@@ -93,16 +90,13 @@ config PNFS_FILE_LAYOUT | |||
93 | 90 | ||
94 | config PNFS_BLOCK | 91 | config PNFS_BLOCK |
95 | tristate | 92 | tristate |
93 | depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM | ||
94 | default m | ||
96 | 95 | ||
97 | config PNFS_OBJLAYOUT | 96 | config PNFS_OBJLAYOUT |
98 | tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" | 97 | tristate |
99 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | 98 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD |
100 | help | 99 | default m |
101 | Say M here if you want your pNFS client to support the Objects Layout Driver. | ||
102 | Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and | ||
103 | upper level driver (SCSI_OSD_ULD). | ||
104 | |||
105 | If unsure, say N. | ||
106 | 100 | ||
107 | config ROOT_NFS | 101 | config ROOT_NFS |
108 | bool "Root file system on NFS" | 102 | bool "Root file system on NFS" |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e56564d2ef95..281ae95932c9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | 38 | #include <linux/buffer_head.h> /* various write calls */ |
39 | #include <linux/prefetch.h> | ||
39 | 40 | ||
40 | #include "blocklayout.h" | 41 | #include "blocklayout.h" |
41 | 42 | ||
@@ -175,17 +176,6 @@ retry: | |||
175 | return bio; | 176 | return bio; |
176 | } | 177 | } |
177 | 178 | ||
178 | static void bl_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
179 | { | ||
180 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
181 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
182 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
183 | } else { | ||
184 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
185 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | /* This is basically copied from mpage_end_io_read */ | 179 | /* This is basically copied from mpage_end_io_read */ |
190 | static void bl_end_io_read(struct bio *bio, int err) | 180 | static void bl_end_io_read(struct bio *bio, int err) |
191 | { | 181 | { |
@@ -205,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
205 | if (!uptodate) { | 195 | if (!uptodate) { |
206 | if (!rdata->pnfs_error) | 196 | if (!rdata->pnfs_error) |
207 | rdata->pnfs_error = -EIO; | 197 | rdata->pnfs_error = -EIO; |
208 | bl_set_lo_fail(rdata->lseg); | 198 | pnfs_set_lo_fail(rdata->lseg); |
209 | } | 199 | } |
210 | bio_put(bio); | 200 | bio_put(bio); |
211 | put_parallel(par); | 201 | put_parallel(par); |
@@ -302,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) | |||
302 | bl_end_io_read, par); | 292 | bl_end_io_read, par); |
303 | if (IS_ERR(bio)) { | 293 | if (IS_ERR(bio)) { |
304 | rdata->pnfs_error = PTR_ERR(bio); | 294 | rdata->pnfs_error = PTR_ERR(bio); |
295 | bio = NULL; | ||
305 | goto out; | 296 | goto out; |
306 | } | 297 | } |
307 | } | 298 | } |
@@ -369,7 +360,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) | |||
369 | if (!uptodate) { | 360 | if (!uptodate) { |
370 | if (!wdata->pnfs_error) | 361 | if (!wdata->pnfs_error) |
371 | wdata->pnfs_error = -EIO; | 362 | wdata->pnfs_error = -EIO; |
372 | bl_set_lo_fail(wdata->lseg); | 363 | pnfs_set_lo_fail(wdata->lseg); |
373 | } | 364 | } |
374 | bio_put(bio); | 365 | bio_put(bio); |
375 | put_parallel(par); | 366 | put_parallel(par); |
@@ -385,7 +376,7 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
385 | if (!uptodate) { | 376 | if (!uptodate) { |
386 | if (!wdata->pnfs_error) | 377 | if (!wdata->pnfs_error) |
387 | wdata->pnfs_error = -EIO; | 378 | wdata->pnfs_error = -EIO; |
388 | bl_set_lo_fail(wdata->lseg); | 379 | pnfs_set_lo_fail(wdata->lseg); |
389 | } | 380 | } |
390 | bio_put(bio); | 381 | bio_put(bio); |
391 | put_parallel(par); | 382 | put_parallel(par); |
@@ -542,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) | |||
542 | fill_invalid_ext: | 533 | fill_invalid_ext: |
543 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); | 534 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); |
544 | for (;npg_zero > 0; npg_zero--) { | 535 | for (;npg_zero > 0; npg_zero--) { |
536 | if (bl_is_sector_init(be->be_inval, isect)) { | ||
537 | dprintk("isect %llu already init\n", | ||
538 | (unsigned long long)isect); | ||
539 | goto next_page; | ||
540 | } | ||
545 | /* page ref released in bl_end_io_write_zero */ | 541 | /* page ref released in bl_end_io_write_zero */ |
546 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; | 542 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; |
547 | dprintk("%s zero %dth page: index %lu isect %llu\n", | 543 | dprintk("%s zero %dth page: index %lu isect %llu\n", |
@@ -561,8 +557,7 @@ fill_invalid_ext: | |||
561 | * PageUptodate: It was read before | 557 | * PageUptodate: It was read before |
562 | * sector_initialized: already written out | 558 | * sector_initialized: already written out |
563 | */ | 559 | */ |
564 | if (PageDirty(page) || PageWriteback(page) || | 560 | if (PageDirty(page) || PageWriteback(page)) { |
565 | bl_is_sector_init(be->be_inval, isect)) { | ||
566 | print_page(page); | 561 | print_page(page); |
567 | unlock_page(page); | 562 | unlock_page(page); |
568 | page_cache_release(page); | 563 | page_cache_release(page); |
@@ -591,6 +586,7 @@ fill_invalid_ext: | |||
591 | bl_end_io_write_zero, par); | 586 | bl_end_io_write_zero, par); |
592 | if (IS_ERR(bio)) { | 587 | if (IS_ERR(bio)) { |
593 | wdata->pnfs_error = PTR_ERR(bio); | 588 | wdata->pnfs_error = PTR_ERR(bio); |
589 | bio = NULL; | ||
594 | goto out; | 590 | goto out; |
595 | } | 591 | } |
596 | /* FIXME: This should be done in bi_end_io */ | 592 | /* FIXME: This should be done in bi_end_io */ |
@@ -639,6 +635,7 @@ next_page: | |||
639 | bl_end_io_write, par); | 635 | bl_end_io_write, par); |
640 | if (IS_ERR(bio)) { | 636 | if (IS_ERR(bio)) { |
641 | wdata->pnfs_error = PTR_ERR(bio); | 637 | wdata->pnfs_error = PTR_ERR(bio); |
638 | bio = NULL; | ||
642 | goto out; | 639 | goto out; |
643 | } | 640 | } |
644 | isect += PAGE_CACHE_SECTORS; | 641 | isect += PAGE_CACHE_SECTORS; |
@@ -804,7 +801,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | |||
804 | struct nfs4_deviceid *d_id) | 801 | struct nfs4_deviceid *d_id) |
805 | { | 802 | { |
806 | struct pnfs_device *dev; | 803 | struct pnfs_device *dev; |
807 | struct pnfs_block_dev *rv = NULL; | 804 | struct pnfs_block_dev *rv; |
808 | u32 max_resp_sz; | 805 | u32 max_resp_sz; |
809 | int max_pages; | 806 | int max_pages; |
810 | struct page **pages = NULL; | 807 | struct page **pages = NULL; |
@@ -822,18 +819,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | |||
822 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | 819 | dev = kmalloc(sizeof(*dev), GFP_NOFS); |
823 | if (!dev) { | 820 | if (!dev) { |
824 | dprintk("%s kmalloc failed\n", __func__); | 821 | dprintk("%s kmalloc failed\n", __func__); |
825 | return NULL; | 822 | return ERR_PTR(-ENOMEM); |
826 | } | 823 | } |
827 | 824 | ||
828 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); | 825 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); |
829 | if (pages == NULL) { | 826 | if (pages == NULL) { |
830 | kfree(dev); | 827 | kfree(dev); |
831 | return NULL; | 828 | return ERR_PTR(-ENOMEM); |
832 | } | 829 | } |
833 | for (i = 0; i < max_pages; i++) { | 830 | for (i = 0; i < max_pages; i++) { |
834 | pages[i] = alloc_page(GFP_NOFS); | 831 | pages[i] = alloc_page(GFP_NOFS); |
835 | if (!pages[i]) | 832 | if (!pages[i]) { |
833 | rv = ERR_PTR(-ENOMEM); | ||
836 | goto out_free; | 834 | goto out_free; |
835 | } | ||
837 | } | 836 | } |
838 | 837 | ||
839 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | 838 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); |
@@ -846,8 +845,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | |||
846 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 845 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); |
847 | rc = nfs4_proc_getdeviceinfo(server, dev); | 846 | rc = nfs4_proc_getdeviceinfo(server, dev); |
848 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 847 | dprintk("%s getdevice info returns %d\n", __func__, rc); |
849 | if (rc) | 848 | if (rc) { |
849 | rv = ERR_PTR(rc); | ||
850 | goto out_free; | 850 | goto out_free; |
851 | } | ||
851 | 852 | ||
852 | rv = nfs4_blk_decode_device(server, dev); | 853 | rv = nfs4_blk_decode_device(server, dev); |
853 | out_free: | 854 | out_free: |
@@ -865,7 +866,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | |||
865 | struct pnfs_devicelist *dlist = NULL; | 866 | struct pnfs_devicelist *dlist = NULL; |
866 | struct pnfs_block_dev *bdev; | 867 | struct pnfs_block_dev *bdev; |
867 | LIST_HEAD(block_disklist); | 868 | LIST_HEAD(block_disklist); |
868 | int status = 0, i; | 869 | int status, i; |
869 | 870 | ||
870 | dprintk("%s enter\n", __func__); | 871 | dprintk("%s enter\n", __func__); |
871 | 872 | ||
@@ -897,8 +898,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | |||
897 | for (i = 0; i < dlist->num_devs; i++) { | 898 | for (i = 0; i < dlist->num_devs; i++) { |
898 | bdev = nfs4_blk_get_deviceinfo(server, fh, | 899 | bdev = nfs4_blk_get_deviceinfo(server, fh, |
899 | &dlist->dev_id[i]); | 900 | &dlist->dev_id[i]); |
900 | if (!bdev) { | 901 | if (IS_ERR(bdev)) { |
901 | status = -ENODEV; | 902 | status = PTR_ERR(bdev); |
902 | goto out_error; | 903 | goto out_error; |
903 | } | 904 | } |
904 | spin_lock(&b_mt_id->bm_lock); | 905 | spin_lock(&b_mt_id->bm_lock); |
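nfs4_blk_get_deviceinfo() now reports failure through the returned pointer itself rather than a bare NULL, so bl_set_layoutdriver() can propagate the real errno (-ENOMEM vs. the old blanket -ENODEV). A standalone sketch of the ERR_PTR/IS_ERR/PTR_ERR convention, with the kernel macros re-created here purely for illustration:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_ERRNO	4095
	#define ERR_PTR(err)	((void *)(long)(err))
	#define PTR_ERR(ptr)	((long)(ptr))
	#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	static void *get_device_sketch(int fail)
	{
		void *dev;

		if (fail)
			return ERR_PTR(-ENOMEM);	/* was: return NULL */
		dev = malloc(16);
		return dev ? dev : ERR_PTR(-ENOMEM);
	}

	int main(void)
	{
		void *dev = get_device_sketch(1);

		if (IS_ERR(dev)) {
			printf("error %ld\n", PTR_ERR(dev)); /* -12 == -ENOMEM */
			return 1;
		}
		free(dev);
		return 0;
	}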
@@ -959,7 +960,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
959 | }; | 960 | }; |
960 | 961 | ||
961 | static const struct rpc_pipe_ops bl_upcall_ops = { | 962 | static const struct rpc_pipe_ops bl_upcall_ops = { |
962 | .upcall = bl_pipe_upcall, | 963 | .upcall = rpc_pipe_generic_upcall, |
963 | .downcall = bl_pipe_downcall, | 964 | .downcall = bl_pipe_downcall, |
964 | .destroy_msg = bl_pipe_destroy_msg, | 965 | .destroy_msg = bl_pipe_destroy_msg, |
965 | }; | 966 | }; |
@@ -988,17 +989,20 @@ static int __init nfs4blocklayout_init(void) | |||
988 | mnt, | 989 | mnt, |
989 | NFS_PIPE_DIRNAME, 0, &path); | 990 | NFS_PIPE_DIRNAME, 0, &path); |
990 | if (ret) | 991 | if (ret) |
991 | goto out_remove; | 992 | goto out_putrpc; |
992 | 993 | ||
993 | bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, | 994 | bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, |
994 | &bl_upcall_ops, 0); | 995 | &bl_upcall_ops, 0); |
996 | path_put(&path); | ||
995 | if (IS_ERR(bl_device_pipe)) { | 997 | if (IS_ERR(bl_device_pipe)) { |
996 | ret = PTR_ERR(bl_device_pipe); | 998 | ret = PTR_ERR(bl_device_pipe); |
997 | goto out_remove; | 999 | goto out_putrpc; |
998 | } | 1000 | } |
999 | out: | 1001 | out: |
1000 | return ret; | 1002 | return ret; |
1001 | 1003 | ||
1004 | out_putrpc: | ||
1005 | rpc_put_mount(); | ||
1002 | out_remove: | 1006 | out_remove: |
1003 | pnfs_unregister_layoutdriver(&blocklayout_type); | 1007 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1004 | return ret; | 1008 | return ret; |
@@ -1011,6 +1015,7 @@ static void __exit nfs4blocklayout_exit(void) | |||
1011 | 1015 | ||
1012 | pnfs_unregister_layoutdriver(&blocklayout_type); | 1016 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1013 | rpc_unlink(bl_device_pipe); | 1017 | rpc_unlink(bl_device_pipe); |
1018 | rpc_put_mount(); | ||
1014 | } | 1019 | } |
1015 | 1020 | ||
1016 | MODULE_ALIAS("nfs-layouttype4-3"); | 1021 | MODULE_ALIAS("nfs-layouttype4-3"); |
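
Note: the blocklayout.c changes above convert nfs4_blk_get_deviceinfo() from returning NULL on failure to returning ERR_PTR()-encoded errnos, so bl_set_layoutdriver() can report the real failure via PTR_ERR() instead of a blanket -ENODEV. The module init path also gains an out_putrpc label (with a matching rpc_put_mount() in the exit path) so the earlier rpc_get_mount() is balanced on every path, and path_put() drops the dentry reference once the pipe is created. A minimal sketch of the ERR_PTR convention, with hypothetical names:

    #include <linux/err.h>
    #include <linux/slab.h>

    static struct foo *foo_lookup(int id)
    {
            struct foo *f = kzalloc(sizeof(*f), GFP_NOFS);

            if (!f)
                    return ERR_PTR(-ENOMEM);   /* encode the errno in the pointer */
            if (id < 0) {
                    kfree(f);
                    return ERR_PTR(-EINVAL);
            }
            return f;
    }

    /* caller side */
    struct foo *f = foo_lookup(id);
    if (IS_ERR(f))
            return PTR_ERR(f);                 /* recover the original errno */
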
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f27d827960a3..42acf7ef5992 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | struct bl_dev_msg { | 152 | struct bl_dev_msg { |
153 | int status; | 153 | int32_t status; |
154 | uint32_t major, minor; | 154 | uint32_t major, minor; |
155 | }; | 155 | }; |
156 | 156 | ||
@@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq; | |||
169 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | 169 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ |
170 | 170 | ||
171 | /* blocklayoutdev.c */ | 171 | /* blocklayoutdev.c */ |
172 | ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *, | ||
173 | char __user *, size_t); | ||
174 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | 172 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); |
175 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | 173 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); |
176 | struct block_device *nfs4_blkdev_get(dev_t dev); | 174 | struct block_device *nfs4_blkdev_get(dev_t dev); |
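
Note: bl_dev_msg is the reply parsed out of the rpc_pipefs message exchanged with the userspace blocklayout daemon, so its status field moves from a plain int to int32_t to match the fixed-width major/minor fields; fixed-width types keep the on-pipe layout independent of compiler and architecture. The bl_pipe_upcall() prototype is dropped because the function itself is deleted in blocklayoutdev.c below.
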
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a83b393fb01c..d08ba9107fde 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev) | |||
79 | return blkdev_put(bdev, FMODE_READ); | 79 | return blkdev_put(bdev, FMODE_READ); |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | ||
83 | * Shouldn't there be a rpc_generic_upcall() to do this for us? | ||
84 | */ | ||
85 | ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | ||
86 | char __user *dst, size_t buflen) | ||
87 | { | ||
88 | char *data = (char *)msg->data + msg->copied; | ||
89 | size_t mlen = min(msg->len - msg->copied, buflen); | ||
90 | unsigned long left; | ||
91 | |||
92 | left = copy_to_user(dst, data, mlen); | ||
93 | if (left == mlen) { | ||
94 | msg->errno = -EFAULT; | ||
95 | return -EFAULT; | ||
96 | } | ||
97 | |||
98 | mlen -= left; | ||
99 | msg->copied += mlen; | ||
100 | msg->errno = 0; | ||
101 | return mlen; | ||
102 | } | ||
103 | |||
104 | static struct bl_dev_msg bl_mount_reply; | 82 | static struct bl_dev_msg bl_mount_reply; |
105 | 83 | ||
106 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | 84 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, |
@@ -131,7 +109,7 @@ struct pnfs_block_dev * | |||
131 | nfs4_blk_decode_device(struct nfs_server *server, | 109 | nfs4_blk_decode_device(struct nfs_server *server, |
132 | struct pnfs_device *dev) | 110 | struct pnfs_device *dev) |
133 | { | 111 | { |
134 | struct pnfs_block_dev *rv = NULL; | 112 | struct pnfs_block_dev *rv; |
135 | struct block_device *bd = NULL; | 113 | struct block_device *bd = NULL; |
136 | struct rpc_pipe_msg msg; | 114 | struct rpc_pipe_msg msg; |
137 | struct bl_msg_hdr bl_msg = { | 115 | struct bl_msg_hdr bl_msg = { |
@@ -141,7 +119,7 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
141 | uint8_t *dataptr; | 119 | uint8_t *dataptr; |
142 | DECLARE_WAITQUEUE(wq, current); | 120 | DECLARE_WAITQUEUE(wq, current); |
143 | struct bl_dev_msg *reply = &bl_mount_reply; | 121 | struct bl_dev_msg *reply = &bl_mount_reply; |
144 | int offset, len, i; | 122 | int offset, len, i, rc; |
145 | 123 | ||
146 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | 124 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); |
147 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | 125 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, |
@@ -168,8 +146,10 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
168 | 146 | ||
169 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | 147 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); |
170 | add_wait_queue(&bl_wq, &wq); | 148 | add_wait_queue(&bl_wq, &wq); |
171 | if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { | 149 | rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); |
150 | if (rc < 0) { | ||
172 | remove_wait_queue(&bl_wq, &wq); | 151 | remove_wait_queue(&bl_wq, &wq); |
152 | rv = ERR_PTR(rc); | ||
173 | goto out; | 153 | goto out; |
174 | } | 154 | } |
175 | 155 | ||
@@ -187,8 +167,9 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
187 | 167 | ||
188 | bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); | 168 | bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); |
189 | if (IS_ERR(bd)) { | 169 | if (IS_ERR(bd)) { |
190 | dprintk("%s failed to open device : %ld\n", | 170 | rc = PTR_ERR(bd); |
191 | __func__, PTR_ERR(bd)); | 171 | dprintk("%s failed to open device : %d\n", __func__, rc); |
172 | rv = ERR_PTR(rc); | ||
192 | goto out; | 173 | goto out; |
193 | } | 174 | } |
194 | 175 | ||
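
Note: the deleted bl_pipe_upcall() was an open-coded copy of the same routine used by other rpc_pipefs clients; the comment it carried ("Shouldn't there be a rpc_generic_upcall() to do this for us?") is answered by pointing the .upcall op at the new shared rpc_pipe_generic_upcall(). The pattern the helper implements, reconstructed from the removed body:

    /* copy the queued message to userspace, resumable across short reads */
    ssize_t pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
                        char __user *dst, size_t buflen)
    {
            char *data = (char *)msg->data + msg->copied;
            size_t mlen = min(msg->len - msg->copied, buflen);
            unsigned long left;

            left = copy_to_user(dst, data, mlen);
            if (left == mlen) {
                    msg->errno = -EFAULT;   /* nothing was copied */
                    return -EFAULT;
            }
            mlen -= left;
            msg->copied += mlen;            /* next read resumes here */
            msg->errno = 0;
            return mlen;
    }

nfs4_blk_decode_device() is also switched to the ERR_PTR() convention, so upcall-queueing and blkdev-open failures now reach the caller as real errno values.
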
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e3d294269058..516f3375e067 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -125,7 +125,7 @@ nfs4_callback_up(struct svc_serv *serv) | |||
125 | else | 125 | else |
126 | goto out_err; | 126 | goto out_err; |
127 | 127 | ||
128 | return svc_prepare_thread(serv, &serv->sv_pools[0]); | 128 | return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); |
129 | 129 | ||
130 | out_err: | 130 | out_err: |
131 | if (ret == 0) | 131 | if (ret == 0) |
@@ -199,7 +199,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | |||
199 | INIT_LIST_HEAD(&serv->sv_cb_list); | 199 | INIT_LIST_HEAD(&serv->sv_cb_list); |
200 | spin_lock_init(&serv->sv_cb_lock); | 200 | spin_lock_init(&serv->sv_cb_lock); |
201 | init_waitqueue_head(&serv->sv_cb_waitq); | 201 | init_waitqueue_head(&serv->sv_cb_waitq); |
202 | rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); | 202 | rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); |
203 | if (IS_ERR(rqstp)) { | 203 | if (IS_ERR(rqstp)) { |
204 | svc_xprt_put(serv->sv_bc_xprt); | 204 | svc_xprt_put(serv->sv_bc_xprt); |
205 | serv->sv_bc_xprt = NULL; | 205 | serv->sv_bc_xprt = NULL; |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b257383bb565..07df5f1d85e5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum { | |||
38 | struct cb_process_state { | 38 | struct cb_process_state { |
39 | __be32 drc_status; | 39 | __be32 drc_status; |
40 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
41 | int slotid; | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | struct cb_compound_hdr_arg { | 44 | struct cb_compound_hdr_arg { |
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( | |||
166 | void *dummy, struct cb_process_state *cps); | 167 | void *dummy, struct cb_process_state *cps); |
167 | 168 | ||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 169 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | ||
170 | 170 | ||
171 | struct cb_devicenotifyitem { | 171 | struct cb_devicenotifyitem { |
172 | uint32_t cbd_notify_type; | 172 | uint32_t cbd_notify_type; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 74780f9f852c..43926add945b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
348 | /* Normal */ | 348 | /* Normal */ |
349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { | 349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { |
350 | slot->seq_nr++; | 350 | slot->seq_nr++; |
351 | return htonl(NFS4_OK); | 351 | goto out_ok; |
352 | } | 352 | } |
353 | 353 | ||
354 | /* Replay */ | 354 | /* Replay */ |
@@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
367 | /* Wraparound */ | 367 | /* Wraparound */ |
368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { | 368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { |
369 | slot->seq_nr = 1; | 369 | slot->seq_nr = 1; |
370 | return htonl(NFS4_OK); | 370 | goto out_ok; |
371 | } | 371 | } |
372 | 372 | ||
373 | /* Misordered request */ | 373 | /* Misordered request */ |
374 | return htonl(NFS4ERR_SEQ_MISORDERED); | 374 | return htonl(NFS4ERR_SEQ_MISORDERED); |
375 | out_ok: | ||
376 | tbl->highest_used_slotid = args->csa_slotid; | ||
377 | return htonl(NFS4_OK); | ||
375 | } | 378 | } |
376 | 379 | ||
377 | /* | 380 | /* |
@@ -433,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
433 | struct cb_sequenceres *res, | 436 | struct cb_sequenceres *res, |
434 | struct cb_process_state *cps) | 437 | struct cb_process_state *cps) |
435 | { | 438 | { |
439 | struct nfs4_slot_table *tbl; | ||
436 | struct nfs_client *clp; | 440 | struct nfs_client *clp; |
437 | int i; | 441 | int i; |
438 | __be32 status = htonl(NFS4ERR_BADSESSION); | 442 | __be32 status = htonl(NFS4ERR_BADSESSION); |
439 | 443 | ||
440 | cps->clp = NULL; | ||
441 | |||
442 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); | 444 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); |
443 | if (clp == NULL) | 445 | if (clp == NULL) |
444 | goto out; | 446 | goto out; |
445 | 447 | ||
448 | tbl = &clp->cl_session->bc_slot_table; | ||
449 | |||
450 | spin_lock(&tbl->slot_tbl_lock); | ||
446 | /* state manager is resetting the session */ | 451 | /* state manager is resetting the session */ |
447 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { | 452 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { |
448 | status = NFS4ERR_DELAY; | 453 | spin_unlock(&tbl->slot_tbl_lock); |
454 | status = htonl(NFS4ERR_DELAY); | ||
455 | /* Return NFS4ERR_BADSESSION if we're draining the session | ||
456 | * in order to reset it. | ||
457 | */ | ||
458 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
459 | status = htonl(NFS4ERR_BADSESSION); | ||
449 | goto out; | 460 | goto out; |
450 | } | 461 | } |
451 | 462 | ||
452 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); | 463 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); |
464 | spin_unlock(&tbl->slot_tbl_lock); | ||
453 | if (status) | 465 | if (status) |
454 | goto out; | 466 | goto out; |
455 | 467 | ||
468 | cps->slotid = args->csa_slotid; | ||
469 | |||
456 | /* | 470 | /* |
457 | * Check for pending referring calls. If a match is found, a | 471 | * Check for pending referring calls. If a match is found, a |
458 | * related callback was received before the response to the original | 472 | * related callback was received before the response to the original |
@@ -469,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
469 | res->csr_slotid = args->csa_slotid; | 483 | res->csr_slotid = args->csa_slotid; |
470 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 484 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
471 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 485 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
472 | nfs4_cb_take_slot(clp); | ||
473 | 486 | ||
474 | out: | 487 | out: |
475 | cps->clp = clp; /* put in nfs4_callback_compound */ | 488 | cps->clp = clp; /* put in nfs4_callback_compound */ |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c6c86a77e043..918ad647afea 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) | |||
754 | * Let the state manager know callback processing done. | 754 | * Let the state manager know callback processing done. |
755 | * A single slot, so highest used slotid is either 0 or -1 | 755 | * A single slot, so highest used slotid is either 0 or -1 |
756 | */ | 756 | */ |
757 | tbl->highest_used_slotid--; | 757 | tbl->highest_used_slotid = -1; |
758 | nfs4_check_drain_bc_complete(session); | 758 | nfs4_check_drain_bc_complete(session); |
759 | spin_unlock(&tbl->slot_tbl_lock); | 759 | spin_unlock(&tbl->slot_tbl_lock); |
760 | } | 760 | } |
761 | 761 | ||
762 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 762 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
763 | { | 763 | { |
764 | if (clp && clp->cl_session) | 764 | if (cps->slotid != -1) |
765 | nfs4_callback_free_slot(clp->cl_session); | 765 | nfs4_callback_free_slot(cps->clp->cl_session); |
766 | } | ||
767 | |||
768 | /* A single slot, so highest used slotid is either 0 or -1 */ | ||
769 | void nfs4_cb_take_slot(struct nfs_client *clp) | ||
770 | { | ||
771 | struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; | ||
772 | |||
773 | spin_lock(&tbl->slot_tbl_lock); | ||
774 | tbl->highest_used_slotid++; | ||
775 | BUG_ON(tbl->highest_used_slotid != 0); | ||
776 | spin_unlock(&tbl->slot_tbl_lock); | ||
777 | } | 766 | } |
778 | 767 | ||
779 | #else /* CONFIG_NFS_V4_1 */ | 768 | #else /* CONFIG_NFS_V4_1 */ |
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
784 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | 773 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); |
785 | } | 774 | } |
786 | 775 | ||
787 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 776 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
788 | { | 777 | { |
789 | } | 778 | } |
790 | #endif /* CONFIG_NFS_V4_1 */ | 779 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
866 | struct cb_process_state cps = { | 855 | struct cb_process_state cps = { |
867 | .drc_status = 0, | 856 | .drc_status = 0, |
868 | .clp = NULL, | 857 | .clp = NULL, |
858 | .slotid = -1, | ||
869 | }; | 859 | }; |
870 | unsigned int nops = 0; | 860 | unsigned int nops = 0; |
871 | 861 | ||
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
906 | 896 | ||
907 | *hdr_res.status = status; | 897 | *hdr_res.status = status; |
908 | *hdr_res.nops = htonl(nops); | 898 | *hdr_res.nops = htonl(nops); |
909 | nfs4_cb_free_slot(cps.clp); | 899 | nfs4_cb_free_slot(&cps); |
910 | nfs_put_client(cps.clp); | 900 | nfs_put_client(cps.clp); |
911 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); | 901 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); |
912 | return rpc_success; | 902 | return rpc_success; |
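
Note: the callback hunks close a slot-accounting race in the NFSv4.1 backchannel. validate_seqid() now claims the single slot (tbl->highest_used_slotid = args->csa_slotid) while nfs4_callback_sequence() holds slot_tbl_lock, the claimed id is recorded in cb_process_state, and nfs4_cb_free_slot() releases the slot only when cps->slotid != -1. The old nfs4_cb_take_slot(), with its BUG_ON, goes away, and a compound that fails early can no longer free a slot it never took. nfs4_callback_sequence() also gains the missing htonl() around NFS4ERR_DELAY and answers NFS4ERR_BADSESSION when the session is being drained for a reset. The sentinel pattern, in outline:

    struct cb_process_state cps = { .slotid = -1 }; /* -1: no slot held */

    spin_lock(&tbl->slot_tbl_lock);
    status = validate_seqid(tbl, args);             /* claims the slot on success */
    spin_unlock(&tbl->slot_tbl_lock);
    if (status == htonl(NFS4_OK))
            cps.slotid = args->csa_slotid;

    /* ... process the rest of the compound ... */

    if (cps.slotid != -1)                           /* free only what was taken */
            nfs4_callback_free_slot(cps.clp->cl_session);
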
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5833fbbf59b0..873bf00d51a2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -336,11 +336,12 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, | |||
336 | const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; | 336 | const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; |
337 | const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; | 337 | const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; |
338 | 338 | ||
339 | if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && | 339 | if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) |
340 | sin1->sin6_scope_id != sin2->sin6_scope_id) | ||
341 | return 0; | 340 | return 0; |
341 | else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) | ||
342 | return sin1->sin6_scope_id == sin2->sin6_scope_id; | ||
342 | 343 | ||
343 | return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); | 344 | return 1; |
344 | } | 345 | } |
345 | #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ | 346 | #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ |
346 | static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, | 347 | static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, |
@@ -1867,6 +1868,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1867 | /* display one transport per line on subsequent lines */ | 1868 | /* display one transport per line on subsequent lines */ |
1868 | clp = list_entry(v, struct nfs_client, cl_share_link); | 1869 | clp = list_entry(v, struct nfs_client, cl_share_link); |
1869 | 1870 | ||
1871 | /* Check if the client is initialized */ | ||
1872 | if (clp->cl_cons_state != NFS_CS_READY) | ||
1873 | return 0; | ||
1874 | |||
1870 | seq_printf(m, "v%u %s %s %3d %s\n", | 1875 | seq_printf(m, "v%u %s %s %3d %s\n", |
1871 | clp->rpc_ops->version, | 1876 | clp->rpc_ops->version, |
1872 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), | 1877 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
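
Note: nfs_sockaddr_match_ipaddr6() is restructured so that differing addresses never match, and the scope id is consulted only for link-local addresses, where the same fe80:: address can legitimately appear on several interfaces (the rewrite also switches from the ipv6_addr_scope() test to ipv6_addr_type()). The equivalent logic as a sketch:

    if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
            return 0;                               /* different addresses */
    if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
            return sin1->sin6_scope_id == sin2->sin6_scope_id;
    return 1;                                       /* non-link-local: address suffices */

The second hunk makes nfs_server_list_show() skip clients whose cl_cons_state is not yet NFS_CS_READY, so the /proc walker does not touch transport fields of a client that is still being initialised.
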
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 321a66bc3846..7f2654069806 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -240,7 +240,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
240 | sizeof(delegation->stateid.data)); | 240 | sizeof(delegation->stateid.data)); |
241 | delegation->type = res->delegation_type; | 241 | delegation->type = res->delegation_type; |
242 | delegation->maxsize = res->maxsize; | 242 | delegation->maxsize = res->maxsize; |
243 | delegation->change_attr = nfsi->change_attr; | 243 | delegation->change_attr = inode->i_version; |
244 | delegation->cred = get_rpccred(cred); | 244 | delegation->cred = get_rpccred(cred); |
245 | delegation->inode = inode; | 245 | delegation->inode = inode; |
246 | delegation->flags = 1<<NFS_DELEGATION_REFERENCED; | 246 | delegation->flags = 1<<NFS_DELEGATION_REFERENCED; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 28b8c3f3cda3..91c01f0a4c3b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -180,8 +180,6 @@ force_reval: | |||
180 | 180 | ||
181 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | 181 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) |
182 | { | 182 | { |
183 | loff_t loff; | ||
184 | |||
185 | dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", | 183 | dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", |
186 | filp->f_path.dentry->d_parent->d_name.name, | 184 | filp->f_path.dentry->d_parent->d_name.name, |
187 | filp->f_path.dentry->d_name.name, | 185 | filp->f_path.dentry->d_name.name, |
@@ -197,13 +195,9 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
197 | int retval = nfs_revalidate_file_size(inode, filp); | 195 | int retval = nfs_revalidate_file_size(inode, filp); |
198 | if (retval < 0) | 196 | if (retval < 0) |
199 | return (loff_t)retval; | 197 | return (loff_t)retval; |
198 | } | ||
200 | 199 | ||
201 | spin_lock(&inode->i_lock); | 200 | return generic_file_llseek(filp, offset, origin); |
202 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
203 | spin_unlock(&inode->i_lock); | ||
204 | } else | ||
205 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
206 | return loff; | ||
207 | } | 201 | } |
208 | 202 | ||
209 | /* | 203 | /* |
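
Note: nfs_file_llseek() no longer wraps generic_file_llseek_unlocked() in inode->i_lock for the SEEK_END case; after revalidating the file size it simply calls generic_file_llseek(), which performs the necessary locking itself. The body reduces to:

    if (origin == SEEK_END) {
            /* SEEK_END needs an up-to-date file size */
            int retval = nfs_revalidate_file_size(inode, filp);
            if (retval < 0)
                    return (loff_t)retval;
    }
    return generic_file_llseek(filp, offset, origin);
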
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 5b1006480bc2..7cf2c4699b08 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -212,7 +212,7 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, | |||
212 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | 212 | auxdata.ctime = nfsi->vfs_inode.i_ctime; |
213 | 213 | ||
214 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | 214 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) |
215 | auxdata.change_attr = nfsi->change_attr; | 215 | auxdata.change_attr = nfsi->vfs_inode.i_version; |
216 | 216 | ||
217 | if (bufmax > sizeof(auxdata)) | 217 | if (bufmax > sizeof(auxdata)) |
218 | bufmax = sizeof(auxdata); | 218 | bufmax = sizeof(auxdata); |
@@ -244,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, | |||
244 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | 244 | auxdata.ctime = nfsi->vfs_inode.i_ctime; |
245 | 245 | ||
246 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | 246 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) |
247 | auxdata.change_attr = nfsi->change_attr; | 247 | auxdata.change_attr = nfsi->vfs_inode.i_version; |
248 | 248 | ||
249 | if (memcmp(data, &auxdata, datalen) != 0) | 249 | if (memcmp(data, &auxdata, datalen) != 0) |
250 | return FSCACHE_CHECKAUX_OBSOLETE; | 250 | return FSCACHE_CHECKAUX_OBSOLETE; |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f20801ae0a16..47d1c6ff2d8e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -336,8 +336,6 @@ struct idmap { | |||
336 | struct idmap_hashtable idmap_group_hash; | 336 | struct idmap_hashtable idmap_group_hash; |
337 | }; | 337 | }; |
338 | 338 | ||
339 | static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, | ||
340 | char __user *, size_t); | ||
341 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, | 339 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, |
342 | size_t); | 340 | size_t); |
343 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | 341 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); |
@@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | |||
345 | static unsigned int fnvhash32(const void *, size_t); | 343 | static unsigned int fnvhash32(const void *, size_t); |
346 | 344 | ||
347 | static const struct rpc_pipe_ops idmap_upcall_ops = { | 345 | static const struct rpc_pipe_ops idmap_upcall_ops = { |
348 | .upcall = idmap_pipe_upcall, | 346 | .upcall = rpc_pipe_generic_upcall, |
349 | .downcall = idmap_pipe_downcall, | 347 | .downcall = idmap_pipe_downcall, |
350 | .destroy_msg = idmap_pipe_destroy_msg, | 348 | .destroy_msg = idmap_pipe_destroy_msg, |
351 | }; | 349 | }; |
@@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, | |||
595 | return ret; | 593 | return ret; |
596 | } | 594 | } |
597 | 595 | ||
598 | /* RPC pipefs upcall/downcall routines */ | ||
599 | static ssize_t | ||
600 | idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | ||
601 | char __user *dst, size_t buflen) | ||
602 | { | ||
603 | char *data = (char *)msg->data + msg->copied; | ||
604 | size_t mlen = min(msg->len, buflen); | ||
605 | unsigned long left; | ||
606 | |||
607 | left = copy_to_user(dst, data, mlen); | ||
608 | if (left == mlen) { | ||
609 | msg->errno = -EFAULT; | ||
610 | return -EFAULT; | ||
611 | } | ||
612 | |||
613 | mlen -= left; | ||
614 | msg->copied += mlen; | ||
615 | msg->errno = 0; | ||
616 | return mlen; | ||
617 | } | ||
618 | |||
619 | static ssize_t | 596 | static ssize_t |
620 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | 597 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) |
621 | { | 598 | { |
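
Note: idmap_pipe_upcall() was another verbatim copy of the same routine (this one computed mlen as min(msg->len, buflen) without accounting for msg->copied); it too is replaced by the shared rpc_pipe_generic_upcall — see the sketch after the blocklayoutdev.c diff above.
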
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fe1203797b2b..4dc6d078f108 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -318,7 +318,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
318 | memset(&inode->i_atime, 0, sizeof(inode->i_atime)); | 318 | memset(&inode->i_atime, 0, sizeof(inode->i_atime)); |
319 | memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); | 319 | memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); |
320 | memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); | 320 | memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); |
321 | nfsi->change_attr = 0; | 321 | inode->i_version = 0; |
322 | inode->i_size = 0; | 322 | inode->i_size = 0; |
323 | inode->i_nlink = 0; | 323 | inode->i_nlink = 0; |
324 | inode->i_uid = -2; | 324 | inode->i_uid = -2; |
@@ -344,7 +344,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
344 | | NFS_INO_INVALID_ACCESS | 344 | | NFS_INO_INVALID_ACCESS |
345 | | NFS_INO_INVALID_ACL; | 345 | | NFS_INO_INVALID_ACL; |
346 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) | 346 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) |
347 | nfsi->change_attr = fattr->change_attr; | 347 | inode->i_version = fattr->change_attr; |
348 | else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) | 348 | else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) |
349 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | 349 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR |
350 | | NFS_INO_INVALID_DATA; | 350 | | NFS_INO_INVALID_DATA; |
@@ -897,8 +897,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr | |||
897 | 897 | ||
898 | if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) | 898 | if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) |
899 | && (fattr->valid & NFS_ATTR_FATTR_CHANGE) | 899 | && (fattr->valid & NFS_ATTR_FATTR_CHANGE) |
900 | && nfsi->change_attr == fattr->pre_change_attr) { | 900 | && inode->i_version == fattr->pre_change_attr) { |
901 | nfsi->change_attr = fattr->change_attr; | 901 | inode->i_version = fattr->change_attr; |
902 | if (S_ISDIR(inode->i_mode)) | 902 | if (S_ISDIR(inode->i_mode)) |
903 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 903 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
904 | ret |= NFS_INO_INVALID_ATTR; | 904 | ret |= NFS_INO_INVALID_ATTR; |
@@ -952,7 +952,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
952 | return -EIO; | 952 | return -EIO; |
953 | 953 | ||
954 | if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && | 954 | if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && |
955 | nfsi->change_attr != fattr->change_attr) | 955 | inode->i_version != fattr->change_attr) |
956 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 956 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
957 | 957 | ||
958 | /* Verify a few of the more important attributes */ | 958 | /* Verify a few of the more important attributes */ |
@@ -1163,7 +1163,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
1163 | } | 1163 | } |
1164 | if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && | 1164 | if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && |
1165 | (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { | 1165 | (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { |
1166 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | 1166 | fattr->pre_change_attr = inode->i_version; |
1167 | fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; | 1167 | fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; |
1168 | } | 1168 | } |
1169 | if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && | 1169 | if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && |
@@ -1244,13 +1244,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1244 | 1244 | ||
1245 | /* More cache consistency checks */ | 1245 | /* More cache consistency checks */ |
1246 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { | 1246 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { |
1247 | if (nfsi->change_attr != fattr->change_attr) { | 1247 | if (inode->i_version != fattr->change_attr) { |
1248 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1248 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
1249 | inode->i_sb->s_id, inode->i_ino); | 1249 | inode->i_sb->s_id, inode->i_ino); |
1250 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1250 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1251 | if (S_ISDIR(inode->i_mode)) | 1251 | if (S_ISDIR(inode->i_mode)) |
1252 | nfs_force_lookup_revalidate(inode); | 1252 | nfs_force_lookup_revalidate(inode); |
1253 | nfsi->change_attr = fattr->change_attr; | 1253 | inode->i_version = fattr->change_attr; |
1254 | } | 1254 | } |
1255 | } else if (server->caps & NFS_CAP_CHANGE_ATTR) | 1255 | } else if (server->caps & NFS_CAP_CHANGE_ATTR) |
1256 | invalid |= save_cache_validity; | 1256 | invalid |= save_cache_validity; |
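
Note: delegation.c, fscache-index.c and inode.c (and update_changeattr() in nfs4proc.c below) all stop using the NFS-private nfsi->change_attr and keep the NFSv4 change attribute in the generic inode->i_version, so VFS-level code can observe it without NFS-specific knowledge. The revalidation logic keeps its shape; condensed from nfs_update_inode() above:

    if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) &&
        inode->i_version != fattr->change_attr) {
            /* server-side change: invalidate cached attributes and data */
            invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA;
            inode->i_version = fattr->change_attr;
    }
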
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ab12913dd473..c1a1bd8ddf1c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -457,13 +457,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
457 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 457 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
458 | } | 458 | } |
459 | 459 | ||
460 | /* | ||
461 | * Helper for restarting RPC calls in the possible presence of NFSv4.1 | ||
462 | * sessions. | ||
463 | */ | ||
464 | static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) | ||
465 | { | ||
466 | if (nfs4_has_session(clp)) | ||
467 | return rpc_restart_call_prepare(task); | ||
468 | return rpc_restart_call(task); | ||
469 | } | ||
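
Note: nfs_restart_rpc() existed only to pick rpc_restart_call_prepare() for v4.1 session mounts and plain rpc_restart_call() otherwise. Since rpc_restart_call_prepare() is safe in both cases — it only routes through the prepare step when the task defines one — every caller (the filelayout and nfs4proc paths below) is converted to call it directly and the wrapper is removed.
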
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 1ec1a85fa71c..693ae22f8731 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,30 +13,6 @@ | |||
13 | 13 | ||
14 | struct idmap; | 14 | struct idmap; |
15 | 15 | ||
16 | /* | ||
17 | * In a seqid-mutating op, this macro controls which error return | ||
18 | * values trigger incrementation of the seqid. | ||
19 | * | ||
20 | * from rfc 3010: | ||
21 | * The client MUST monotonically increment the sequence number for the | ||
22 | * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE | ||
23 | * operations. This is true even in the event that the previous | ||
24 | * operation that used the sequence number received an error. The only | ||
25 | * exception to this rule is if the previous operation received one of | ||
26 | * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, | ||
27 | * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, | ||
28 | * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. | ||
29 | * | ||
30 | */ | ||
31 | #define seqid_mutating_err(err) \ | ||
32 | (((err) != NFSERR_STALE_CLIENTID) && \ | ||
33 | ((err) != NFSERR_STALE_STATEID) && \ | ||
34 | ((err) != NFSERR_BAD_STATEID) && \ | ||
35 | ((err) != NFSERR_BAD_SEQID) && \ | ||
36 | ((err) != NFSERR_BAD_XDR) && \ | ||
37 | ((err) != NFSERR_RESOURCE) && \ | ||
38 | ((err) != NFSERR_NOFILEHANDLE)) | ||
39 | |||
40 | enum nfs4_client_state { | 16 | enum nfs4_client_state { |
41 | NFS4CLNT_MANAGER_RUNNING = 0, | 17 | NFS4CLNT_MANAGER_RUNNING = 0, |
42 | NFS4CLNT_CHECK_LEASE, | 18 | NFS4CLNT_CHECK_LEASE, |
@@ -56,6 +32,9 @@ enum nfs4_session_state { | |||
56 | NFS4_SESSION_DRAINING, | 32 | NFS4_SESSION_DRAINING, |
57 | }; | 33 | }; |
58 | 34 | ||
35 | #define NFS4_RENEW_TIMEOUT 0x01 | ||
36 | #define NFS4_RENEW_DELEGATION_CB 0x02 | ||
37 | |||
59 | struct nfs4_minor_version_ops { | 38 | struct nfs4_minor_version_ops { |
60 | u32 minor_version; | 39 | u32 minor_version; |
61 | 40 | ||
@@ -225,7 +204,7 @@ struct nfs4_state_recovery_ops { | |||
225 | }; | 204 | }; |
226 | 205 | ||
227 | struct nfs4_state_maintenance_ops { | 206 | struct nfs4_state_maintenance_ops { |
228 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); | 207 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); |
229 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); | 208 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); |
230 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); | 209 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); |
231 | }; | 210 | }; |
@@ -237,8 +216,6 @@ extern const struct inode_operations nfs4_dir_inode_operations; | |||
237 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); | 216 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); |
238 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); | 217 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); |
239 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); | 218 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); |
240 | extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); | ||
241 | extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); | ||
242 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); | 219 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); |
243 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); | 220 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); |
244 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); | 221 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); |
@@ -349,6 +326,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); | |||
349 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 326 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
350 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 327 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
351 | extern void nfs4_schedule_state_manager(struct nfs_client *); | 328 | extern void nfs4_schedule_state_manager(struct nfs_client *); |
329 | extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); | ||
352 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); | 330 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); |
353 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); | 331 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); |
354 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); | 332 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e8915d4840ad..09119418402f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) | |||
77 | BUG(); | 77 | BUG(); |
78 | } | 78 | } |
79 | 79 | ||
80 | /* For data server errors we don't recover from */ | ||
81 | static void | ||
82 | filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
83 | { | ||
84 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
85 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
86 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
87 | } else { | ||
88 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
89 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
90 | } | ||
91 | } | ||
92 | |||
93 | static int filelayout_async_handle_error(struct rpc_task *task, | 80 | static int filelayout_async_handle_error(struct rpc_task *task, |
94 | struct nfs4_state *state, | 81 | struct nfs4_state *state, |
95 | struct nfs_client *clp, | 82 | struct nfs_client *clp, |
@@ -135,7 +122,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
135 | static int filelayout_read_done_cb(struct rpc_task *task, | 122 | static int filelayout_read_done_cb(struct rpc_task *task, |
136 | struct nfs_read_data *data) | 123 | struct nfs_read_data *data) |
137 | { | 124 | { |
138 | struct nfs_client *clp = data->ds_clp; | ||
139 | int reset = 0; | 125 | int reset = 0; |
140 | 126 | ||
141 | dprintk("%s DS read\n", __func__); | 127 | dprintk("%s DS read\n", __func__); |
@@ -145,11 +131,10 @@ static int filelayout_read_done_cb(struct rpc_task *task, | |||
145 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | 131 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", |
146 | __func__, data->ds_clp, data->ds_clp->cl_session); | 132 | __func__, data->ds_clp, data->ds_clp->cl_session); |
147 | if (reset) { | 133 | if (reset) { |
148 | filelayout_set_lo_fail(data->lseg); | 134 | pnfs_set_lo_fail(data->lseg); |
149 | nfs4_reset_read(task, data); | 135 | nfs4_reset_read(task, data); |
150 | clp = NFS_SERVER(data->inode)->nfs_client; | ||
151 | } | 136 | } |
152 | nfs_restart_rpc(task, clp); | 137 | rpc_restart_call_prepare(task); |
153 | return -EAGAIN; | 138 | return -EAGAIN; |
154 | } | 139 | } |
155 | 140 | ||
@@ -216,17 +201,13 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||
216 | 201 | ||
217 | if (filelayout_async_handle_error(task, data->args.context->state, | 202 | if (filelayout_async_handle_error(task, data->args.context->state, |
218 | data->ds_clp, &reset) == -EAGAIN) { | 203 | data->ds_clp, &reset) == -EAGAIN) { |
219 | struct nfs_client *clp; | ||
220 | |||
221 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | 204 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", |
222 | __func__, data->ds_clp, data->ds_clp->cl_session); | 205 | __func__, data->ds_clp, data->ds_clp->cl_session); |
223 | if (reset) { | 206 | if (reset) { |
224 | filelayout_set_lo_fail(data->lseg); | 207 | pnfs_set_lo_fail(data->lseg); |
225 | nfs4_reset_write(task, data); | 208 | nfs4_reset_write(task, data); |
226 | clp = NFS_SERVER(data->inode)->nfs_client; | 209 | } |
227 | } else | 210 | rpc_restart_call_prepare(task); |
228 | clp = data->ds_clp; | ||
229 | nfs_restart_rpc(task, clp); | ||
230 | return -EAGAIN; | 211 | return -EAGAIN; |
231 | } | 212 | } |
232 | 213 | ||
@@ -256,9 +237,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
256 | __func__, data->ds_clp, data->ds_clp->cl_session); | 237 | __func__, data->ds_clp, data->ds_clp->cl_session); |
257 | if (reset) { | 238 | if (reset) { |
258 | prepare_to_resend_writes(data); | 239 | prepare_to_resend_writes(data); |
259 | filelayout_set_lo_fail(data->lseg); | 240 | pnfs_set_lo_fail(data->lseg); |
260 | } else | 241 | } else |
261 | nfs_restart_rpc(task, data->ds_clp); | 242 | rpc_restart_call_prepare(task); |
262 | return -EAGAIN; | 243 | return -EAGAIN; |
263 | } | 244 | } |
264 | 245 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8c77039e7a81..d2ae413c986a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -73,9 +73,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data); | |||
73 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 73 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
74 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 74 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
75 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 75 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
76 | static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir, | ||
77 | const struct qstr *name, struct nfs_fh *fhandle, | ||
78 | struct nfs_fattr *fattr); | ||
79 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 76 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
80 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 77 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
81 | struct nfs_fattr *fattr, struct iattr *sattr, | 78 | struct nfs_fattr *fattr, struct iattr *sattr, |
@@ -753,9 +750,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | |||
753 | 750 | ||
754 | spin_lock(&dir->i_lock); | 751 | spin_lock(&dir->i_lock); |
755 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; | 752 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; |
756 | if (!cinfo->atomic || cinfo->before != nfsi->change_attr) | 753 | if (!cinfo->atomic || cinfo->before != dir->i_version) |
757 | nfs_force_lookup_revalidate(dir); | 754 | nfs_force_lookup_revalidate(dir); |
758 | nfsi->change_attr = cinfo->after; | 755 | dir->i_version = cinfo->after; |
759 | spin_unlock(&dir->i_lock); | 756 | spin_unlock(&dir->i_lock); |
760 | } | 757 | } |
761 | 758 | ||
@@ -1596,8 +1593,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
1596 | int status; | 1593 | int status; |
1597 | 1594 | ||
1598 | status = nfs4_run_open_task(data, 0); | 1595 | status = nfs4_run_open_task(data, 0); |
1599 | if (status != 0 || !data->rpc_done) | 1596 | if (!data->rpc_done) |
1597 | return status; | ||
1598 | if (status != 0) { | ||
1599 | if (status == -NFS4ERR_BADNAME && | ||
1600 | !(o_arg->open_flags & O_CREAT)) | ||
1601 | return -ENOENT; | ||
1600 | return status; | 1602 | return status; |
1603 | } | ||
1601 | 1604 | ||
1602 | if (o_arg->open_flags & O_CREAT) { | 1605 | if (o_arg->open_flags & O_CREAT) { |
1603 | update_changeattr(dir, &o_res->cinfo); | 1606 | update_changeattr(dir, &o_res->cinfo); |
@@ -2408,14 +2411,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2408 | return status; | 2411 | return status; |
2409 | } | 2412 | } |
2410 | 2413 | ||
2411 | static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, | 2414 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, |
2412 | const struct nfs_fh *dirfh, const struct qstr *name, | 2415 | const struct qstr *name, struct nfs_fh *fhandle, |
2413 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2416 | struct nfs_fattr *fattr) |
2414 | { | 2417 | { |
2418 | struct nfs_server *server = NFS_SERVER(dir); | ||
2415 | int status; | 2419 | int status; |
2416 | struct nfs4_lookup_arg args = { | 2420 | struct nfs4_lookup_arg args = { |
2417 | .bitmask = server->attr_bitmask, | 2421 | .bitmask = server->attr_bitmask, |
2418 | .dir_fh = dirfh, | 2422 | .dir_fh = NFS_FH(dir), |
2419 | .name = name, | 2423 | .name = name, |
2420 | }; | 2424 | }; |
2421 | struct nfs4_lookup_res res = { | 2425 | struct nfs4_lookup_res res = { |
@@ -2431,40 +2435,8 @@ static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, | |||
2431 | 2435 | ||
2432 | nfs_fattr_init(fattr); | 2436 | nfs_fattr_init(fattr); |
2433 | 2437 | ||
2434 | dprintk("NFS call lookupfh %s\n", name->name); | ||
2435 | status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); | ||
2436 | dprintk("NFS reply lookupfh: %d\n", status); | ||
2437 | return status; | ||
2438 | } | ||
2439 | |||
2440 | static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, | ||
2441 | struct qstr *name, struct nfs_fh *fhandle, | ||
2442 | struct nfs_fattr *fattr) | ||
2443 | { | ||
2444 | struct nfs4_exception exception = { }; | ||
2445 | int err; | ||
2446 | do { | ||
2447 | err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr); | ||
2448 | /* FIXME: !!!! */ | ||
2449 | if (err == -NFS4ERR_MOVED) { | ||
2450 | err = -EREMOTE; | ||
2451 | break; | ||
2452 | } | ||
2453 | err = nfs4_handle_exception(server, err, &exception); | ||
2454 | } while (exception.retry); | ||
2455 | return err; | ||
2456 | } | ||
2457 | |||
2458 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | ||
2459 | const struct qstr *name, struct nfs_fh *fhandle, | ||
2460 | struct nfs_fattr *fattr) | ||
2461 | { | ||
2462 | int status; | ||
2463 | |||
2464 | dprintk("NFS call lookup %s\n", name->name); | 2438 | dprintk("NFS call lookup %s\n", name->name); |
2465 | status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); | 2439 | status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); |
2466 | if (status == -NFS4ERR_MOVED) | ||
2467 | status = nfs4_get_referral(dir, name, fattr, fhandle); | ||
2468 | dprintk("NFS reply lookup: %d\n", status); | 2440 | dprintk("NFS reply lookup: %d\n", status); |
2469 | return status; | 2441 | return status; |
2470 | } | 2442 | } |
@@ -2485,11 +2457,20 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst | |||
2485 | struct nfs4_exception exception = { }; | 2457 | struct nfs4_exception exception = { }; |
2486 | int err; | 2458 | int err; |
2487 | do { | 2459 | do { |
2488 | err = nfs4_handle_exception(NFS_SERVER(dir), | 2460 | int status; |
2489 | _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr), | 2461 | |
2490 | &exception); | 2462 | status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr); |
2491 | if (err == -EPERM) | 2463 | switch (status) { |
2464 | case -NFS4ERR_BADNAME: | ||
2465 | return -ENOENT; | ||
2466 | case -NFS4ERR_MOVED: | ||
2467 | err = nfs4_get_referral(dir, name, fattr, fhandle); | ||
2468 | break; | ||
2469 | case -NFS4ERR_WRONGSEC: | ||
2492 | nfs_fixup_secinfo_attributes(fattr, fhandle); | 2470 | nfs_fixup_secinfo_attributes(fattr, fhandle); |
2471 | } | ||
2472 | err = nfs4_handle_exception(NFS_SERVER(dir), | ||
2473 | status, &exception); | ||
2493 | } while (exception.retry); | 2474 | } while (exception.retry); |
2494 | return err; | 2475 | return err; |
2495 | } | 2476 | } |
@@ -3210,7 +3191,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | |||
3210 | struct nfs_server *server = NFS_SERVER(data->inode); | 3191 | struct nfs_server *server = NFS_SERVER(data->inode); |
3211 | 3192 | ||
3212 | if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { | 3193 | if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { |
3213 | nfs_restart_rpc(task, server->nfs_client); | 3194 | rpc_restart_call_prepare(task); |
3214 | return -EAGAIN; | 3195 | return -EAGAIN; |
3215 | } | 3196 | } |
3216 | 3197 | ||
@@ -3260,7 +3241,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data | |||
3260 | struct inode *inode = data->inode; | 3241 | struct inode *inode = data->inode; |
3261 | 3242 | ||
3262 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { | 3243 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { |
3263 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3244 | rpc_restart_call_prepare(task); |
3264 | return -EAGAIN; | 3245 | return -EAGAIN; |
3265 | } | 3246 | } |
3266 | if (task->tk_status >= 0) { | 3247 | if (task->tk_status >= 0) { |
@@ -3317,7 +3298,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat | |||
3317 | struct inode *inode = data->inode; | 3298 | struct inode *inode = data->inode; |
3318 | 3299 | ||
3319 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 3300 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { |
3320 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3301 | rpc_restart_call_prepare(task); |
3321 | return -EAGAIN; | 3302 | return -EAGAIN; |
3322 | } | 3303 | } |
3323 | nfs_refresh_inode(inode, data->res.fattr); | 3304 | nfs_refresh_inode(inode, data->res.fattr); |
@@ -3374,9 +3355,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) | |||
3374 | 3355 | ||
3375 | if (task->tk_status < 0) { | 3356 | if (task->tk_status < 0) { |
3376 | /* Unless we're shutting down, schedule state recovery! */ | 3357 | /* Unless we're shutting down, schedule state recovery! */ |
3377 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) | 3358 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) |
3359 | return; | ||
3360 | if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { | ||
3378 | nfs4_schedule_lease_recovery(clp); | 3361 | nfs4_schedule_lease_recovery(clp); |
3379 | return; | 3362 | return; |
3363 | } | ||
3364 | nfs4_schedule_path_down_recovery(clp); | ||
3380 | } | 3365 | } |
3381 | do_renew_lease(clp, timestamp); | 3366 | do_renew_lease(clp, timestamp); |
3382 | } | 3367 | } |
@@ -3386,7 +3371,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { | |||
3386 | .rpc_release = nfs4_renew_release, | 3371 | .rpc_release = nfs4_renew_release, |
3387 | }; | 3372 | }; |
3388 | 3373 | ||
3389 | int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3374 | static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
3390 | { | 3375 | { |
3391 | struct rpc_message msg = { | 3376 | struct rpc_message msg = { |
3392 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3377 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -3395,9 +3380,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3395 | }; | 3380 | }; |
3396 | struct nfs4_renewdata *data; | 3381 | struct nfs4_renewdata *data; |
3397 | 3382 | ||
3383 | if (renew_flags == 0) | ||
3384 | return 0; | ||
3398 | if (!atomic_inc_not_zero(&clp->cl_count)) | 3385 | if (!atomic_inc_not_zero(&clp->cl_count)) |
3399 | return -EIO; | 3386 | return -EIO; |
3400 | data = kmalloc(sizeof(*data), GFP_KERNEL); | 3387 | data = kmalloc(sizeof(*data), GFP_NOFS); |
3401 | if (data == NULL) | 3388 | if (data == NULL) |
3402 | return -ENOMEM; | 3389 | return -ENOMEM; |
3403 | data->client = clp; | 3390 | data->client = clp; |
@@ -3406,7 +3393,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3406 | &nfs4_renew_ops, data); | 3393 | &nfs4_renew_ops, data); |
3407 | } | 3394 | } |
3408 | 3395 | ||
3409 | int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3396 | static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) |
3410 | { | 3397 | { |
3411 | struct rpc_message msg = { | 3398 | struct rpc_message msg = { |
3412 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3399 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -3851,7 +3838,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
3851 | default: | 3838 | default: |
3852 | if (nfs4_async_handle_error(task, data->res.server, NULL) == | 3839 | if (nfs4_async_handle_error(task, data->res.server, NULL) == |
3853 | -EAGAIN) { | 3840 | -EAGAIN) { |
3854 | nfs_restart_rpc(task, data->res.server->nfs_client); | 3841 | rpc_restart_call_prepare(task); |
3855 | return; | 3842 | return; |
3856 | } | 3843 | } |
3857 | } | 3844 | } |
@@ -4105,8 +4092,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
4105 | break; | 4092 | break; |
4106 | default: | 4093 | default: |
4107 | if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) | 4094 | if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) |
4108 | nfs_restart_rpc(task, | 4095 | rpc_restart_call_prepare(task); |
4109 | calldata->server->nfs_client); | ||
4110 | } | 4096 | } |
4111 | } | 4097 | } |
4112 | 4098 | ||
@@ -4939,7 +4925,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) | |||
4939 | task->tk_status = 0; | 4925 | task->tk_status = 0; |
4940 | /* fall through */ | 4926 | /* fall through */ |
4941 | case -NFS4ERR_RETRY_UNCACHED_REP: | 4927 | case -NFS4ERR_RETRY_UNCACHED_REP: |
4942 | nfs_restart_rpc(task, data->clp); | 4928 | rpc_restart_call_prepare(task); |
4943 | return; | 4929 | return; |
4944 | } | 4930 | } |
4945 | dprintk("<-- %s\n", __func__); | 4931 | dprintk("<-- %s\n", __func__); |
@@ -5504,11 +5490,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ | |||
5504 | return rpc_run_task(&task_setup_data); | 5490 | return rpc_run_task(&task_setup_data); |
5505 | } | 5491 | } |
5506 | 5492 | ||
5507 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) | 5493 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
5508 | { | 5494 | { |
5509 | struct rpc_task *task; | 5495 | struct rpc_task *task; |
5510 | int ret = 0; | 5496 | int ret = 0; |
5511 | 5497 | ||
5498 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) | ||
5499 | return 0; | ||
5512 | task = _nfs41_proc_sequence(clp, cred); | 5500 | task = _nfs41_proc_sequence(clp, cred); |
5513 | if (IS_ERR(task)) | 5501 | if (IS_ERR(task)) |
5514 | ret = PTR_ERR(task); | 5502 | ret = PTR_ERR(task); |
@@ -5778,7 +5766,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
5778 | 5766 | ||
5779 | server = NFS_SERVER(lrp->args.inode); | 5767 | server = NFS_SERVER(lrp->args.inode); |
5780 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 5768 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { |
5781 | nfs_restart_rpc(task, lrp->clp); | 5769 | rpc_restart_call_prepare(task); |
5782 | return; | 5770 | return; |
5783 | } | 5771 | } |
5784 | spin_lock(&lo->plh_inode->i_lock); | 5772 | spin_lock(&lo->plh_inode->i_lock); |
@@ -5949,7 +5937,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
5949 | } | 5937 | } |
5950 | 5938 | ||
5951 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 5939 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { |
5952 | nfs_restart_rpc(task, server->nfs_client); | 5940 | rpc_restart_call_prepare(task); |
5953 | return; | 5941 | return; |
5954 | } | 5942 | } |
5955 | 5943 | ||
@@ -6262,7 +6250,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
6262 | .getroot = nfs4_proc_get_root, | 6250 | .getroot = nfs4_proc_get_root, |
6263 | .getattr = nfs4_proc_getattr, | 6251 | .getattr = nfs4_proc_getattr, |
6264 | .setattr = nfs4_proc_setattr, | 6252 | .setattr = nfs4_proc_setattr, |
6265 | .lookupfh = nfs4_proc_lookupfh, | ||
6266 | .lookup = nfs4_proc_lookup, | 6253 | .lookup = nfs4_proc_lookup, |
6267 | .access = nfs4_proc_access, | 6254 | .access = nfs4_proc_access, |
6268 | .readlink = nfs4_proc_readlink, | 6255 | .readlink = nfs4_proc_readlink, |
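
Note: two behavioural fixes accompany the nfs4proc.c cleanups. NFS4ERR_BADNAME is now mapped to -ENOENT — in _nfs4_proc_open() for non-O_CREAT opens and in nfs4_proc_lookup() — so a server that rejects a name produces a clean lookup failure rather than an opaque error. And the lookupfh/lookup duplication collapses into a single _nfs4_proc_lookup(), with the retry loop dispatching on the raw status before handing it to the exception machinery, as the diff shows:

    status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
    switch (status) {
    case -NFS4ERR_BADNAME:
            return -ENOENT;         /* server refused the name: treat as absent */
    case -NFS4ERR_MOVED:
            err = nfs4_get_referral(dir, name, fattr, fhandle);
            break;
    case -NFS4ERR_WRONGSEC:
            nfs_fixup_secinfo_attributes(fattr, fhandle);
    }
    err = nfs4_handle_exception(NFS_SERVER(dir), status, &exception);
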
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index df8e7f3ca56d..dc484c0eae7f 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) | |||
60 | struct rpc_cred *cred; | 60 | struct rpc_cred *cred; |
61 | long lease; | 61 | long lease; |
62 | unsigned long last, now; | 62 | unsigned long last, now; |
63 | unsigned renew_flags = 0; | ||
63 | 64 | ||
64 | ops = clp->cl_mvops->state_renewal_ops; | 65 | ops = clp->cl_mvops->state_renewal_ops; |
65 | dprintk("%s: start\n", __func__); | 66 | dprintk("%s: start\n", __func__); |
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) | |||
72 | last = clp->cl_last_renewal; | 73 | last = clp->cl_last_renewal; |
73 | now = jiffies; | 74 | now = jiffies; |
74 | /* Are we close to a lease timeout? */ | 75 | /* Are we close to a lease timeout? */ |
75 | if (time_after(now, last + lease/3)) { | 76 | if (time_after(now, last + lease/3)) |
77 | renew_flags |= NFS4_RENEW_TIMEOUT; | ||
78 | if (nfs_delegations_present(clp)) | ||
79 | renew_flags |= NFS4_RENEW_DELEGATION_CB; | ||
80 | |||
81 | if (renew_flags != 0) { | ||
76 | cred = ops->get_state_renewal_cred_locked(clp); | 82 | cred = ops->get_state_renewal_cred_locked(clp); |
77 | spin_unlock(&clp->cl_lock); | 83 | spin_unlock(&clp->cl_lock); |
78 | if (cred == NULL) { | 84 | if (cred == NULL) { |
79 | if (!nfs_delegations_present(clp)) { | 85 | if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { |
80 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | 86 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); |
81 | goto out; | 87 | goto out; |
82 | } | 88 | } |
83 | nfs_expire_all_delegations(clp); | 89 | nfs_expire_all_delegations(clp); |
84 | } else { | 90 | } else { |
85 | /* Queue an asynchronous RENEW. */ | 91 | /* Queue an asynchronous RENEW. */ |
86 | ops->sched_state_renewal(clp, cred); | 92 | ops->sched_state_renewal(clp, cred, renew_flags); |
87 | put_rpccred(cred); | 93 | put_rpccred(cred); |
88 | goto out_exp; | 94 | goto out_exp; |
89 | } | 95 | } |
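The renew path above now collects its reasons into a bitmask before acting once. A minimal user-space sketch of that control flow, with stand-in predicates for the lease check and the delegation check (the flag values are illustrative, not the kernel's):

#include <stdio.h>

#define NFS4_RENEW_TIMEOUT       0x01	/* illustrative values */
#define NFS4_RENEW_DELEGATION_CB 0x02

static int lease_expiring(void)      { return 1; }	/* stand-in */
static int delegations_present(void) { return 0; }	/* stand-in */

int main(void)
{
	unsigned renew_flags = 0;

	/* Accumulate every reason to renew... */
	if (lease_expiring())
		renew_flags |= NFS4_RENEW_TIMEOUT;
	if (delegations_present())
		renew_flags |= NFS4_RENEW_DELEGATION_CB;

	/* ...then schedule a single asynchronous RENEW if any apply. */
	if (renew_flags != 0)
		printf("schedule RENEW, flags=0x%x\n", renew_flags);
	return 0;
}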
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 72ab97ef3d61..39914be40b03 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) | |||
1038 | nfs4_schedule_state_manager(clp); | 1038 | nfs4_schedule_state_manager(clp); |
1039 | } | 1039 | } |
1040 | 1040 | ||
1041 | void nfs4_schedule_path_down_recovery(struct nfs_client *clp) | ||
1042 | { | ||
1043 | nfs_handle_cb_pathdown(clp); | ||
1044 | nfs4_schedule_state_manager(clp); | ||
1045 | } | ||
1046 | |||
1041 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) | 1047 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) |
1042 | { | 1048 | { |
1043 | 1049 | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9383ca7245bc..d0cda12fddc3 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
479 | for (i = 0; i < ios->numdevs; i++) { | 479 | for (i = 0; i < ios->numdevs; i++) { |
480 | struct osd_sense_info osi; | 480 | struct osd_sense_info osi; |
481 | struct osd_request *or = ios->per_dev[i].or; | 481 | struct osd_request *or = ios->per_dev[i].or; |
482 | unsigned dev; | ||
483 | int ret; | 482 | int ret; |
484 | 483 | ||
485 | if (!or) | 484 | if (!or) |
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
500 | 499 | ||
501 | continue; /* we recovered */ | 500 | continue; /* we recovered */ |
502 | } | 501 | } |
503 | dev = ios->per_dev[i].dev; | 502 | objlayout_io_set_result(&ios->ol_state, i, |
504 | objlayout_io_set_result(&ios->ol_state, dev, | 503 | &ios->layout->comps[i].oc_object_id, |
505 | &ios->layout->comps[dev].oc_object_id, | ||
506 | osd_pri_2_pnfs_err(osi.osd_err_pri), | 504 | osd_pri_2_pnfs_err(osi.osd_err_pri), |
507 | ios->per_dev[i].offset, | 505 | ios->per_dev[i].offset, |
508 | ios->per_dev[i].length, | 506 | ios->per_dev[i].length, |
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | |||
589 | } | 587 | } |
590 | 588 | ||
591 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | 589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, |
592 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | 590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, |
593 | gfp_t gfp_flags) | 591 | gfp_t gfp_flags) |
594 | { | 592 | { |
595 | unsigned pg = *cur_pg; | 593 | unsigned pg = *cur_pg; |
594 | int cur_len = len; | ||
596 | struct request_queue *q = | 595 | struct request_queue *q = |
597 | osd_request_queue(_io_od(ios, per_dev->dev)); | 596 | osd_request_queue(_io_od(ios, per_dev->dev)); |
598 | 597 | ||
599 | per_dev->length += cur_len; | ||
600 | |||
601 | if (per_dev->bio == NULL) { | 598 | if (per_dev->bio == NULL) { |
602 | unsigned stripes = ios->layout->num_comps / | 599 | unsigned pages_in_stripe = ios->layout->group_width * |
603 | ios->layout->mirrors_p1; | ||
604 | unsigned pages_in_stripe = stripes * | ||
605 | (ios->layout->stripe_unit / PAGE_SIZE); | 600 | (ios->layout->stripe_unit / PAGE_SIZE); |
606 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | 601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / |
607 | stripes; | 602 | ios->layout->group_width; |
608 | 603 | ||
609 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | 604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) |
610 | bio_size = BIO_MAX_PAGES_KMALLOC; | 605 | bio_size = BIO_MAX_PAGES_KMALLOC; |
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | |||
632 | } | 627 | } |
633 | BUG_ON(cur_len); | 628 | BUG_ON(cur_len); |
634 | 629 | ||
630 | per_dev->length += len; | ||
635 | *cur_pg = pg; | 631 | *cur_pg = pg; |
636 | return 0; | 632 | return 0; |
637 | } | 633 | } |
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
650 | int ret = 0; | 646 | int ret = 0; |
651 | 647 | ||
652 | while (length) { | 648 | while (length) { |
653 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | 649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; |
654 | unsigned cur_len, page_off = 0; | 650 | unsigned cur_len, page_off = 0; |
655 | 651 | ||
656 | if (!per_dev->length) { | 652 | if (!per_dev->length) { |
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
670 | cur_len = stripe_unit; | 666 | cur_len = stripe_unit; |
671 | } | 667 | } |
672 | 668 | ||
673 | if (max_comp < dev) | 669 | if (max_comp < dev - first_dev) |
674 | max_comp = dev; | 670 | max_comp = dev - first_dev; |
675 | } else { | 671 | } else { |
676 | cur_len = stripe_unit; | 672 | cur_len = stripe_unit; |
677 | } | 673 | } |
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
806 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | 802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; |
807 | unsigned dev = per_dev->dev; | 803 | unsigned dev = per_dev->dev; |
808 | struct pnfs_osd_object_cred *cred = | 804 | struct pnfs_osd_object_cred *cred = |
809 | &ios->layout->comps[dev]; | 805 | &ios->layout->comps[cur_comp]; |
810 | struct osd_obj_id obj = { | 806 | struct osd_obj_id obj = { |
811 | .partition = cred->oc_object_id.oid_partition_id, | 807 | .partition = cred->oc_object_id.oid_partition_id, |
812 | .id = cred->oc_object_id.oid_object_id, | 808 | .id = cred->oc_object_id.oid_object_id, |
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
904 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
905 | struct osd_request *or = NULL; | 901 | struct osd_request *or = NULL; |
906 | struct pnfs_osd_object_cred *cred = | 902 | struct pnfs_osd_object_cred *cred = |
907 | &ios->layout->comps[dev]; | 903 | &ios->layout->comps[cur_comp]; |
908 | struct osd_obj_id obj = { | 904 | struct osd_obj_id obj = { |
909 | .partition = cred->oc_object_id.oid_partition_id, | 905 | .partition = cred->oc_object_id.oid_partition_id, |
910 | .id = cred->oc_object_id.oid_object_id, | 906 | .id = cred->oc_object_id.oid_object_id, |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758e9123..b3918f7ac34d 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | |||
170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | 170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); |
171 | layout->olo_comps_index = be32_to_cpup(p++); | 171 | layout->olo_comps_index = be32_to_cpup(p++); |
172 | layout->olo_num_comps = be32_to_cpup(p++); | 172 | layout->olo_num_comps = be32_to_cpup(p++); |
173 | dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, | ||
174 | layout->olo_comps_index, layout->olo_num_comps); | ||
175 | |||
173 | iter->total_comps = layout->olo_num_comps; | 176 | iter->total_comps = layout->olo_num_comps; |
174 | return 0; | 177 | return 0; |
175 | } | 178 | } |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e550e8836c37..ee73d9a4f700 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | |||
1168 | /* | 1168 | /* |
1169 | * Called by non rpc-based layout drivers | 1169 | * Called by non rpc-based layout drivers |
1170 | */ | 1170 | */ |
1171 | int | 1171 | void pnfs_ld_write_done(struct nfs_write_data *data) |
1172 | pnfs_ld_write_done(struct nfs_write_data *data) | ||
1173 | { | 1172 | { |
1174 | int status; | 1173 | if (likely(!data->pnfs_error)) { |
1175 | |||
1176 | if (!data->pnfs_error) { | ||
1177 | pnfs_set_layoutcommit(data); | 1174 | pnfs_set_layoutcommit(data); |
1178 | data->mds_ops->rpc_call_done(&data->task, data); | 1175 | data->mds_ops->rpc_call_done(&data->task, data); |
1179 | data->mds_ops->rpc_release(data); | 1176 | } else { |
1180 | return 0; | 1177 | put_lseg(data->lseg); |
1178 | data->lseg = NULL; | ||
1179 | dprintk("pnfs write error = %d\n", data->pnfs_error); | ||
1181 | } | 1180 | } |
1182 | 1181 | data->mds_ops->rpc_release(data); | |
1183 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
1184 | data->pnfs_error); | ||
1185 | status = nfs_initiate_write(data, NFS_CLIENT(data->inode), | ||
1186 | data->mds_ops, NFS_FILE_SYNC); | ||
1187 | return status ? : -EAGAIN; | ||
1188 | } | 1182 | } |
1189 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 1183 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); |
1190 | 1184 | ||
@@ -1268,23 +1262,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | |||
1268 | /* | 1262 | /* |
1269 | * Called by non rpc-based layout drivers | 1263 | * Called by non rpc-based layout drivers |
1270 | */ | 1264 | */ |
1271 | int | 1265 | void pnfs_ld_read_done(struct nfs_read_data *data) |
1272 | pnfs_ld_read_done(struct nfs_read_data *data) | ||
1273 | { | 1266 | { |
1274 | int status; | 1267 | if (likely(!data->pnfs_error)) { |
1275 | |||
1276 | if (!data->pnfs_error) { | ||
1277 | __nfs4_read_done_cb(data); | 1268 | __nfs4_read_done_cb(data); |
1278 | data->mds_ops->rpc_call_done(&data->task, data); | 1269 | data->mds_ops->rpc_call_done(&data->task, data); |
1279 | data->mds_ops->rpc_release(data); | 1270 | } else { |
1280 | return 0; | 1271 | put_lseg(data->lseg); |
1272 | data->lseg = NULL; | ||
1273 | dprintk("pnfs read error = %d\n", data->pnfs_error); ||
1281 | } | 1274 | } |
1282 | 1275 | data->mds_ops->rpc_release(data); | |
1283 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
1284 | data->pnfs_error); | ||
1285 | status = nfs_initiate_read(data, NFS_CLIENT(data->inode), | ||
1286 | data->mds_ops); | ||
1287 | return status ? : -EAGAIN; | ||
1288 | } | 1276 | } |
1289 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 1277 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); |
1290 | 1278 | ||
@@ -1381,6 +1369,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | |||
1381 | } | 1369 | } |
1382 | } | 1370 | } |
1383 | 1371 | ||
1372 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
1373 | { | ||
1374 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
1375 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
1376 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
1377 | } else { | ||
1378 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
1379 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
1380 | } | ||
1381 | } | ||
1382 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | ||
1383 | |||
1384 | void | 1384 | void |
1385 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1385 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) |
1386 | { | 1386 | { |
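The pnfs_ld_write_done and pnfs_ld_read_done hunks above change the helpers from int to void: the MDS resend no longer happens inline, the lseg is dropped on error, and rpc_release runs unconditionally, leaving the retry to the release path. A minimal sketch of that shape, using simplified stand-in types rather than the real nfs_write_data/nfs_read_data:

#include <stdio.h>

/* Simplified stand-in for struct nfs_write_data / nfs_read_data. */
struct io_data {
	int   pnfs_error;
	void *lseg;
};

static void call_done(struct io_data *d) { (void)d; puts("completed OK"); }
static void release(struct io_data *d)   { (void)d; puts("rpc_release (MDS resend happens here on error)"); }

/* New shape: void return; on error the lseg is dropped and the
 * release callback always runs. */
static void ld_io_done(struct io_data *data)
{
	if (!data->pnfs_error)
		call_done(data);
	else
		data->lseg = NULL;	/* put_lseg() in the kernel */
	release(data);
}

int main(void)
{
	struct io_data d = { .pnfs_error = -5, .lseg = &d };
	ld_io_done(&d);
	return 0;
}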
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 01cbfd54f3cb..1509530cb111 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | |||
178 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); | 178 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); |
179 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | 179 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
180 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 180 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
181 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); | ||
181 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 182 | int pnfs_layout_process(struct nfs4_layoutget *lgp); |
182 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 183 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
183 | void pnfs_destroy_layout(struct nfs_inode *); | 184 | void pnfs_destroy_layout(struct nfs_inode *); |
@@ -200,8 +201,8 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | |||
200 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 201 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
201 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 202 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
202 | int _pnfs_return_layout(struct inode *); | 203 | int _pnfs_return_layout(struct inode *); |
203 | int pnfs_ld_write_done(struct nfs_write_data *); | 204 | void pnfs_ld_write_done(struct nfs_write_data *); |
204 | int pnfs_ld_read_done(struct nfs_read_data *); | 205 | void pnfs_ld_read_done(struct nfs_read_data *); |
205 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 206 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
206 | struct nfs_open_context *ctx, | 207 | struct nfs_open_context *ctx, |
207 | loff_t pos, | 208 | loff_t pos, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2171c043ab08..8b48ec63f722 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -35,16 +35,13 @@ static const struct rpc_call_ops nfs_read_partial_ops; | |||
35 | static const struct rpc_call_ops nfs_read_full_ops; | 35 | static const struct rpc_call_ops nfs_read_full_ops; |
36 | 36 | ||
37 | static struct kmem_cache *nfs_rdata_cachep; | 37 | static struct kmem_cache *nfs_rdata_cachep; |
38 | static mempool_t *nfs_rdata_mempool; | ||
39 | |||
40 | #define MIN_POOL_READ (32) | ||
41 | 38 | ||
42 | struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | 39 | struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) |
43 | { | 40 | { |
44 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); | 41 | struct nfs_read_data *p; |
45 | 42 | ||
43 | p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); | ||
46 | if (p) { | 44 | if (p) { |
47 | memset(p, 0, sizeof(*p)); | ||
48 | INIT_LIST_HEAD(&p->pages); | 45 | INIT_LIST_HEAD(&p->pages); |
49 | p->npages = pagecount; | 46 | p->npages = pagecount; |
50 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 47 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
@@ -52,7 +49,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | |||
52 | else { | 49 | else { |
53 | p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); | 50 | p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); |
54 | if (!p->pagevec) { | 51 | if (!p->pagevec) { |
55 | mempool_free(p, nfs_rdata_mempool); | 52 | kmem_cache_free(nfs_rdata_cachep, p); |
56 | p = NULL; | 53 | p = NULL; |
57 | } | 54 | } |
58 | } | 55 | } |
@@ -64,7 +61,7 @@ void nfs_readdata_free(struct nfs_read_data *p) | |||
64 | { | 61 | { |
65 | if (p && (p->pagevec != &p->page_array[0])) | 62 | if (p && (p->pagevec != &p->page_array[0])) |
66 | kfree(p->pagevec); | 63 | kfree(p->pagevec); |
67 | mempool_free(p, nfs_rdata_mempool); | 64 | kmem_cache_free(nfs_rdata_cachep, p); |
68 | } | 65 | } |
69 | 66 | ||
70 | void nfs_readdata_release(struct nfs_read_data *rdata) | 67 | void nfs_readdata_release(struct nfs_read_data *rdata) |
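The read.c hunks above retire nfs_rdata_mempool and allocate read data straight from the slab cache, pre-zeroed. A user-space approximation of the new alloc/free pairing, with calloc and free standing in for kmem_cache_zalloc and kmem_cache_free and an invented inline-array size:

#include <stdlib.h>

struct read_data {
	struct page_ptr { void *p; } page_array[8];	/* size invented */
	struct page_ptr *pagevec;
	unsigned npages;
};

static struct read_data *readdata_alloc(unsigned pagecount)
{
	/* calloc ~ kmem_cache_zalloc: a zeroed object, no mempool,
	 * no explicit memset needed. */
	struct read_data *p = calloc(1, sizeof(*p));

	if (!p)
		return NULL;
	p->npages = pagecount;
	if (pagecount <= 8)
		p->pagevec = p->page_array;	/* small I/O: inline array */
	else {
		p->pagevec = calloc(pagecount, sizeof(struct page_ptr));
		if (!p->pagevec) {
			free(p);	/* ~ kmem_cache_free */
			p = NULL;
		}
	}
	return p;
}

int main(void)
{
	struct read_data *rd = readdata_alloc(4);

	if (rd && rd->pagevec != rd->page_array)
		free(rd->pagevec);
	free(rd);
	return 0;
}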
@@ -276,7 +273,6 @@ nfs_async_read_error(struct list_head *head) | |||
276 | while (!list_empty(head)) { | 273 | while (!list_empty(head)) { |
277 | req = nfs_list_entry(head->next); | 274 | req = nfs_list_entry(head->next); |
278 | nfs_list_remove_request(req); | 275 | nfs_list_remove_request(req); |
279 | SetPageError(req->wb_page); | ||
280 | nfs_readpage_release(req); | 276 | nfs_readpage_release(req); |
281 | } | 277 | } |
282 | } | 278 | } |
@@ -322,7 +318,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head | |||
322 | offset += len; | 318 | offset += len; |
323 | } while(nbytes != 0); | 319 | } while(nbytes != 0); |
324 | atomic_set(&req->wb_complete, requests); | 320 | atomic_set(&req->wb_complete, requests); |
325 | ClearPageError(page); | ||
326 | desc->pg_rpc_callops = &nfs_read_partial_ops; | 321 | desc->pg_rpc_callops = &nfs_read_partial_ops; |
327 | return ret; | 322 | return ret; |
328 | out_bad: | 323 | out_bad: |
@@ -331,7 +326,6 @@ out_bad: | |||
331 | list_del(&data->list); | 326 | list_del(&data->list); |
332 | nfs_readdata_free(data); | 327 | nfs_readdata_free(data); |
333 | } | 328 | } |
334 | SetPageError(page); | ||
335 | nfs_readpage_release(req); | 329 | nfs_readpage_release(req); |
336 | return -ENOMEM; | 330 | return -ENOMEM; |
337 | } | 331 | } |
@@ -357,7 +351,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * | |||
357 | req = nfs_list_entry(head->next); | 351 | req = nfs_list_entry(head->next); |
358 | nfs_list_remove_request(req); | 352 | nfs_list_remove_request(req); |
359 | nfs_list_add_request(req, &data->pages); | 353 | nfs_list_add_request(req, &data->pages); |
360 | ClearPageError(req->wb_page); | ||
361 | *pages++ = req->wb_page; | 354 | *pages++ = req->wb_page; |
362 | } | 355 | } |
363 | req = nfs_list_entry(data->pages.next); | 356 | req = nfs_list_entry(data->pages.next); |
@@ -435,7 +428,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data | |||
435 | argp->offset += resp->count; | 428 | argp->offset += resp->count; |
436 | argp->pgbase += resp->count; | 429 | argp->pgbase += resp->count; |
437 | argp->count -= resp->count; | 430 | argp->count -= resp->count; |
438 | nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); | 431 | rpc_restart_call_prepare(task); |
439 | } | 432 | } |
440 | 433 | ||
441 | /* | 434 | /* |
@@ -462,10 +455,10 @@ static void nfs_readpage_release_partial(void *calldata) | |||
462 | int status = data->task.tk_status; | 455 | int status = data->task.tk_status; |
463 | 456 | ||
464 | if (status < 0) | 457 | if (status < 0) |
465 | SetPageError(page); | 458 | set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); |
466 | 459 | ||
467 | if (atomic_dec_and_test(&req->wb_complete)) { | 460 | if (atomic_dec_and_test(&req->wb_complete)) { |
468 | if (!PageError(page)) | 461 | if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) |
469 | SetPageUptodate(page); | 462 | SetPageUptodate(page); |
470 | nfs_readpage_release(req); | 463 | nfs_readpage_release(req); |
471 | } | 464 | } |
@@ -541,13 +534,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) | |||
541 | static void nfs_readpage_release_full(void *calldata) | 534 | static void nfs_readpage_release_full(void *calldata) |
542 | { | 535 | { |
543 | struct nfs_read_data *data = calldata; | 536 | struct nfs_read_data *data = calldata; |
537 | struct nfs_pageio_descriptor pgio; | ||
544 | 538 | ||
539 | if (data->pnfs_error) { | ||
540 | nfs_pageio_init_read_mds(&pgio, data->inode); | ||
541 | pgio.pg_recoalesce = 1; | ||
542 | } | ||
545 | while (!list_empty(&data->pages)) { | 543 | while (!list_empty(&data->pages)) { |
546 | struct nfs_page *req = nfs_list_entry(data->pages.next); | 544 | struct nfs_page *req = nfs_list_entry(data->pages.next); |
547 | 545 | ||
548 | nfs_list_remove_request(req); | 546 | nfs_list_remove_request(req); |
549 | nfs_readpage_release(req); | 547 | if (!data->pnfs_error) |
548 | nfs_readpage_release(req); | ||
549 | else | ||
550 | nfs_pageio_add_request(&pgio, req); | ||
550 | } | 551 | } |
552 | if (data->pnfs_error) | ||
553 | nfs_pageio_complete(&pgio); | ||
551 | nfs_readdata_release(calldata); | 554 | nfs_readdata_release(calldata); |
552 | } | 555 | } |
553 | 556 | ||
@@ -648,7 +651,6 @@ readpage_async_filler(void *data, struct page *page) | |||
648 | return 0; | 651 | return 0; |
649 | out_error: | 652 | out_error: |
650 | error = PTR_ERR(new); | 653 | error = PTR_ERR(new); |
651 | SetPageError(page); | ||
652 | out_unlock: | 654 | out_unlock: |
653 | unlock_page(page); | 655 | unlock_page(page); |
654 | return error; | 656 | return error; |
@@ -711,16 +713,10 @@ int __init nfs_init_readpagecache(void) | |||
711 | if (nfs_rdata_cachep == NULL) | 713 | if (nfs_rdata_cachep == NULL) |
712 | return -ENOMEM; | 714 | return -ENOMEM; |
713 | 715 | ||
714 | nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, | ||
715 | nfs_rdata_cachep); | ||
716 | if (nfs_rdata_mempool == NULL) | ||
717 | return -ENOMEM; | ||
718 | |||
719 | return 0; | 716 | return 0; |
720 | } | 717 | } |
721 | 718 | ||
722 | void nfs_destroy_readpagecache(void) | 719 | void nfs_destroy_readpagecache(void) |
723 | { | 720 | { |
724 | mempool_destroy(nfs_rdata_mempool); | ||
725 | kmem_cache_destroy(nfs_rdata_cachep); | 721 | kmem_cache_destroy(nfs_rdata_cachep); |
726 | } | 722 | } |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b961ceac66b4..480b3b6bf71e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -733,18 +733,22 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
733 | 733 | ||
734 | return 0; | 734 | return 0; |
735 | } | 735 | } |
736 | |||
737 | #ifdef CONFIG_NFS_V4 | ||
736 | #ifdef CONFIG_NFS_V4_1 | 738 | #ifdef CONFIG_NFS_V4_1 |
737 | void show_sessions(struct seq_file *m, struct nfs_server *server) | 739 | static void show_sessions(struct seq_file *m, struct nfs_server *server) |
738 | { | 740 | { |
739 | if (nfs4_has_session(server->nfs_client)) | 741 | if (nfs4_has_session(server->nfs_client)) |
740 | seq_printf(m, ",sessions"); | 742 | seq_printf(m, ",sessions"); |
741 | } | 743 | } |
742 | #else | 744 | #else |
743 | void show_sessions(struct seq_file *m, struct nfs_server *server) {} | 745 | static void show_sessions(struct seq_file *m, struct nfs_server *server) {} |
746 | #endif | ||
744 | #endif | 747 | #endif |
745 | 748 | ||
749 | #ifdef CONFIG_NFS_V4 | ||
746 | #ifdef CONFIG_NFS_V4_1 | 750 | #ifdef CONFIG_NFS_V4_1 |
747 | void show_pnfs(struct seq_file *m, struct nfs_server *server) | 751 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) |
748 | { | 752 | { |
749 | seq_printf(m, ",pnfs="); | 753 | seq_printf(m, ",pnfs="); |
750 | if (server->pnfs_curr_ld) | 754 | if (server->pnfs_curr_ld) |
@@ -752,9 +756,10 @@ void show_pnfs(struct seq_file *m, struct nfs_server *server) | |||
752 | else | 756 | else |
753 | seq_printf(m, "not configured"); | 757 | seq_printf(m, "not configured"); |
754 | } | 758 | } |
755 | #else /* CONFIG_NFS_V4_1 */ | 759 | #else |
756 | void show_pnfs(struct seq_file *m, struct nfs_server *server) {} | 760 | static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} |
757 | #endif /* CONFIG_NFS_V4_1 */ | 761 | #endif |
762 | #endif | ||
758 | 763 | ||
759 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) | 764 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) |
760 | { | 765 | { |
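Both helpers become static and gain an outer CONFIG_NFS_V4 guard around the existing CONFIG_NFS_V4_1 one, so they exist only when NFSv4 is configured at all and do real work only with v4.1. The guard nesting, reduced to a compilable skeleton (the config macros are set by hand here):

#include <stdio.h>

#define CONFIG_NFS_V4 1
/* #define CONFIG_NFS_V4_1 1 */

#ifdef CONFIG_NFS_V4
# ifdef CONFIG_NFS_V4_1
static void show_sessions(void) { printf(",sessions"); }
# else
static void show_sessions(void) {}	/* v4.0 only: nothing to print */
# endif
#endif

int main(void) { show_sessions(); return 0; }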
@@ -2035,9 +2040,6 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
2035 | sb->s_blocksize = nfs_block_bits(server->wsize, | 2040 | sb->s_blocksize = nfs_block_bits(server->wsize, |
2036 | &sb->s_blocksize_bits); | 2041 | &sb->s_blocksize_bits); |
2037 | 2042 | ||
2038 | if (server->flags & NFS_MOUNT_NOAC) | ||
2039 | sb->s_flags |= MS_SYNCHRONOUS; | ||
2040 | |||
2041 | sb->s_bdi = &server->backing_dev_info; | 2043 | sb->s_bdi = &server->backing_dev_info; |
2042 | 2044 | ||
2043 | nfs_super_set_maxbytes(sb, server->maxfilesize); | 2045 | nfs_super_set_maxbytes(sb, server->maxfilesize); |
@@ -2249,6 +2251,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2249 | if (server->flags & NFS_MOUNT_UNSHARED) | 2251 | if (server->flags & NFS_MOUNT_UNSHARED) |
2250 | compare_super = NULL; | 2252 | compare_super = NULL; |
2251 | 2253 | ||
2254 | /* -o noac implies -o sync */ | ||
2255 | if (server->flags & NFS_MOUNT_NOAC) | ||
2256 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2257 | |||
2252 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2258 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2253 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2259 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2254 | if (IS_ERR(s)) { | 2260 | if (IS_ERR(s)) { |
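Moving the noac handling from nfs_initialise_sb into the mount paths means MS_SYNCHRONOUS is part of the mount data that sget() compares when deciding whether to share a superblock; patching sb->s_flags after the fact could silently reuse a non-synchronous superblock. A toy sketch of the ordering (flag values are illustrative); the same two lines recur in nfs_xdev_mount, nfs4_remote_mount, nfs4_xdev_mount and nfs4_remote_referral_mount below:

#include <stdio.h>

#define NFS_MOUNT_NOAC 0x0020	/* illustrative values */
#define MS_SYNCHRONOUS 0x0010

struct mntdata { unsigned mntflags; };

/* sget() stand-in: superblock sharing keys off the mount data, so
 * MS_SYNCHRONOUS must already be set by the time this is called. */
static void sget_stub(const struct mntdata *d)
{
	printf("sget sees mntflags=0x%x\n", d->mntflags);
}

int main(void)
{
	unsigned server_flags = NFS_MOUNT_NOAC;
	struct mntdata sb_mntdata = { 0 };

	if (server_flags & NFS_MOUNT_NOAC)	/* -o noac implies -o sync */
		sb_mntdata.mntflags |= MS_SYNCHRONOUS;
	sget_stub(&sb_mntdata);
	return 0;
}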
@@ -2361,6 +2367,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2361 | if (server->flags & NFS_MOUNT_UNSHARED) | 2367 | if (server->flags & NFS_MOUNT_UNSHARED) |
2362 | compare_super = NULL; | 2368 | compare_super = NULL; |
2363 | 2369 | ||
2370 | /* -o noac implies -o sync */ | ||
2371 | if (server->flags & NFS_MOUNT_NOAC) | ||
2372 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2373 | |||
2364 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2374 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2365 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2375 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2366 | if (IS_ERR(s)) { | 2376 | if (IS_ERR(s)) { |
@@ -2628,6 +2638,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2628 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2638 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2629 | compare_super = NULL; | 2639 | compare_super = NULL; |
2630 | 2640 | ||
2641 | /* -o noac implies -o sync */ | ||
2642 | if (server->flags & NFS_MOUNT_NOAC) | ||
2643 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2644 | |||
2631 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2645 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2632 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2646 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2633 | if (IS_ERR(s)) { | 2647 | if (IS_ERR(s)) { |
@@ -2789,7 +2803,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, | |||
2789 | goto out_put_mnt_ns; | 2803 | goto out_put_mnt_ns; |
2790 | 2804 | ||
2791 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, | 2805 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, |
2792 | export_path, LOOKUP_FOLLOW, &path); | 2806 | export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
2793 | 2807 | ||
2794 | nfs_referral_loop_unprotect(); | 2808 | nfs_referral_loop_unprotect(); |
2795 | put_mnt_ns(ns_private); | 2809 | put_mnt_ns(ns_private); |
@@ -2916,6 +2930,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2916 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2930 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2917 | compare_super = NULL; | 2931 | compare_super = NULL; |
2918 | 2932 | ||
2933 | /* -o noac implies -o sync */ | ||
2934 | if (server->flags & NFS_MOUNT_NOAC) | ||
2935 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2936 | |||
2919 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2937 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2920 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2938 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2921 | if (IS_ERR(s)) { | 2939 | if (IS_ERR(s)) { |
@@ -3003,6 +3021,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, | |||
3003 | if (server->flags & NFS4_MOUNT_UNSHARED) | 3021 | if (server->flags & NFS4_MOUNT_UNSHARED) |
3004 | compare_super = NULL; | 3022 | compare_super = NULL; |
3005 | 3023 | ||
3024 | /* -o noac implies -o sync */ | ||
3025 | if (server->flags & NFS_MOUNT_NOAC) | ||
3026 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
3027 | |||
3006 | /* Get a superblock - note that we may end up sharing one that already exists */ | 3028 | /* Get a superblock - note that we may end up sharing one that already exists */ |
3007 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 3029 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
3008 | if (IS_ERR(s)) { | 3030 | if (IS_ERR(s)) { |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index b2fbbde58e44..4f9319a2e567 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -87,7 +87,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |||
87 | struct inode *dir = data->dir; | 87 | struct inode *dir = data->dir; |
88 | 88 | ||
89 | if (!NFS_PROTO(dir)->unlink_done(task, dir)) | 89 | if (!NFS_PROTO(dir)->unlink_done(task, dir)) |
90 | nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client); | 90 | rpc_restart_call_prepare(task); |
91 | } | 91 | } |
92 | 92 | ||
93 | /** | 93 | /** |
@@ -369,7 +369,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
369 | struct dentry *new_dentry = data->new_dentry; | 369 | struct dentry *new_dentry = data->new_dentry; |
370 | 370 | ||
371 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { | 371 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { |
372 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); | 372 | rpc_restart_call_prepare(task); |
373 | return; | 373 | return; |
374 | } | 374 | } |
375 | 375 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b39b37f80913..2219c88d96b2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -390,7 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
390 | error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); | 390 | error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); |
391 | BUG_ON(error); | 391 | BUG_ON(error); |
392 | if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) | 392 | if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) |
393 | nfsi->change_attr++; | 393 | inode->i_version++; |
394 | set_bit(PG_MAPPED, &req->wb_flags); | 394 | set_bit(PG_MAPPED, &req->wb_flags); |
395 | SetPagePrivate(req->wb_page); | 395 | SetPagePrivate(req->wb_page); |
396 | set_page_private(req->wb_page, (unsigned long)req); | 396 | set_page_private(req->wb_page, (unsigned long)req); |
@@ -428,7 +428,6 @@ static void | |||
428 | nfs_mark_request_dirty(struct nfs_page *req) | 428 | nfs_mark_request_dirty(struct nfs_page *req) |
429 | { | 429 | { |
430 | __set_page_dirty_nobuffers(req->wb_page); | 430 | __set_page_dirty_nobuffers(req->wb_page); |
431 | __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); | ||
432 | } | 431 | } |
433 | 432 | ||
434 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 433 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
@@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
762 | status = nfs_writepage_setup(ctx, page, offset, count); | 761 | status = nfs_writepage_setup(ctx, page, offset, count); |
763 | if (status < 0) | 762 | if (status < 0) |
764 | nfs_set_pageerror(page); | 763 | nfs_set_pageerror(page); |
764 | else | ||
765 | __set_page_dirty_nobuffers(page); | ||
765 | 766 | ||
766 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", | 767 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", |
767 | status, (long long)i_size_read(inode)); | 768 | status, (long long)i_size_read(inode)); |
@@ -958,7 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head | |||
958 | if (!data) | 959 | if (!data) |
959 | goto out_bad; | 960 | goto out_bad; |
960 | data->pagevec[0] = page; | 961 | data->pagevec[0] = page; |
961 | nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); | 962 | nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); |
962 | list_add(&data->list, res); | 963 | list_add(&data->list, res); |
963 | requests++; | 964 | requests++; |
964 | nbytes -= len; | 965 | nbytes -= len; |
@@ -1010,7 +1011,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r | |||
1010 | req = nfs_list_entry(head->next); | 1011 | req = nfs_list_entry(head->next); |
1011 | nfs_list_remove_request(req); | 1012 | nfs_list_remove_request(req); |
1012 | nfs_list_add_request(req, &data->pages); | 1013 | nfs_list_add_request(req, &data->pages); |
1013 | ClearPageError(req->wb_page); | ||
1014 | *pages++ = req->wb_page; | 1014 | *pages++ = req->wb_page; |
1015 | } | 1015 | } |
1016 | req = nfs_list_entry(data->pages.next); | 1016 | req = nfs_list_entry(data->pages.next); |
@@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
1165 | static void nfs_writeback_release_full(void *calldata) | 1165 | static void nfs_writeback_release_full(void *calldata) |
1166 | { | 1166 | { |
1167 | struct nfs_write_data *data = calldata; | 1167 | struct nfs_write_data *data = calldata; |
1168 | int status = data->task.tk_status; | 1168 | int ret, status = data->task.tk_status; |
1169 | struct nfs_pageio_descriptor pgio; | ||
1170 | |||
1171 | if (data->pnfs_error) { | ||
1172 | nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE); | ||
1173 | pgio.pg_recoalesce = 1; | ||
1174 | } | ||
1169 | 1175 | ||
1170 | /* Update attributes as result of writeback. */ | 1176 | /* Update attributes as result of writeback. */ |
1171 | while (!list_empty(&data->pages)) { | 1177 | while (!list_empty(&data->pages)) { |
@@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata) | |||
1181 | req->wb_bytes, | 1187 | req->wb_bytes, |
1182 | (long long)req_offset(req)); | 1188 | (long long)req_offset(req)); |
1183 | 1189 | ||
1190 | if (data->pnfs_error) { | ||
1191 | dprintk(", pnfs error = %d\n", data->pnfs_error); | ||
1192 | goto next; | ||
1193 | } | ||
1194 | |||
1184 | if (status < 0) { | 1195 | if (status < 0) { |
1185 | nfs_set_pageerror(page); | 1196 | nfs_set_pageerror(page); |
1186 | nfs_context_set_write_error(req->wb_context, status); | 1197 | nfs_context_set_write_error(req->wb_context, status); |
@@ -1200,7 +1211,19 @@ remove_request: | |||
1200 | next: | 1211 | next: |
1201 | nfs_clear_page_tag_locked(req); | 1212 | nfs_clear_page_tag_locked(req); |
1202 | nfs_end_page_writeback(page); | 1213 | nfs_end_page_writeback(page); |
1214 | if (data->pnfs_error) { | ||
1215 | lock_page(page); | ||
1216 | nfs_pageio_cond_complete(&pgio, page->index); | ||
1217 | ret = nfs_page_async_flush(&pgio, page, 0); | ||
1218 | if (ret) { | ||
1219 | nfs_set_pageerror(page); | ||
1220 | dprintk("rewrite to MDS error = %d\n", ret); | ||
1221 | } | ||
1222 | unlock_page(page); | ||
1223 | } | ||
1203 | } | 1224 | } |
1225 | if (data->pnfs_error) | ||
1226 | nfs_pageio_complete(&pgio); | ||
1204 | nfs_writedata_release(calldata); | 1227 | nfs_writedata_release(calldata); |
1205 | } | 1228 | } |
1206 | 1229 | ||
@@ -1281,7 +1304,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1281 | */ | 1304 | */ |
1282 | argp->stable = NFS_FILE_SYNC; | 1305 | argp->stable = NFS_FILE_SYNC; |
1283 | } | 1306 | } |
1284 | nfs_restart_rpc(task, server->nfs_client); | 1307 | rpc_restart_call_prepare(task); |
1285 | return; | 1308 | return; |
1286 | } | 1309 | } |
1287 | if (time_before(complain, jiffies)) { | 1310 | if (time_before(complain, jiffies)) { |
@@ -1553,6 +1576,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1553 | int flags = FLUSH_SYNC; | 1576 | int flags = FLUSH_SYNC; |
1554 | int ret = 0; | 1577 | int ret = 0; |
1555 | 1578 | ||
1579 | /* no commits means nothing needs to be done */ | ||
1580 | if (!nfsi->ncommit) | ||
1581 | return ret; | ||
1582 | |||
1556 | if (wbc->sync_mode == WB_SYNC_NONE) { | 1583 | if (wbc->sync_mode == WB_SYNC_NONE) { |
1557 | /* Don't commit yet if this is a non-blocking flush and there | 1584 | /* Don't commit yet if this is a non-blocking flush and there |
1558 | * are a lot of outstanding writes for this mapping. | 1585 | * are a lot of outstanding writes for this mapping. |
@@ -1686,34 +1713,20 @@ out_error: | |||
1686 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | 1713 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, |
1687 | struct page *page) | 1714 | struct page *page) |
1688 | { | 1715 | { |
1689 | struct nfs_page *req; | 1716 | /* |
1690 | int ret; | 1717 | * If PagePrivate is set, then the page is currently associated with |
1718 | * an in-progress read or write request. Don't try to migrate it. | ||
1719 | * | ||
1720 | * FIXME: we could do this in principle, but we'll need a way to ensure | ||
1721 | * that we can safely release the inode reference while holding | ||
1722 | * the page lock. | ||
1723 | */ | ||
1724 | if (PagePrivate(page)) | ||
1725 | return -EBUSY; | ||
1691 | 1726 | ||
1692 | nfs_fscache_release_page(page, GFP_KERNEL); | 1727 | nfs_fscache_release_page(page, GFP_KERNEL); |
1693 | 1728 | ||
1694 | req = nfs_find_and_lock_request(page, false); | 1729 | return migrate_page(mapping, newpage, page); |
1695 | ret = PTR_ERR(req); | ||
1696 | if (IS_ERR(req)) | ||
1697 | goto out; | ||
1698 | |||
1699 | ret = migrate_page(mapping, newpage, page); | ||
1700 | if (!req) | ||
1701 | goto out; | ||
1702 | if (ret) | ||
1703 | goto out_unlock; | ||
1704 | page_cache_get(newpage); | ||
1705 | spin_lock(&mapping->host->i_lock); | ||
1706 | req->wb_page = newpage; | ||
1707 | SetPagePrivate(newpage); | ||
1708 | set_page_private(newpage, (unsigned long)req); | ||
1709 | ClearPagePrivate(page); | ||
1710 | set_page_private(page, 0); | ||
1711 | spin_unlock(&mapping->host->i_lock); | ||
1712 | page_cache_release(page); | ||
1713 | out_unlock: | ||
1714 | nfs_clear_page_tag_locked(req); | ||
1715 | out: | ||
1716 | return ret; | ||
1717 | } | 1730 | } |
1718 | #endif | 1731 | #endif |
1719 | 1732 | ||
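The rewritten nfs_migrate_page gives up on pages with PagePrivate set rather than transplanting the nfs_page to the new page, per the FIXME in the hunk about releasing the inode reference under the page lock. A small stand-alone sketch of the early-return shape (types and helpers here are stand-ins, not kernel APIs):

#include <errno.h>
#include <stdio.h>

struct page { int has_private; };

static int page_has_private(const struct page *p) { return p->has_private; }
static int do_migrate(struct page *p) { (void)p; return 0; }	/* stand-in */

/* A page owned by an in-flight NFS request is simply not migrated,
 * sidestepping the lock-ordering problem noted in the FIXME. */
static int migrate_if_idle(struct page *page)
{
	if (page_has_private(page))
		return -EBUSY;
	return do_migrate(page);
}

int main(void)
{
	struct page busy = { 1 };
	printf("busy page -> %d (EBUSY=%d)\n", migrate_if_idle(&busy), EBUSY);
	return 0;
}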
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index f4cc1e2bfc54..62f3b9074e84 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/exportfs.h> | 17 | #include <linux/exportfs.h> |
18 | 18 | ||
19 | #include <linux/nfsd/syscall.h> | ||
20 | #include <net/ipv6.h> | 19 | #include <net/ipv6.h> |
21 | 20 | ||
22 | #include "nfsd.h" | 21 | #include "nfsd.h" |
@@ -318,7 +317,6 @@ static void svc_export_put(struct kref *ref) | |||
318 | struct svc_export *exp = container_of(ref, struct svc_export, h.ref); | 317 | struct svc_export *exp = container_of(ref, struct svc_export, h.ref); |
319 | path_put(&exp->ex_path); | 318 | path_put(&exp->ex_path); |
320 | auth_domain_put(exp->ex_client); | 319 | auth_domain_put(exp->ex_client); |
321 | kfree(exp->ex_pathname); | ||
322 | nfsd4_fslocs_free(&exp->ex_fslocs); | 320 | nfsd4_fslocs_free(&exp->ex_fslocs); |
323 | kfree(exp); | 321 | kfree(exp); |
324 | } | 322 | } |
@@ -528,11 +526,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
528 | 526 | ||
529 | exp.ex_client = dom; | 527 | exp.ex_client = dom; |
530 | 528 | ||
531 | err = -ENOMEM; | ||
532 | exp.ex_pathname = kstrdup(buf, GFP_KERNEL); | ||
533 | if (!exp.ex_pathname) | ||
534 | goto out2; | ||
535 | |||
536 | /* expiry */ | 529 | /* expiry */ |
537 | err = -EINVAL; | 530 | err = -EINVAL; |
538 | exp.h.expiry_time = get_expiry(&mesg); | 531 | exp.h.expiry_time = get_expiry(&mesg); |
@@ -613,8 +606,6 @@ out4: | |||
613 | nfsd4_fslocs_free(&exp.ex_fslocs); | 606 | nfsd4_fslocs_free(&exp.ex_fslocs); |
614 | kfree(exp.ex_uuid); | 607 | kfree(exp.ex_uuid); |
615 | out3: | 608 | out3: |
616 | kfree(exp.ex_pathname); | ||
617 | out2: | ||
618 | path_put(&exp.ex_path); | 609 | path_put(&exp.ex_path); |
619 | out1: | 610 | out1: |
620 | auth_domain_put(dom); | 611 | auth_domain_put(dom); |
@@ -678,7 +669,6 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) | |||
678 | new->ex_client = item->ex_client; | 669 | new->ex_client = item->ex_client; |
679 | new->ex_path.dentry = dget(item->ex_path.dentry); | 670 | new->ex_path.dentry = dget(item->ex_path.dentry); |
680 | new->ex_path.mnt = mntget(item->ex_path.mnt); | 671 | new->ex_path.mnt = mntget(item->ex_path.mnt); |
681 | new->ex_pathname = NULL; | ||
682 | new->ex_fslocs.locations = NULL; | 672 | new->ex_fslocs.locations = NULL; |
683 | new->ex_fslocs.locations_count = 0; | 673 | new->ex_fslocs.locations_count = 0; |
684 | new->ex_fslocs.migrated = 0; | 674 | new->ex_fslocs.migrated = 0; |
@@ -696,8 +686,6 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
696 | new->ex_fsid = item->ex_fsid; | 686 | new->ex_fsid = item->ex_fsid; |
697 | new->ex_uuid = item->ex_uuid; | 687 | new->ex_uuid = item->ex_uuid; |
698 | item->ex_uuid = NULL; | 688 | item->ex_uuid = NULL; |
699 | new->ex_pathname = item->ex_pathname; | ||
700 | item->ex_pathname = NULL; | ||
701 | new->ex_fslocs.locations = item->ex_fslocs.locations; | 689 | new->ex_fslocs.locations = item->ex_fslocs.locations; |
702 | item->ex_fslocs.locations = NULL; | 690 | item->ex_fslocs.locations = NULL; |
703 | new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; | 691 | new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; |
@@ -1010,7 +998,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) | |||
1010 | return exp; | 998 | return exp; |
1011 | } | 999 | } |
1012 | 1000 | ||
1013 | static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp) | 1001 | struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) |
1014 | { | 1002 | { |
1015 | u32 fsidv[2]; | 1003 | u32 fsidv[2]; |
1016 | 1004 | ||
@@ -1030,7 +1018,7 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
1030 | struct svc_export *exp; | 1018 | struct svc_export *exp; |
1031 | __be32 rv; | 1019 | __be32 rv; |
1032 | 1020 | ||
1033 | exp = find_fsidzero_export(rqstp); | 1021 | exp = rqst_find_fsidzero_export(rqstp); |
1034 | if (IS_ERR(exp)) | 1022 | if (IS_ERR(exp)) |
1035 | return nfserrno(PTR_ERR(exp)); | 1023 | return nfserrno(PTR_ERR(exp)); |
1036 | rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); | 1024 | rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 02eb4edf0ece..7748d6a18d97 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -39,6 +39,8 @@ | |||
39 | 39 | ||
40 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 40 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
41 | 41 | ||
42 | static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); | ||
43 | |||
42 | #define NFSPROC4_CB_NULL 0 | 44 | #define NFSPROC4_CB_NULL 0 |
43 | #define NFSPROC4_CB_COMPOUND 1 | 45 | #define NFSPROC4_CB_COMPOUND 1 |
44 | 46 | ||
@@ -351,7 +353,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, | |||
351 | __be32 *p; | 353 | __be32 *p; |
352 | 354 | ||
353 | encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); | 355 | encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); |
354 | encode_stateid4(xdr, &dp->dl_stateid); | 356 | encode_stateid4(xdr, &dp->dl_stid.sc_stateid); |
355 | 357 | ||
356 | p = xdr_reserve_space(xdr, 4); | 358 | p = xdr_reserve_space(xdr, 4); |
357 | *p++ = xdr_zero; /* truncate */ | 359 | *p++ = xdr_zero; /* truncate */ |
@@ -460,6 +462,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, | |||
460 | */ | 462 | */ |
461 | status = 0; | 463 | status = 0; |
462 | out: | 464 | out: |
465 | if (status) | ||
466 | nfsd4_mark_cb_fault(cb->cb_clp, status); | ||
463 | return status; | 467 | return status; |
464 | out_overflow: | 468 | out_overflow: |
465 | print_overflow_msg(__func__, xdr); | 469 | print_overflow_msg(__func__, xdr); |
@@ -686,6 +690,12 @@ static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) | |||
686 | warn_no_callback_path(clp, reason); | 690 | warn_no_callback_path(clp, reason); |
687 | } | 691 | } |
688 | 692 | ||
693 | static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) | ||
694 | { | ||
695 | clp->cl_cb_state = NFSD4_CB_FAULT; | ||
696 | warn_no_callback_path(clp, reason); | ||
697 | } | ||
698 | |||
689 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) | 699 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) |
690 | { | 700 | { |
691 | struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); | 701 | struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); |
@@ -787,7 +797,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | |||
787 | { | 797 | { |
788 | struct nfsd4_callback *cb = calldata; | 798 | struct nfsd4_callback *cb = calldata; |
789 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | 799 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); |
790 | struct nfs4_client *clp = dp->dl_client; | 800 | struct nfs4_client *clp = dp->dl_stid.sc_client; |
791 | u32 minorversion = clp->cl_minorversion; | 801 | u32 minorversion = clp->cl_minorversion; |
792 | 802 | ||
793 | cb->cb_minorversion = minorversion; | 803 | cb->cb_minorversion = minorversion; |
@@ -809,7 +819,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | |||
809 | { | 819 | { |
810 | struct nfsd4_callback *cb = calldata; | 820 | struct nfsd4_callback *cb = calldata; |
811 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | 821 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); |
812 | struct nfs4_client *clp = dp->dl_client; | 822 | struct nfs4_client *clp = dp->dl_stid.sc_client; |
813 | 823 | ||
814 | dprintk("%s: minorversion=%d\n", __func__, | 824 | dprintk("%s: minorversion=%d\n", __func__, |
815 | clp->cl_minorversion); | 825 | clp->cl_minorversion); |
@@ -832,7 +842,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
832 | { | 842 | { |
833 | struct nfsd4_callback *cb = calldata; | 843 | struct nfsd4_callback *cb = calldata; |
834 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | 844 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); |
835 | struct nfs4_client *clp = dp->dl_client; | 845 | struct nfs4_client *clp = dp->dl_stid.sc_client; |
836 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; | 846 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; |
837 | 847 | ||
838 | nfsd4_cb_done(task, calldata); | 848 | nfsd4_cb_done(task, calldata); |
@@ -1006,7 +1016,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w) | |||
1006 | void nfsd4_cb_recall(struct nfs4_delegation *dp) | 1016 | void nfsd4_cb_recall(struct nfs4_delegation *dp) |
1007 | { | 1017 | { |
1008 | struct nfsd4_callback *cb = &dp->dl_recall; | 1018 | struct nfsd4_callback *cb = &dp->dl_recall; |
1009 | struct nfs4_client *clp = dp->dl_client; | 1019 | struct nfs4_client *clp = dp->dl_stid.sc_client; |
1010 | 1020 | ||
1011 | dp->dl_retries = 1; | 1021 | dp->dl_retries = 1; |
1012 | cb->cb_op = dp; | 1022 | cb->cb_op = dp; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e80777666618..fa383361bc61 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/file.h> | 35 | #include <linux/file.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | 37 | ||
38 | #include "idmap.h" | ||
38 | #include "cache.h" | 39 | #include "cache.h" |
39 | #include "xdr4.h" | 40 | #include "xdr4.h" |
40 | #include "vfs.h" | 41 | #include "vfs.h" |
@@ -156,6 +157,8 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs | |||
156 | !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) | 157 | !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) |
157 | return nfserr_inval; | 158 | return nfserr_inval; |
158 | 159 | ||
160 | accmode |= NFSD_MAY_READ_IF_EXEC; | ||
161 | |||
159 | if (open->op_share_access & NFS4_SHARE_ACCESS_READ) | 162 | if (open->op_share_access & NFS4_SHARE_ACCESS_READ) |
160 | accmode |= NFSD_MAY_READ; | 163 | accmode |= NFSD_MAY_READ; |
161 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) | 164 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) |
@@ -168,12 +171,29 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs | |||
168 | return status; | 171 | return status; |
169 | } | 172 | } |
170 | 173 | ||
174 | static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) | ||
175 | { | ||
176 | umode_t mode = fh->fh_dentry->d_inode->i_mode; | ||
177 | |||
178 | if (S_ISREG(mode)) | ||
179 | return nfs_ok; | ||
180 | if (S_ISDIR(mode)) | ||
181 | return nfserr_isdir; | ||
182 | /* | ||
183 | * Using err_symlink as our catch-all case may look odd; but | ||
184 | * there's no other obvious error for this case in 4.0, and we | ||
185 | * happen to know that it will cause the linux v4 client to do | ||
186 | * the right thing on attempts to open something other than a | ||
187 | * regular file. | ||
188 | */ | ||
189 | return nfserr_symlink; | ||
190 | } | ||
191 | |||
171 | static __be32 | 192 | static __be32 |
172 | do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) | 193 | do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) |
173 | { | 194 | { |
174 | struct svc_fh resfh; | 195 | struct svc_fh resfh; |
175 | __be32 status; | 196 | __be32 status; |
176 | int created = 0; | ||
177 | 197 | ||
178 | fh_init(&resfh, NFS4_FHSIZE); | 198 | fh_init(&resfh, NFS4_FHSIZE); |
179 | open->op_truncate = 0; | 199 | open->op_truncate = 0; |
@@ -202,7 +222,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
202 | open->op_fname.len, &open->op_iattr, | 222 | open->op_fname.len, &open->op_iattr, |
203 | &resfh, open->op_createmode, | 223 | &resfh, open->op_createmode, |
204 | (u32 *)open->op_verf.data, | 224 | (u32 *)open->op_verf.data, |
205 | &open->op_truncate, &created); | 225 | &open->op_truncate, &open->op_created); |
206 | 226 | ||
207 | /* | 227 | /* |
208 | * Following rfc 3530 14.2.16, use the returned bitmask | 228 | * Following rfc 3530 14.2.16, use the returned bitmask |
@@ -216,6 +236,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
216 | status = nfsd_lookup(rqstp, current_fh, | 236 | status = nfsd_lookup(rqstp, current_fh, |
217 | open->op_fname.data, open->op_fname.len, &resfh); | 237 | open->op_fname.data, open->op_fname.len, &resfh); |
218 | fh_unlock(current_fh); | 238 | fh_unlock(current_fh); |
239 | if (status) | ||
240 | goto out; | ||
241 | status = nfsd_check_obj_isreg(&resfh); | ||
219 | } | 242 | } |
220 | if (status) | 243 | if (status) |
221 | goto out; | 244 | goto out; |
@@ -227,9 +250,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
227 | fh_dup2(current_fh, &resfh); | 250 | fh_dup2(current_fh, &resfh); |
228 | 251 | ||
229 | /* set reply cache */ | 252 | /* set reply cache */ |
230 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, | 253 | fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, |
231 | &resfh.fh_handle); | 254 | &resfh.fh_handle); |
232 | if (!created) | 255 | if (!open->op_created) |
233 | status = do_open_permission(rqstp, current_fh, open, | 256 | status = do_open_permission(rqstp, current_fh, open, |
234 | NFSD_MAY_NOP); | 257 | NFSD_MAY_NOP); |
235 | 258 | ||
@@ -254,7 +277,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
254 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); | 277 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); |
255 | 278 | ||
256 | /* set replay cache */ | 279 | /* set replay cache */ |
257 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, | 280 | fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, |
258 | &current_fh->fh_handle); | 281 | &current_fh->fh_handle); |
259 | 282 | ||
260 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && | 283 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && |
@@ -283,14 +306,18 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
283 | __be32 status; | 306 | __be32 status; |
284 | struct nfsd4_compoundres *resp; | 307 | struct nfsd4_compoundres *resp; |
285 | 308 | ||
286 | dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", | 309 | dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", |
287 | (int)open->op_fname.len, open->op_fname.data, | 310 | (int)open->op_fname.len, open->op_fname.data, |
288 | open->op_stateowner); | 311 | open->op_openowner); |
289 | 312 | ||
290 | /* This check required by spec. */ | 313 | /* This check required by spec. */ |
291 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) | 314 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) |
292 | return nfserr_inval; | 315 | return nfserr_inval; |
293 | 316 | ||
317 | /* We don't yet support WANT bits: */ | ||
318 | open->op_share_access &= NFS4_SHARE_ACCESS_MASK; | ||
319 | |||
320 | open->op_created = 0; | ||
294 | /* | 321 | /* |
295 | * RFC5661 18.51.3 | 322 | * RFC5661 18.51.3 |
296 | * Before RECLAIM_COMPLETE done, server should deny new lock | 323 | * Before RECLAIM_COMPLETE done, server should deny new lock |
@@ -309,7 +336,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
309 | resp = rqstp->rq_resp; | 336 | resp = rqstp->rq_resp; |
310 | status = nfsd4_process_open1(&resp->cstate, open); | 337 | status = nfsd4_process_open1(&resp->cstate, open); |
311 | if (status == nfserr_replay_me) { | 338 | if (status == nfserr_replay_me) { |
312 | struct nfs4_replay *rp = &open->op_stateowner->so_replay; | 339 | struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; |
313 | fh_put(&cstate->current_fh); | 340 | fh_put(&cstate->current_fh); |
314 | fh_copy_shallow(&cstate->current_fh.fh_handle, | 341 | fh_copy_shallow(&cstate->current_fh.fh_handle, |
315 | &rp->rp_openfh); | 342 | &rp->rp_openfh); |
@@ -339,32 +366,23 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
339 | switch (open->op_claim_type) { | 366 | switch (open->op_claim_type) { |
340 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 367 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
341 | case NFS4_OPEN_CLAIM_NULL: | 368 | case NFS4_OPEN_CLAIM_NULL: |
342 | /* | ||
343 | * (1) set CURRENT_FH to the file being opened, | ||
344 | * creating it if necessary, (2) set open->op_cinfo, | ||
345 | * (3) set open->op_truncate if the file is to be | ||
346 | * truncated after opening, (4) do permission checking. | ||
347 | */ | ||
348 | status = do_open_lookup(rqstp, &cstate->current_fh, | 369 | status = do_open_lookup(rqstp, &cstate->current_fh, |
349 | open); | 370 | open); |
350 | if (status) | 371 | if (status) |
351 | goto out; | 372 | goto out; |
352 | break; | 373 | break; |
353 | case NFS4_OPEN_CLAIM_PREVIOUS: | 374 | case NFS4_OPEN_CLAIM_PREVIOUS: |
354 | open->op_stateowner->so_confirmed = 1; | 375 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
355 | /* | 376 | case NFS4_OPEN_CLAIM_FH: |
356 | * The CURRENT_FH is already set to the file being | 377 | case NFS4_OPEN_CLAIM_DELEG_CUR_FH: |
357 | * opened. (1) set open->op_cinfo, (2) set | ||
358 | * open->op_truncate if the file is to be truncated | ||
359 | * after opening, (3) do permission checking. | ||
360 | */ | ||
361 | status = do_open_fhandle(rqstp, &cstate->current_fh, | 378 | status = do_open_fhandle(rqstp, &cstate->current_fh, |
362 | open); | 379 | open); |
363 | if (status) | 380 | if (status) |
364 | goto out; | 381 | goto out; |
365 | break; | 382 | break; |
383 | case NFS4_OPEN_CLAIM_DELEG_PREV_FH: | ||
366 | case NFS4_OPEN_CLAIM_DELEGATE_PREV: | 384 | case NFS4_OPEN_CLAIM_DELEGATE_PREV: |
367 | open->op_stateowner->so_confirmed = 1; | 385 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
368 | dprintk("NFSD: unsupported OPEN claim type %d\n", | 386 | dprintk("NFSD: unsupported OPEN claim type %d\n", |
369 | open->op_claim_type); | 387 | open->op_claim_type); |
370 | status = nfserr_notsupp; | 388 | status = nfserr_notsupp; |
@@ -381,12 +399,13 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
381 | * set, (2) sets open->op_stateid, (3) sets open->op_delegation. | 399 | * set, (2) sets open->op_stateid, (3) sets open->op_delegation. |
382 | */ | 400 | */ |
383 | status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); | 401 | status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); |
402 | WARN_ON(status && open->op_created); | ||
384 | out: | 403 | out: |
385 | if (open->op_stateowner) { | 404 | nfsd4_cleanup_open_state(open, status); |
386 | nfs4_get_stateowner(open->op_stateowner); | 405 | if (open->op_openowner) |
387 | cstate->replay_owner = open->op_stateowner; | 406 | cstate->replay_owner = &open->op_openowner->oo_owner; |
388 | } | 407 | else |
389 | nfs4_unlock_state(); | 408 | nfs4_unlock_state(); |
390 | return status; | 409 | return status; |
391 | } | 410 | } |
392 | 411 | ||
@@ -467,17 +486,12 @@ static __be32 | |||
467 | nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 486 | nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
468 | struct nfsd4_commit *commit) | 487 | struct nfsd4_commit *commit) |
469 | { | 488 | { |
470 | __be32 status; | ||
471 | |||
472 | u32 *p = (u32 *)commit->co_verf.data; | 489 | u32 *p = (u32 *)commit->co_verf.data; |
473 | *p++ = nfssvc_boot.tv_sec; | 490 | *p++ = nfssvc_boot.tv_sec; |
474 | *p++ = nfssvc_boot.tv_usec; | 491 | *p++ = nfssvc_boot.tv_usec; |
475 | 492 | ||
476 | status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, | 493 | return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, |
477 | commit->co_count); | 494 | commit->co_count); |
478 | if (status == nfserr_symlink) | ||
479 | status = nfserr_inval; | ||
480 | return status; | ||
481 | } | 495 | } |
482 | 496 | ||
483 | static __be32 | 497 | static __be32 |
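
The COMMIT reply verifier is just the server boot time packed into two 32-bit words, so a client that sees the verifier change knows the server rebooted and must resend any uncommitted writes. A minimal userspace sketch of that packing (struct name hypothetical):

    #include <stdint.h>
    #include <string.h>
    #include <sys/time.h>

    struct nfs4_verifier_sk { uint8_t data[8]; };  /* stand-in for the 8-byte verifier */

    /* Pack server boot time into the verifier, as nfsd4_commit does above. */
    static void make_boot_verifier(struct nfs4_verifier_sk *v, struct timeval boot)
    {
        uint32_t words[2] = { (uint32_t)boot.tv_sec, (uint32_t)boot.tv_usec };
        memcpy(v->data, words, sizeof(words));
    }
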
@@ -492,8 +506,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
492 | 506 | ||
493 | status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, | 507 | status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, |
494 | NFSD_MAY_CREATE); | 508 | NFSD_MAY_CREATE); |
495 | if (status == nfserr_symlink) | ||
496 | status = nfserr_notdir; | ||
497 | if (status) | 509 | if (status) |
498 | return status; | 510 | return status; |
499 | 511 | ||
@@ -691,7 +703,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
691 | readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); | 703 | readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); |
692 | readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); | 704 | readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); |
693 | 705 | ||
694 | if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || | 706 | if ((cookie == 1) || (cookie == 2) || |
695 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) | 707 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) |
696 | return nfserr_bad_cookie; | 708 | return nfserr_bad_cookie; |
697 | 709 | ||
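
Dropping the `cookie > ~(u32)0` test lets clients present full 64-bit cookies (presumably the rationale; the hunk itself gives none). The surviving checks still reject the reserved cookie values 1 and 2 and require an all-zero verifier when the cookie is 0. A self-contained sketch of the remaining validation:

    #include <stdint.h>
    #include <string.h>

    #define VERIFIER_SIZE 8

    /* Cookies 1 and 2 stay reserved; cookie 0 is only legal together
     * with an all-zero cookie verifier. */
    static int bad_readdir_cookie(uint64_t cookie, const uint8_t verf[VERIFIER_SIZE])
    {
        static const uint8_t zeroverf[VERIFIER_SIZE];  /* all zeroes */

        if (cookie == 1 || cookie == 2)
            return 1;
        return cookie == 0 && memcmp(verf, zeroverf, VERIFIER_SIZE) != 0;
    }
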
@@ -719,8 +731,6 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
719 | return nfserr_grace; | 731 | return nfserr_grace; |
720 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, | 732 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, |
721 | remove->rm_name, remove->rm_namelen); | 733 | remove->rm_name, remove->rm_namelen); |
722 | if (status == nfserr_symlink) | ||
723 | return nfserr_notdir; | ||
724 | if (!status) { | 734 | if (!status) { |
725 | fh_unlock(&cstate->current_fh); | 735 | fh_unlock(&cstate->current_fh); |
726 | set_change_info(&remove->rm_cinfo, &cstate->current_fh); | 736 | set_change_info(&remove->rm_cinfo, &cstate->current_fh); |
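
This hunk, like the COMMIT, CREATE, RENAME and WRITE hunks around it, deletes a per-op remapping of nfserr_symlink; presumably that translation now happens once in common lookup/verify code. A hypothetical sketch of such a centralized mapping:

    /* Hypothetical centralized mapping: translate the generic "it's a
     * symlink" failure once, based on what the caller expected, instead
     * of repeating the special case in every operation. */
    enum nfs_err_sk { NFS_OK_SK, NFS_SYMLINK_SK, NFS_NOTDIR_SK, NFS_INVAL_SK };

    static enum nfs_err_sk map_symlink_err(enum nfs_err_sk status, int wanted_dir)
    {
        if (status != NFS_SYMLINK_SK)
            return status;
        return wanted_dir ? NFS_NOTDIR_SK : NFS_INVAL_SK;  /* dir ops vs. I/O ops */
    }
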
@@ -751,8 +761,6 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
751 | (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && | 761 | (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && |
752 | S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) | 762 | S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) |
753 | status = nfserr_exist; | 763 | status = nfserr_exist; |
754 | else if (status == nfserr_symlink) | ||
755 | status = nfserr_notdir; | ||
756 | 764 | ||
757 | if (!status) { | 765 | if (!status) { |
758 | set_change_info(&rename->rn_sinfo, &cstate->current_fh); | 766 | set_change_info(&rename->rn_sinfo, &cstate->current_fh); |
@@ -892,8 +900,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
892 | 900 | ||
893 | write->wr_bytes_written = cnt; | 901 | write->wr_bytes_written = cnt; |
894 | 902 | ||
895 | if (status == nfserr_symlink) | ||
896 | status = nfserr_inval; | ||
897 | return status; | 903 | return status; |
898 | } | 904 | } |
899 | 905 | ||
@@ -930,7 +936,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
930 | count = 4 + (verify->ve_attrlen >> 2); | 936 | count = 4 + (verify->ve_attrlen >> 2); |
931 | buf = kmalloc(count << 2, GFP_KERNEL); | 937 | buf = kmalloc(count << 2, GFP_KERNEL); |
932 | if (!buf) | 938 | if (!buf) |
933 | return nfserr_resource; | 939 | return nfserr_jukebox; |
934 | 940 | ||
935 | status = nfsd4_encode_fattr(&cstate->current_fh, | 941 | status = nfsd4_encode_fattr(&cstate->current_fh, |
936 | cstate->current_fh.fh_export, | 942 | cstate->current_fh.fh_export, |
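
The VERIFY scratch buffer is sized in 4-byte XDR words: four words of headroom for the encoded bitmap and length fields plus `ve_attrlen >> 2` words of attribute data, converted back to bytes with `count << 2`. The allocation-failure error also changes from nfserr_resource to nfserr_jukebox, which tells the client to retry later. A small sketch of the word/byte arithmetic:

    #include <stddef.h>
    #include <stdint.h>

    /* Size the scratch buffer in 4-byte XDR words, then convert back to
     * bytes for the allocation, mirroring _nfsd4_verify above. */
    static size_t verify_scratch_bytes(uint32_t attrlen)
    {
        uint32_t words = 4 + (attrlen >> 2);  /* bytes -> words, plus headroom */
        return (size_t)words << 2;            /* words -> bytes */
    }
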
@@ -994,6 +1000,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum) | |||
994 | 1000 | ||
995 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, | 1001 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, |
996 | void *); | 1002 | void *); |
1003 | typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); | ||
1004 | |||
997 | enum nfsd4_op_flags { | 1005 | enum nfsd4_op_flags { |
998 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ | 1006 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ |
999 | ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ | 1007 | ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ |
@@ -1001,13 +1009,15 @@ enum nfsd4_op_flags { | |||
1001 | /* For rfc 5661 section 2.6.3.1.1: */ | 1009 | /* For rfc 5661 section 2.6.3.1.1: */ |
1002 | OP_HANDLES_WRONGSEC = 1 << 3, | 1010 | OP_HANDLES_WRONGSEC = 1 << 3, |
1003 | OP_IS_PUTFH_LIKE = 1 << 4, | 1011 | OP_IS_PUTFH_LIKE = 1 << 4, |
1004 | }; | ||
1005 | |||
1006 | struct nfsd4_operation { | ||
1007 | nfsd4op_func op_func; | ||
1008 | u32 op_flags; | ||
1009 | char *op_name; | ||
1010 | /* | 1012 | /* |
1013 | * These are the ops whose result size we estimate before | ||
1014 | * encoding, to avoid performing an op then not being able to | ||
1015 | * respond or cache a response. This includes writes and setattrs | ||
1016 | * as well as the operations usually called "nonidempotent": | ||
1017 | */ | ||
1018 | OP_MODIFIES_SOMETHING = 1 << 5, | ||
1019 | /* | ||
1020 | * Cache compounds containing these ops in the xid-based drc: | ||
1011 | * We use the DRC for compounds containing non-idempotent | 1021 | * We use the DRC for compounds containing non-idempotent |
1012 | * operations, *except* those that are 4.1-specific (since | 1022 | * operations, *except* those that are 4.1-specific (since |
1013 | * sessions provide their own EOS), and except for stateful | 1023 | * sessions provide their own EOS), and except for stateful |
@@ -1015,7 +1025,15 @@ struct nfsd4_operation { | |||
1015 | * (since sequence numbers provide EOS for open, lock, etc in | 1025 | * (since sequence numbers provide EOS for open, lock, etc in |
1016 | * the v4.0 case). | 1026 | * the v4.0 case). |
1017 | */ | 1027 | */ |
1018 | bool op_cacheresult; | 1028 | OP_CACHEME = 1 << 6, |
1029 | }; | ||
1030 | |||
1031 | struct nfsd4_operation { | ||
1032 | nfsd4op_func op_func; | ||
1033 | u32 op_flags; | ||
1034 | char *op_name; | ||
1035 | /* Try to get response size before operation */ | ||
1036 | nfsd4op_rsize op_rsize_bop; | ||
1019 | }; | 1037 | }; |
1020 | 1038 | ||
1021 | static struct nfsd4_operation nfsd4_ops[]; | 1039 | static struct nfsd4_operation nfsd4_ops[]; |
@@ -1062,7 +1080,7 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) | |||
1062 | 1080 | ||
1063 | bool nfsd4_cache_this_op(struct nfsd4_op *op) | 1081 | bool nfsd4_cache_this_op(struct nfsd4_op *op) |
1064 | { | 1082 | { |
1065 | return OPDESC(op)->op_cacheresult; | 1083 | return OPDESC(op)->op_flags & OP_CACHEME; |
1066 | } | 1084 | } |
1067 | 1085 | ||
1068 | static bool need_wrongsec_check(struct svc_rqst *rqstp) | 1086 | static bool need_wrongsec_check(struct svc_rqst *rqstp) |
@@ -1110,6 +1128,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1110 | struct nfsd4_operation *opdesc; | 1128 | struct nfsd4_operation *opdesc; |
1111 | struct nfsd4_compound_state *cstate = &resp->cstate; | 1129 | struct nfsd4_compound_state *cstate = &resp->cstate; |
1112 | int slack_bytes; | 1130 | int slack_bytes; |
1131 | u32 plen = 0; | ||
1113 | __be32 status; | 1132 | __be32 status; |
1114 | 1133 | ||
1115 | resp->xbuf = &rqstp->rq_res; | 1134 | resp->xbuf = &rqstp->rq_res; |
@@ -1188,6 +1207,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1188 | goto encode_op; | 1207 | goto encode_op; |
1189 | } | 1208 | } |
1190 | 1209 | ||
1210 | /* If op is non-idempotent */ | ||
1211 | if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { | ||
1212 | plen = opdesc->op_rsize_bop(rqstp, op); | ||
1213 | op->status = nfsd4_check_resp_size(resp, plen); | ||
1214 | } | ||
1215 | |||
1216 | if (op->status) | ||
1217 | goto encode_op; | ||
1218 | |||
1191 | if (opdesc->op_func) | 1219 | if (opdesc->op_func) |
1192 | op->status = opdesc->op_func(rqstp, cstate, &op->u); | 1220 | op->status = opdesc->op_func(rqstp, cstate, &op->u); |
1193 | else | 1221 | else |
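
The new gate runs the size estimate only for ops flagged OP_MODIFIES_SOMETHING, failing the op up front if its worst-case reply would no longer fit, rather than mutating state and then being unable to encode or cache a response. A minimal sketch of that control flow (types hypothetical):

    #include <stdint.h>

    struct gated_op_sk {                      /* minimal slice of the descriptor */
        uint32_t flags;                       /* SK_MODIFIES etc. */
        uint32_t (*rsize)(void *op);          /* worst-case reply size, bytes */
        int      (*func)(void *cstate, void *op);
    };

    #define SK_MODIFIES (1u << 5)

    /* Estimate before executing: a state-mutating op whose reply could
     * no longer be encoded is failed here, before it changes anything. */
    static int dispatch_op(const struct gated_op_sk *d, void *cstate, void *op,
                           uint32_t space_left)
    {
        if ((d->flags & SK_MODIFIES) && d->rsize(op) > space_left)
            return -1;                        /* nfsd4_check_resp_size() analog */
        return d->func(cstate, op);
    }
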
@@ -1217,7 +1245,7 @@ encode_op: | |||
1217 | be32_to_cpu(status)); | 1245 | be32_to_cpu(status)); |
1218 | 1246 | ||
1219 | if (cstate->replay_owner) { | 1247 | if (cstate->replay_owner) { |
1220 | nfs4_put_stateowner(cstate->replay_owner); | 1248 | nfs4_unlock_state(); |
1221 | cstate->replay_owner = NULL; | 1249 | cstate->replay_owner = NULL; |
1222 | } | 1250 | } |
1223 | /* XXX Ugh, we need to get rid of this kind of special case: */ | 1251 | /* XXX Ugh, we need to get rid of this kind of special case: */ |
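
With stateowners no longer reference-counted here, a parked cstate->replay_owner now implies the state lock is still held from the op, and this encode-side hunk is the matching release: clearing replay_owner is what drops the lock. A pthread sketch of that invariant (an inference from the two hunks, not spelled out in the diff):

    #include <pthread.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;  /* nfs4_lock_state() analog */

    struct cstate_sk { void *replay_owner; };

    /* Op side: take the state lock for the op, and keep it held across
     * the return iff an owner is parked for replay processing. */
    static int run_stateful_op(struct cstate_sk *cs, int (*op)(void **owner))
    {
        void *owner = NULL;
        int status;

        pthread_mutex_lock(&state_lock);
        status = op(&owner);
        cs->replay_owner = owner;             /* may be NULL */
        if (!cs->replay_owner)
            pthread_mutex_unlock(&state_lock);
        return status;
    }

    /* Encode side (the hunk above): releasing the parked owner drops the lock. */
    static void finish_encode(struct cstate_sk *cs)
    {
        if (cs->replay_owner) {
            pthread_mutex_unlock(&state_lock);
            cs->replay_owner = NULL;
        }
    }
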
@@ -1238,6 +1266,144 @@ out: | |||
1238 | return status; | 1266 | return status; |
1239 | } | 1267 | } |
1240 | 1268 | ||
1269 | #define op_encode_hdr_size (2) | ||
1270 | #define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) | ||
1271 | #define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) | ||
1272 | #define op_encode_change_info_maxsz (5) | ||
1273 | #define nfs4_fattr_bitmap_maxsz (4) | ||
1274 | |||
1275 | #define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | ||
1276 | #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) | ||
1277 | |||
1278 | #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | ||
1279 | |||
1280 | #define op_encode_ace_maxsz (3 + nfs4_owner_maxsz) | ||
1281 | #define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \ | ||
1282 | op_encode_ace_maxsz) | ||
1283 | |||
1284 | #define op_encode_channel_attrs_maxsz (6 + 1 + 1) | ||
1285 | |||
1286 | static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1287 | { | ||
1288 | return (op_encode_hdr_size) * sizeof(__be32); | ||
1289 | } | ||
1290 | |||
1291 | static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1292 | { | ||
1293 | return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); | ||
1294 | } | ||
1295 | |||
1296 | static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1297 | { | ||
1298 | return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); | ||
1299 | } | ||
1300 | |||
1301 | static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1302 | { | ||
1303 | return (op_encode_hdr_size + op_encode_change_info_maxsz | ||
1304 | + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); | ||
1305 | } | ||
1306 | |||
1307 | static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1308 | { | ||
1309 | return (op_encode_hdr_size + op_encode_change_info_maxsz) | ||
1310 | * sizeof(__be32); | ||
1311 | } | ||
1312 | |||
1313 | static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1314 | { | ||
1315 | return (op_encode_hdr_size + op_encode_lock_denied_maxsz) | ||
1316 | * sizeof(__be32); | ||
1317 | } | ||
1318 | |||
1319 | static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1320 | { | ||
1321 | return (op_encode_hdr_size + op_encode_stateid_maxsz | ||
1322 | + op_encode_change_info_maxsz + 1 | ||
1323 | + nfs4_fattr_bitmap_maxsz | ||
1324 | + op_encode_delegation_maxsz) * sizeof(__be32); | ||
1325 | } | ||
1326 | |||
1327 | static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1328 | { | ||
1329 | u32 maxcount = 0, rlen = 0; | ||
1330 | |||
1331 | maxcount = svc_max_payload(rqstp); | ||
1332 | rlen = op->u.read.rd_length; | ||
1333 | |||
1334 | if (rlen > maxcount) | ||
1335 | rlen = maxcount; | ||
1336 | |||
1337 | return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; | ||
1338 | } | ||
1339 | |||
1340 | static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1341 | { | ||
1342 | u32 rlen = op->u.readdir.rd_maxcount; | ||
1343 | |||
1344 | if (rlen > PAGE_SIZE) | ||
1345 | rlen = PAGE_SIZE; | ||
1346 | |||
1347 | return (op_encode_hdr_size + op_encode_verifier_maxsz) | ||
1348 | * sizeof(__be32) + rlen; | ||
1349 | } | ||
1350 | |||
1351 | static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1352 | { | ||
1353 | return (op_encode_hdr_size + op_encode_change_info_maxsz) | ||
1354 | * sizeof(__be32); | ||
1355 | } | ||
1356 | |||
1357 | static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1358 | { | ||
1359 | return (op_encode_hdr_size + op_encode_change_info_maxsz | ||
1360 | + op_encode_change_info_maxsz) * sizeof(__be32); | ||
1361 | } | ||
1362 | |||
1363 | static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1364 | { | ||
1365 | return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); | ||
1366 | } | ||
1367 | |||
1368 | static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1369 | { | ||
1370 | return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); | ||
1371 | } | ||
1372 | |||
1373 | static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1374 | { | ||
1375 | return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); | ||
1376 | } | ||
1377 | |||
1378 | static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1379 | { | ||
1380 | return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ | ||
1381 | 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ | ||
1382 | 2 + /*eir_server_owner.so_minor_id */\ | ||
1383 | /* eir_server_owner.so_major_id<> */\ | ||
1384 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ | ||
1385 | /* eir_server_scope<> */\ | ||
1386 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ | ||
1387 | 1 + /* eir_server_impl_id array length */\ | ||
1388 | 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); | ||
1389 | } | ||
1390 | |||
1391 | static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1392 | { | ||
1393 | return (op_encode_hdr_size + \ | ||
1394 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ | ||
1395 | 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); | ||
1396 | } | ||
1397 | |||
1398 | static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
1399 | { | ||
1400 | return (op_encode_hdr_size + \ | ||
1401 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ | ||
1402 | 2 + /* csr_sequence, csr_flags */\ | ||
1403 | op_encode_channel_attrs_maxsz + \ | ||
1404 | op_encode_channel_attrs_maxsz) * sizeof(__be32); | ||
1405 | } | ||
1406 | |||
1241 | static struct nfsd4_operation nfsd4_ops[] = { | 1407 | static struct nfsd4_operation nfsd4_ops[] = { |
1242 | [OP_ACCESS] = { | 1408 | [OP_ACCESS] = { |
1243 | .op_func = (nfsd4op_func)nfsd4_access, | 1409 | .op_func = (nfsd4op_func)nfsd4_access, |
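
All the estimators above return a worst-case reply size in bytes: fixed XDR overhead counted in 4-byte words times sizeof(__be32), plus any variable payload, which READ clamps to the transport's maximum payload and READDIR clamps to one page. A self-contained sketch of the READ estimate:

    #include <stdint.h>

    #define OP_ENCODE_HDR_WORDS 2  /* per-op header, in 4-byte XDR words */

    /* Worst-case READ reply: header words, eof + count words, then the
     * data itself, clamped to the transport's maximum payload, as
     * nfsd4_read_rsize does above. */
    static uint32_t read_reply_estimate(uint32_t requested, uint32_t max_payload)
    {
        uint32_t rlen = requested < max_payload ? requested : max_payload;
        return (OP_ENCODE_HDR_WORDS + 2) * sizeof(uint32_t) + rlen;
    }
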
@@ -1245,20 +1411,27 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1245 | }, | 1411 | }, |
1246 | [OP_CLOSE] = { | 1412 | [OP_CLOSE] = { |
1247 | .op_func = (nfsd4op_func)nfsd4_close, | 1413 | .op_func = (nfsd4op_func)nfsd4_close, |
1414 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1248 | .op_name = "OP_CLOSE", | 1415 | .op_name = "OP_CLOSE", |
1416 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, | ||
1249 | }, | 1417 | }, |
1250 | [OP_COMMIT] = { | 1418 | [OP_COMMIT] = { |
1251 | .op_func = (nfsd4op_func)nfsd4_commit, | 1419 | .op_func = (nfsd4op_func)nfsd4_commit, |
1420 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1252 | .op_name = "OP_COMMIT", | 1421 | .op_name = "OP_COMMIT", |
1422 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize, | ||
1253 | }, | 1423 | }, |
1254 | [OP_CREATE] = { | 1424 | [OP_CREATE] = { |
1255 | .op_func = (nfsd4op_func)nfsd4_create, | 1425 | .op_func = (nfsd4op_func)nfsd4_create, |
1426 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
1256 | .op_name = "OP_CREATE", | 1427 | .op_name = "OP_CREATE", |
1257 | .op_cacheresult = true, | 1428 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, |
1258 | }, | 1429 | }, |
1259 | [OP_DELEGRETURN] = { | 1430 | [OP_DELEGRETURN] = { |
1260 | .op_func = (nfsd4op_func)nfsd4_delegreturn, | 1431 | .op_func = (nfsd4op_func)nfsd4_delegreturn, |
1432 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1261 | .op_name = "OP_DELEGRETURN", | 1433 | .op_name = "OP_DELEGRETURN", |
1434 | .op_rsize_bop = nfsd4_only_status_rsize, | ||
1262 | }, | 1435 | }, |
1263 | [OP_GETATTR] = { | 1436 | [OP_GETATTR] = { |
1264 | .op_func = (nfsd4op_func)nfsd4_getattr, | 1437 | .op_func = (nfsd4op_func)nfsd4_getattr, |
@@ -1271,12 +1444,16 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1271 | }, | 1444 | }, |
1272 | [OP_LINK] = { | 1445 | [OP_LINK] = { |
1273 | .op_func = (nfsd4op_func)nfsd4_link, | 1446 | .op_func = (nfsd4op_func)nfsd4_link, |
1447 | .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING | ||
1448 | | OP_CACHEME, | ||
1274 | .op_name = "OP_LINK", | 1449 | .op_name = "OP_LINK", |
1275 | .op_cacheresult = true, | 1450 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize, |
1276 | }, | 1451 | }, |
1277 | [OP_LOCK] = { | 1452 | [OP_LOCK] = { |
1278 | .op_func = (nfsd4op_func)nfsd4_lock, | 1453 | .op_func = (nfsd4op_func)nfsd4_lock, |
1454 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1279 | .op_name = "OP_LOCK", | 1455 | .op_name = "OP_LOCK", |
1456 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, | ||
1280 | }, | 1457 | }, |
1281 | [OP_LOCKT] = { | 1458 | [OP_LOCKT] = { |
1282 | .op_func = (nfsd4op_func)nfsd4_lockt, | 1459 | .op_func = (nfsd4op_func)nfsd4_lockt, |
@@ -1284,7 +1461,9 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1284 | }, | 1461 | }, |
1285 | [OP_LOCKU] = { | 1462 | [OP_LOCKU] = { |
1286 | .op_func = (nfsd4op_func)nfsd4_locku, | 1463 | .op_func = (nfsd4op_func)nfsd4_locku, |
1464 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1287 | .op_name = "OP_LOCKU", | 1465 | .op_name = "OP_LOCKU", |
1466 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, | ||
1288 | }, | 1467 | }, |
1289 | [OP_LOOKUP] = { | 1468 | [OP_LOOKUP] = { |
1290 | .op_func = (nfsd4op_func)nfsd4_lookup, | 1469 | .op_func = (nfsd4op_func)nfsd4_lookup, |
@@ -1302,42 +1481,54 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1302 | }, | 1481 | }, |
1303 | [OP_OPEN] = { | 1482 | [OP_OPEN] = { |
1304 | .op_func = (nfsd4op_func)nfsd4_open, | 1483 | .op_func = (nfsd4op_func)nfsd4_open, |
1305 | .op_flags = OP_HANDLES_WRONGSEC, | 1484 | .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, |
1306 | .op_name = "OP_OPEN", | 1485 | .op_name = "OP_OPEN", |
1486 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, | ||
1307 | }, | 1487 | }, |
1308 | [OP_OPEN_CONFIRM] = { | 1488 | [OP_OPEN_CONFIRM] = { |
1309 | .op_func = (nfsd4op_func)nfsd4_open_confirm, | 1489 | .op_func = (nfsd4op_func)nfsd4_open_confirm, |
1490 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1310 | .op_name = "OP_OPEN_CONFIRM", | 1491 | .op_name = "OP_OPEN_CONFIRM", |
1492 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, | ||
1311 | }, | 1493 | }, |
1312 | [OP_OPEN_DOWNGRADE] = { | 1494 | [OP_OPEN_DOWNGRADE] = { |
1313 | .op_func = (nfsd4op_func)nfsd4_open_downgrade, | 1495 | .op_func = (nfsd4op_func)nfsd4_open_downgrade, |
1496 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1314 | .op_name = "OP_OPEN_DOWNGRADE", | 1497 | .op_name = "OP_OPEN_DOWNGRADE", |
1498 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, | ||
1315 | }, | 1499 | }, |
1316 | [OP_PUTFH] = { | 1500 | [OP_PUTFH] = { |
1317 | .op_func = (nfsd4op_func)nfsd4_putfh, | 1501 | .op_func = (nfsd4op_func)nfsd4_putfh, |
1318 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS | 1502 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1319 | | OP_IS_PUTFH_LIKE, | 1503 | | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, |
1320 | .op_name = "OP_PUTFH", | 1504 | .op_name = "OP_PUTFH", |
1505 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1321 | }, | 1506 | }, |
1322 | [OP_PUTPUBFH] = { | 1507 | [OP_PUTPUBFH] = { |
1323 | .op_func = (nfsd4op_func)nfsd4_putrootfh, | 1508 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1324 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS | 1509 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1325 | | OP_IS_PUTFH_LIKE, | 1510 | | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, |
1326 | .op_name = "OP_PUTPUBFH", | 1511 | .op_name = "OP_PUTPUBFH", |
1512 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1327 | }, | 1513 | }, |
1328 | [OP_PUTROOTFH] = { | 1514 | [OP_PUTROOTFH] = { |
1329 | .op_func = (nfsd4op_func)nfsd4_putrootfh, | 1515 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1330 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS | 1516 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1331 | | OP_IS_PUTFH_LIKE, | 1517 | | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, |
1332 | .op_name = "OP_PUTROOTFH", | 1518 | .op_name = "OP_PUTROOTFH", |
1519 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1333 | }, | 1520 | }, |
1334 | [OP_READ] = { | 1521 | [OP_READ] = { |
1335 | .op_func = (nfsd4op_func)nfsd4_read, | 1522 | .op_func = (nfsd4op_func)nfsd4_read, |
1523 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1336 | .op_name = "OP_READ", | 1524 | .op_name = "OP_READ", |
1525 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, | ||
1337 | }, | 1526 | }, |
1338 | [OP_READDIR] = { | 1527 | [OP_READDIR] = { |
1339 | .op_func = (nfsd4op_func)nfsd4_readdir, | 1528 | .op_func = (nfsd4op_func)nfsd4_readdir, |
1529 | .op_flags = OP_MODIFIES_SOMETHING, | ||
1340 | .op_name = "OP_READDIR", | 1530 | .op_name = "OP_READDIR", |
1531 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, | ||
1341 | }, | 1532 | }, |
1342 | [OP_READLINK] = { | 1533 | [OP_READLINK] = { |
1343 | .op_func = (nfsd4op_func)nfsd4_readlink, | 1534 | .op_func = (nfsd4op_func)nfsd4_readlink, |
@@ -1345,29 +1536,36 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1345 | }, | 1536 | }, |
1346 | [OP_REMOVE] = { | 1537 | [OP_REMOVE] = { |
1347 | .op_func = (nfsd4op_func)nfsd4_remove, | 1538 | .op_func = (nfsd4op_func)nfsd4_remove, |
1539 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
1348 | .op_name = "OP_REMOVE", | 1540 | .op_name = "OP_REMOVE", |
1349 | .op_cacheresult = true, | 1541 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize, |
1350 | }, | 1542 | }, |
1351 | [OP_RENAME] = { | 1543 | [OP_RENAME] = { |
1352 | .op_name = "OP_RENAME", | ||
1353 | .op_func = (nfsd4op_func)nfsd4_rename, | 1544 | .op_func = (nfsd4op_func)nfsd4_rename, |
1354 | .op_cacheresult = true, | 1545 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, |
1546 | .op_name = "OP_RENAME", | ||
1547 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize, | ||
1355 | }, | 1548 | }, |
1356 | [OP_RENEW] = { | 1549 | [OP_RENEW] = { |
1357 | .op_func = (nfsd4op_func)nfsd4_renew, | 1550 | .op_func = (nfsd4op_func)nfsd4_renew, |
1358 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1551 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1552 | | OP_MODIFIES_SOMETHING, | ||
1359 | .op_name = "OP_RENEW", | 1553 | .op_name = "OP_RENEW", |
1554 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1555 | |||
1360 | }, | 1556 | }, |
1361 | [OP_RESTOREFH] = { | 1557 | [OP_RESTOREFH] = { |
1362 | .op_func = (nfsd4op_func)nfsd4_restorefh, | 1558 | .op_func = (nfsd4op_func)nfsd4_restorefh, |
1363 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS | 1559 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1364 | | OP_IS_PUTFH_LIKE, | 1560 | | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, |
1365 | .op_name = "OP_RESTOREFH", | 1561 | .op_name = "OP_RESTOREFH", |
1562 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1366 | }, | 1563 | }, |
1367 | [OP_SAVEFH] = { | 1564 | [OP_SAVEFH] = { |
1368 | .op_func = (nfsd4op_func)nfsd4_savefh, | 1565 | .op_func = (nfsd4op_func)nfsd4_savefh, |
1369 | .op_flags = OP_HANDLES_WRONGSEC, | 1566 | .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, |
1370 | .op_name = "OP_SAVEFH", | 1567 | .op_name = "OP_SAVEFH", |
1568 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1371 | }, | 1569 | }, |
1372 | [OP_SECINFO] = { | 1570 | [OP_SECINFO] = { |
1373 | .op_func = (nfsd4op_func)nfsd4_secinfo, | 1571 | .op_func = (nfsd4op_func)nfsd4_secinfo, |
@@ -1377,19 +1575,22 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1377 | [OP_SETATTR] = { | 1575 | [OP_SETATTR] = { |
1378 | .op_func = (nfsd4op_func)nfsd4_setattr, | 1576 | .op_func = (nfsd4op_func)nfsd4_setattr, |
1379 | .op_name = "OP_SETATTR", | 1577 | .op_name = "OP_SETATTR", |
1380 | .op_cacheresult = true, | 1578 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, |
1579 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, | ||
1381 | }, | 1580 | }, |
1382 | [OP_SETCLIENTID] = { | 1581 | [OP_SETCLIENTID] = { |
1383 | .op_func = (nfsd4op_func)nfsd4_setclientid, | 1582 | .op_func = (nfsd4op_func)nfsd4_setclientid, |
1384 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1583 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1584 | | OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
1385 | .op_name = "OP_SETCLIENTID", | 1585 | .op_name = "OP_SETCLIENTID", |
1386 | .op_cacheresult = true, | 1586 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize, |
1387 | }, | 1587 | }, |
1388 | [OP_SETCLIENTID_CONFIRM] = { | 1588 | [OP_SETCLIENTID_CONFIRM] = { |
1389 | .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, | 1589 | .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, |
1390 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1590 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1591 | | OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
1391 | .op_name = "OP_SETCLIENTID_CONFIRM", | 1592 | .op_name = "OP_SETCLIENTID_CONFIRM", |
1392 | .op_cacheresult = true, | 1593 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
1393 | }, | 1594 | }, |
1394 | [OP_VERIFY] = { | 1595 | [OP_VERIFY] = { |
1395 | .op_func = (nfsd4op_func)nfsd4_verify, | 1596 | .op_func = (nfsd4op_func)nfsd4_verify, |
@@ -1397,35 +1598,46 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1397 | }, | 1598 | }, |
1398 | [OP_WRITE] = { | 1599 | [OP_WRITE] = { |
1399 | .op_func = (nfsd4op_func)nfsd4_write, | 1600 | .op_func = (nfsd4op_func)nfsd4_write, |
1601 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | ||
1400 | .op_name = "OP_WRITE", | 1602 | .op_name = "OP_WRITE", |
1401 | .op_cacheresult = true, | 1603 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, |
1402 | }, | 1604 | }, |
1403 | [OP_RELEASE_LOCKOWNER] = { | 1605 | [OP_RELEASE_LOCKOWNER] = { |
1404 | .op_func = (nfsd4op_func)nfsd4_release_lockowner, | 1606 | .op_func = (nfsd4op_func)nfsd4_release_lockowner, |
1405 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1607 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1608 | | OP_MODIFIES_SOMETHING, | ||
1406 | .op_name = "OP_RELEASE_LOCKOWNER", | 1609 | .op_name = "OP_RELEASE_LOCKOWNER", |
1610 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1407 | }, | 1611 | }, |
1408 | 1612 | ||
1409 | /* NFSv4.1 operations */ | 1613 | /* NFSv4.1 operations */ |
1410 | [OP_EXCHANGE_ID] = { | 1614 | [OP_EXCHANGE_ID] = { |
1411 | .op_func = (nfsd4op_func)nfsd4_exchange_id, | 1615 | .op_func = (nfsd4op_func)nfsd4_exchange_id, |
1412 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1616 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP |
1617 | | OP_MODIFIES_SOMETHING, | ||
1413 | .op_name = "OP_EXCHANGE_ID", | 1618 | .op_name = "OP_EXCHANGE_ID", |
1619 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, | ||
1414 | }, | 1620 | }, |
1415 | [OP_BIND_CONN_TO_SESSION] = { | 1621 | [OP_BIND_CONN_TO_SESSION] = { |
1416 | .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, | 1622 | .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, |
1417 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1623 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP |
1624 | | OP_MODIFIES_SOMETHING, | ||
1418 | .op_name = "OP_BIND_CONN_TO_SESSION", | 1625 | .op_name = "OP_BIND_CONN_TO_SESSION", |
1626 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize, | ||
1419 | }, | 1627 | }, |
1420 | [OP_CREATE_SESSION] = { | 1628 | [OP_CREATE_SESSION] = { |
1421 | .op_func = (nfsd4op_func)nfsd4_create_session, | 1629 | .op_func = (nfsd4op_func)nfsd4_create_session, |
1422 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1630 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP |
1631 | | OP_MODIFIES_SOMETHING, | ||
1423 | .op_name = "OP_CREATE_SESSION", | 1632 | .op_name = "OP_CREATE_SESSION", |
1633 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize, | ||
1424 | }, | 1634 | }, |
1425 | [OP_DESTROY_SESSION] = { | 1635 | [OP_DESTROY_SESSION] = { |
1426 | .op_func = (nfsd4op_func)nfsd4_destroy_session, | 1636 | .op_func = (nfsd4op_func)nfsd4_destroy_session, |
1427 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1637 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP |
1638 | | OP_MODIFIES_SOMETHING, | ||
1428 | .op_name = "OP_DESTROY_SESSION", | 1639 | .op_name = "OP_DESTROY_SESSION", |
1640 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1429 | }, | 1641 | }, |
1430 | [OP_SEQUENCE] = { | 1642 | [OP_SEQUENCE] = { |
1431 | .op_func = (nfsd4op_func)nfsd4_sequence, | 1643 | .op_func = (nfsd4op_func)nfsd4_sequence, |
@@ -1433,14 +1645,17 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1433 | .op_name = "OP_SEQUENCE", | 1645 | .op_name = "OP_SEQUENCE", |
1434 | }, | 1646 | }, |
1435 | [OP_DESTROY_CLIENTID] = { | 1647 | [OP_DESTROY_CLIENTID] = { |
1436 | .op_func = NULL, | 1648 | .op_func = (nfsd4op_func)nfsd4_destroy_clientid, |
1437 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1649 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP |
1650 | | OP_MODIFIES_SOMETHING, | ||
1438 | .op_name = "OP_DESTROY_CLIENTID", | 1651 | .op_name = "OP_DESTROY_CLIENTID", |
1652 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1439 | }, | 1653 | }, |
1440 | [OP_RECLAIM_COMPLETE] = { | 1654 | [OP_RECLAIM_COMPLETE] = { |
1441 | .op_func = (nfsd4op_func)nfsd4_reclaim_complete, | 1655 | .op_func = (nfsd4op_func)nfsd4_reclaim_complete, |
1442 | .op_flags = ALLOWED_WITHOUT_FH, | 1656 | .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, |
1443 | .op_name = "OP_RECLAIM_COMPLETE", | 1657 | .op_name = "OP_RECLAIM_COMPLETE", |
1658 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1444 | }, | 1659 | }, |
1445 | [OP_SECINFO_NO_NAME] = { | 1660 | [OP_SECINFO_NO_NAME] = { |
1446 | .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, | 1661 | .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, |
@@ -1454,8 +1669,9 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1454 | }, | 1669 | }, |
1455 | [OP_FREE_STATEID] = { | 1670 | [OP_FREE_STATEID] = { |
1456 | .op_func = (nfsd4op_func)nfsd4_free_stateid, | 1671 | .op_func = (nfsd4op_func)nfsd4_free_stateid, |
1457 | .op_flags = ALLOWED_WITHOUT_FH, | 1672 | .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, |
1458 | .op_name = "OP_FREE_STATEID", | 1673 | .op_name = "OP_FREE_STATEID", |
1674 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | ||
1459 | }, | 1675 | }, |
1460 | }; | 1676 | }; |
1461 | 1677 | ||
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 29d77f60585b..ed083b9a731b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -45,6 +45,7 @@ | |||
45 | 45 | ||
46 | /* Globals */ | 46 | /* Globals */ |
47 | static struct file *rec_file; | 47 | static struct file *rec_file; |
48 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; | ||
48 | 49 | ||
49 | static int | 50 | static int |
50 | nfs4_save_creds(const struct cred **original_creds) | 51 | nfs4_save_creds(const struct cred **original_creds) |
@@ -88,7 +89,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) | |||
88 | struct xdr_netobj cksum; | 89 | struct xdr_netobj cksum; |
89 | struct hash_desc desc; | 90 | struct hash_desc desc; |
90 | struct scatterlist sg; | 91 | struct scatterlist sg; |
91 | __be32 status = nfserr_resource; | 92 | __be32 status = nfserr_jukebox; |
92 | 93 | ||
93 | dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", | 94 | dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", |
94 | clname->len, clname->data); | 95 | clname->len, clname->data); |
@@ -129,6 +130,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
129 | if (!rec_file || clp->cl_firststate) | 130 | if (!rec_file || clp->cl_firststate) |
130 | return 0; | 131 | return 0; |
131 | 132 | ||
133 | clp->cl_firststate = 1; | ||
132 | status = nfs4_save_creds(&original_cred); | 134 | status = nfs4_save_creds(&original_cred); |
133 | if (status < 0) | 135 | if (status < 0) |
134 | return status; | 136 | return status; |
@@ -143,10 +145,8 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
143 | goto out_unlock; | 145 | goto out_unlock; |
144 | } | 146 | } |
145 | status = -EEXIST; | 147 | status = -EEXIST; |
146 | if (dentry->d_inode) { | 148 | if (dentry->d_inode) |
147 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); | ||
148 | goto out_put; | 149 | goto out_put; |
149 | } | ||
150 | status = mnt_want_write(rec_file->f_path.mnt); | 150 | status = mnt_want_write(rec_file->f_path.mnt); |
151 | if (status) | 151 | if (status) |
152 | goto out_put; | 152 | goto out_put; |
@@ -156,12 +156,14 @@ out_put: | |||
156 | dput(dentry); | 156 | dput(dentry); |
157 | out_unlock: | 157 | out_unlock: |
158 | mutex_unlock(&dir->d_inode->i_mutex); | 158 | mutex_unlock(&dir->d_inode->i_mutex); |
159 | if (status == 0) { | 159 | if (status == 0) |
160 | clp->cl_firststate = 1; | ||
161 | vfs_fsync(rec_file, 0); | 160 | vfs_fsync(rec_file, 0); |
162 | } | 161 | else |
162 | printk(KERN_ERR "NFSD: failed to write recovery record" | ||
163 | " (err %d); please check that %s exists" | ||
164 | " and is writeable", status, | ||
165 | user_recovery_dirname); | ||
163 | nfs4_reset_creds(original_cred); | 166 | nfs4_reset_creds(original_cred); |
164 | dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); | ||
165 | return status; | 167 | return status; |
166 | } | 168 | } |
167 | 169 | ||
@@ -354,13 +356,13 @@ nfsd4_recdir_load(void) { | |||
354 | */ | 356 | */ |
355 | 357 | ||
356 | void | 358 | void |
357 | nfsd4_init_recdir(char *rec_dirname) | 359 | nfsd4_init_recdir() |
358 | { | 360 | { |
359 | const struct cred *original_cred; | 361 | const struct cred *original_cred; |
360 | int status; | 362 | int status; |
361 | 363 | ||
362 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", | 364 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", |
363 | rec_dirname); | 365 | user_recovery_dirname); |
364 | 366 | ||
365 | BUG_ON(rec_file); | 367 | BUG_ON(rec_file); |
366 | 368 | ||
@@ -372,10 +374,10 @@ nfsd4_init_recdir(char *rec_dirname) | |||
372 | return; | 374 | return; |
373 | } | 375 | } |
374 | 376 | ||
375 | rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0); | 377 | rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); |
376 | if (IS_ERR(rec_file)) { | 378 | if (IS_ERR(rec_file)) { |
377 | printk("NFSD: unable to find recovery directory %s\n", | 379 | printk("NFSD: unable to find recovery directory %s\n", |
378 | rec_dirname); | 380 | user_recovery_dirname); |
379 | rec_file = NULL; | 381 | rec_file = NULL; |
380 | } | 382 | } |
381 | 383 | ||
@@ -390,3 +392,30 @@ nfsd4_shutdown_recdir(void) | |||
390 | fput(rec_file); | 392 | fput(rec_file); |
391 | rec_file = NULL; | 393 | rec_file = NULL; |
392 | } | 394 | } |
395 | |||
396 | /* | ||
397 | * Change the NFSv4 recovery directory to recdir. | ||
398 | */ | ||
399 | int | ||
400 | nfs4_reset_recoverydir(char *recdir) | ||
401 | { | ||
402 | int status; | ||
403 | struct path path; | ||
404 | |||
405 | status = kern_path(recdir, LOOKUP_FOLLOW, &path); | ||
406 | if (status) | ||
407 | return status; | ||
408 | status = -ENOTDIR; | ||
409 | if (S_ISDIR(path.dentry->d_inode->i_mode)) { | ||
410 | strcpy(user_recovery_dirname, recdir); | ||
411 | status = 0; | ||
412 | } | ||
413 | path_put(&path); | ||
414 | return status; | ||
415 | } | ||
416 | |||
417 | char * | ||
418 | nfs4_recoverydir(void) | ||
419 | { | ||
420 | return user_recovery_dirname; | ||
421 | } | ||
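
nfs4_reset_recoverydir() accepts a new path only after kern_path() resolves it and S_ISDIR() confirms it is a directory; the strcpy() into the PATH_MAX buffer relies on the caller bounding the input, which the nfsd control-file write path is assumed to do. A userspace sketch of the same validate-then-commit pattern:

    #include <errno.h>
    #include <limits.h>
    #include <string.h>
    #include <sys/stat.h>

    static char recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";

    /* Accept a new recovery directory only after checking it exists and
     * really is a directory, mirroring nfs4_reset_recoverydir() above. */
    static int reset_recoverydir(const char *recdir)
    {
        struct stat st;

        if (strlen(recdir) >= sizeof(recovery_dirname))
            return -ENAMETOOLONG;          /* the kernel caller bounds this */
        if (stat(recdir, &st))             /* kern_path() analog */
            return -errno;
        if (!S_ISDIR(st.st_mode))
            return -ENOTDIR;
        strcpy(recovery_dirname, recdir);  /* commit only after validation */
        return 0;
    }
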
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3787ec117400..47e94e33a975 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,9 +49,6 @@ | |||
49 | time_t nfsd4_lease = 90; /* default lease time */ | 49 | time_t nfsd4_lease = 90; /* default lease time */ |
50 | time_t nfsd4_grace = 90; | 50 | time_t nfsd4_grace = 90; |
51 | static time_t boot_time; | 51 | static time_t boot_time; |
52 | static u32 current_ownerid = 1; | ||
53 | static u32 current_fileid = 1; | ||
54 | static u32 current_delegid = 1; | ||
55 | static stateid_t zerostateid; /* bits all 0 */ | 52 | static stateid_t zerostateid; /* bits all 0 */ |
56 | static stateid_t onestateid; /* bits all 1 */ | 53 | static stateid_t onestateid; /* bits all 1 */ |
57 | static u64 current_sessionid = 1; | 54 | static u64 current_sessionid = 1; |
@@ -60,13 +57,7 @@ static u64 current_sessionid = 1; | |||
60 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) | 57 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) |
61 | 58 | ||
62 | /* forward declarations */ | 59 | /* forward declarations */ |
63 | static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); | 60 | static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); |
64 | static struct nfs4_stateid * search_for_stateid(stateid_t *stid); | ||
65 | static struct nfs4_delegation * search_for_delegation(stateid_t *stid); | ||
66 | static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); | ||
67 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; | ||
68 | static void nfs4_set_recdir(char *recdir); | ||
69 | static int check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner); | ||
70 | 61 | ||
71 | /* Locking: */ | 62 | /* Locking: */ |
72 | 63 | ||
@@ -80,7 +71,8 @@ static DEFINE_MUTEX(client_mutex); | |||
80 | */ | 71 | */ |
81 | static DEFINE_SPINLOCK(recall_lock); | 72 | static DEFINE_SPINLOCK(recall_lock); |
82 | 73 | ||
83 | static struct kmem_cache *stateowner_slab = NULL; | 74 | static struct kmem_cache *openowner_slab = NULL; |
75 | static struct kmem_cache *lockowner_slab = NULL; | ||
84 | static struct kmem_cache *file_slab = NULL; | 76 | static struct kmem_cache *file_slab = NULL; |
85 | static struct kmem_cache *stateid_slab = NULL; | 77 | static struct kmem_cache *stateid_slab = NULL; |
86 | static struct kmem_cache *deleg_slab = NULL; | 78 | static struct kmem_cache *deleg_slab = NULL; |
@@ -112,6 +104,11 @@ opaque_hashval(const void *ptr, int nbytes) | |||
112 | 104 | ||
113 | static struct list_head del_recall_lru; | 105 | static struct list_head del_recall_lru; |
114 | 106 | ||
107 | static void nfsd4_free_file(struct nfs4_file *f) | ||
108 | { | ||
109 | kmem_cache_free(file_slab, f); | ||
110 | } | ||
111 | |||
115 | static inline void | 112 | static inline void |
116 | put_nfs4_file(struct nfs4_file *fi) | 113 | put_nfs4_file(struct nfs4_file *fi) |
117 | { | 114 | { |
@@ -119,7 +116,7 @@ put_nfs4_file(struct nfs4_file *fi) | |||
119 | list_del(&fi->fi_hash); | 116 | list_del(&fi->fi_hash); |
120 | spin_unlock(&recall_lock); | 117 | spin_unlock(&recall_lock); |
121 | iput(fi->fi_inode); | 118 | iput(fi->fi_inode); |
122 | kmem_cache_free(file_slab, fi); | 119 | nfsd4_free_file(fi); |
123 | } | 120 | } |
124 | } | 121 | } |
125 | 122 | ||
@@ -136,35 +133,33 @@ unsigned int max_delegations; | |||
136 | * Open owner state (share locks) | 133 | * Open owner state (share locks) |
137 | */ | 134 | */ |
138 | 135 | ||
139 | /* hash tables for nfs4_stateowner */ | 136 | /* hash tables for open owners */ |
140 | #define OWNER_HASH_BITS 8 | 137 | #define OPEN_OWNER_HASH_BITS 8 |
141 | #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) | 138 | #define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) |
142 | #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) | 139 | #define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) |
143 | 140 | ||
144 | #define ownerid_hashval(id) \ | 141 | static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) |
145 | ((id) & OWNER_HASH_MASK) | 142 | { |
146 | #define ownerstr_hashval(clientid, ownername) \ | 143 | unsigned int ret; |
147 | (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK) | ||
148 | 144 | ||
149 | static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE]; | 145 | ret = opaque_hashval(ownername->data, ownername->len); |
150 | static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; | 146 | ret += clientid; |
147 | return ret & OPEN_OWNER_HASH_MASK; | ||
148 | } | ||
149 | |||
150 | static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; | ||
151 | 151 | ||
152 | /* hash table for nfs4_file */ | 152 | /* hash table for nfs4_file */ |
153 | #define FILE_HASH_BITS 8 | 153 | #define FILE_HASH_BITS 8 |
154 | #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) | 154 | #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) |
155 | 155 | ||
156 | /* hash table for (open)nfs4_stateid */ | 156 | static unsigned int file_hashval(struct inode *ino) |
157 | #define STATEID_HASH_BITS 10 | 157 | { |
158 | #define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) | 158 | /* XXX: why are we hashing on inode pointer, anyway? */ |
159 | #define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1) | 159 | return hash_ptr(ino, FILE_HASH_BITS); |
160 | 160 | } | |
161 | #define file_hashval(x) \ | ||
162 | hash_ptr(x, FILE_HASH_BITS) | ||
163 | #define stateid_hashval(owner_id, file_id) \ | ||
164 | (((owner_id) + (file_id)) & STATEID_HASH_MASK) | ||
165 | 161 | ||
166 | static struct list_head file_hashtbl[FILE_HASH_SIZE]; | 162 | static struct list_head file_hashtbl[FILE_HASH_SIZE]; |
167 | static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; | ||
168 | 163 | ||
169 | static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) | 164 | static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) |
170 | { | 165 | { |
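
Turning the old ownerstr macro into open_ownerstr_hashval() makes the evaluation explicit: hash the opaque owner name, add the client id, and mask down to the table size. A standalone sketch with a simple byte hash standing in for opaque_hashval() (the kernel helper computes something different):

    #include <stddef.h>
    #include <stdint.h>

    #define OPEN_OWNER_HASH_BITS 8
    #define OPEN_OWNER_HASH_MASK ((1u << OPEN_OWNER_HASH_BITS) - 1)

    /* Toy byte hash standing in for the kernel's opaque_hashval(). */
    static unsigned int opaque_hash(const uint8_t *data, size_t len)
    {
        unsigned int h = 0;

        while (len--)
            h = h * 31 + *data++;
        return h;
    }

    /* Mirrors open_ownerstr_hashval(): name hash plus clientid, masked. */
    static unsigned int open_owner_bucket(uint32_t clientid,
                                          const uint8_t *name, size_t len)
    {
        return (opaque_hash(name, len) + clientid) & OPEN_OWNER_HASH_MASK;
    }
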
@@ -192,8 +187,15 @@ static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) | |||
192 | static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) | 187 | static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) |
193 | { | 188 | { |
194 | if (atomic_dec_and_test(&fp->fi_access[oflag])) { | 189 | if (atomic_dec_and_test(&fp->fi_access[oflag])) { |
195 | nfs4_file_put_fd(fp, O_RDWR); | ||
196 | nfs4_file_put_fd(fp, oflag); | 190 | nfs4_file_put_fd(fp, oflag); |
191 | /* | ||
192 | * It's also safe to get rid of the RDWR open *if* | ||
193 | * we no longer have need of the other kind of access | ||
194 | * or if we already have the other kind of open: | ||
195 | */ | ||
196 | if (fp->fi_fds[1-oflag] | ||
197 | || atomic_read(&fp->fi_access[1 - oflag]) == 0) | ||
198 | nfs4_file_put_fd(fp, O_RDWR); | ||
197 | } | 199 | } |
198 | } | 200 | } |
199 | 201 | ||
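
fi_access[] counts O_RDONLY and O_WRONLY grants separately, with a shared O_RDWR struct file backing whichever mode lacks a file of its own; the added condition drops that RDWR file on the last put of one mode only when the other mode either has a dedicated fd or has no remaining users. A small sketch of the decision:

    #include <stdbool.h>

    struct file_access_sk {
        int  users[2];     /* [0] = read grants, [1] = write grants */
        bool own_fd[2];    /* dedicated O_RDONLY / O_WRONLY file open */
        bool rdwr_fd;      /* shared O_RDWR file open */
    };

    /* On the last put of one access mode, drop that mode's fd; also drop
     * the shared RDWR fd unless the other mode still depends on it
     * (i.e. it has remaining users but no dedicated fd of its own). */
    static void put_access(struct file_access_sk *f, int mode)
    {
        if (--f->users[mode] == 0) {
            f->own_fd[mode] = false;           /* nfs4_file_put_fd(fp, oflag) */
            if (f->own_fd[1 - mode] || f->users[1 - mode] == 0)
                f->rdwr_fd = false;            /* nfs4_file_put_fd(fp, O_RDWR) */
        }
    }
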
@@ -206,8 +208,73 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) | |||
206 | __nfs4_file_put_access(fp, oflag); | 208 | __nfs4_file_put_access(fp, oflag); |
207 | } | 209 | } |
208 | 210 | ||
211 | static inline int get_new_stid(struct nfs4_stid *stid) | ||
212 | { | ||
213 | static int min_stateid = 0; | ||
214 | struct idr *stateids = &stid->sc_client->cl_stateids; | ||
215 | int new_stid; | ||
216 | int error; | ||
217 | |||
218 | error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); | ||
219 | /* | ||
220 | * Note: the necessary preallocation was done in | ||
221 | * nfs4_alloc_stateid(). The idr code caps the number of | ||
222 | * preallocations that can exist at a time, but the state lock | ||
223 | * prevents anyone from using ours before we get here: | ||
224 | */ | ||
225 | BUG_ON(error); | ||
226 | /* | ||
227 | * It shouldn't be a problem to reuse an opaque stateid value. | ||
228 | * I don't think it is for 4.1. But with 4.0 I worry that, for | ||
229 | * example, a stray write retransmission could be accepted by | ||
230 | * the server when it should have been rejected. Therefore, | ||
231 | * adopt a trick from the sctp code to attempt to maximize the | ||
232 | * amount of time until an id is reused, by ensuring they always | ||
233 | * "increase" (mod INT_MAX): | ||
234 | */ | ||
235 | |||
236 | min_stateid = new_stid+1; | ||
237 | if (min_stateid == INT_MAX) | ||
238 | min_stateid = 0; | ||
239 | return new_stid; | ||
240 | } | ||
241 | |||
242 | static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) | ||
243 | { | ||
244 | stateid_t *s = &stid->sc_stateid; | ||
245 | int new_id; | ||
246 | |||
247 | stid->sc_type = type; | ||
248 | stid->sc_client = cl; | ||
249 | s->si_opaque.so_clid = cl->cl_clientid; | ||
250 | new_id = get_new_stid(stid); | ||
251 | s->si_opaque.so_id = (u32)new_id; | ||
252 | /* Will be incremented before return to client: */ | ||
253 | s->si_generation = 0; | ||
254 | } | ||
255 | |||
256 | static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) | ||
257 | { | ||
258 | struct idr *stateids = &cl->cl_stateids; | ||
259 | |||
260 | if (!idr_pre_get(stateids, GFP_KERNEL)) | ||
261 | return NULL; | ||
262 | /* | ||
263 | * Note: if we fail here (or any time between now and the time | ||
264 | * we actually get the new idr), we won't need to undo the idr | ||
265 | * preallocation, since the idr code caps the number of | ||
266 | * preallocated entries. | ||
267 | */ | ||
268 | return kmem_cache_alloc(slab, GFP_KERNEL); | ||
269 | } | ||
270 | |||
271 | static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) | ||
272 | { | ||
273 | return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); | ||
274 | } | ||
275 | |||
209 | static struct nfs4_delegation * | 276 | static struct nfs4_delegation * |
210 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) | 277 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) |
211 | { | 278 | { |
212 | struct nfs4_delegation *dp; | 279 | struct nfs4_delegation *dp; |
213 | struct nfs4_file *fp = stp->st_file; | 280 | struct nfs4_file *fp = stp->st_file; |
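
The wraparound trick in get_new_stid() keeps idr-allocated stateid values "increasing" mod INT_MAX so a stray v4.0 retransmission is unlikely to match a freshly reused id, and the in-line comments explain why the idr preallocation done in nfs4_alloc_stid() never needs undoing. A self-contained sketch of the monotonic-hint policy (plain ints standing in for the idr):

    #include <limits.h>

    static int next_id_hint;  /* plays the role of min_stateid above */

    /* Hand out ids that always "increase" mod INT_MAX, so a freed id is
     * not reused until the whole space has cycled; idr_alloc_above is a
     * stand-in for idr_get_new_above() on the client's stateid idr. */
    static int alloc_stateid_id(int (*idr_alloc_above)(int min))
    {
        int new_id = idr_alloc_above(next_id_hint);

        next_id_hint = new_id + 1;
        if (next_id_hint == INT_MAX)  /* wrap before overflow */
            next_id_hint = 0;
        return new_id;
    }
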
@@ -224,21 +291,23 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
224 | return NULL; | 291 | return NULL; |
225 | if (num_delegations > max_delegations) | 292 | if (num_delegations > max_delegations) |
226 | return NULL; | 293 | return NULL; |
227 | dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); | 294 | dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); |
228 | if (dp == NULL) | 295 | if (dp == NULL) |
229 | return dp; | 296 | return dp; |
297 | init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); | ||
298 | /* | ||
299 | * delegation seqid's are never incremented. The 4.1 special | ||
300 | * meaning of seqid 0 isn't meaningful, really, but let's avoid | ||
301 | * 0 anyway just for consistency and use 1: | ||
302 | */ | ||
303 | dp->dl_stid.sc_stateid.si_generation = 1; | ||
230 | num_delegations++; | 304 | num_delegations++; |
231 | INIT_LIST_HEAD(&dp->dl_perfile); | 305 | INIT_LIST_HEAD(&dp->dl_perfile); |
232 | INIT_LIST_HEAD(&dp->dl_perclnt); | 306 | INIT_LIST_HEAD(&dp->dl_perclnt); |
233 | INIT_LIST_HEAD(&dp->dl_recall_lru); | 307 | INIT_LIST_HEAD(&dp->dl_recall_lru); |
234 | dp->dl_client = clp; | ||
235 | get_nfs4_file(fp); | 308 | get_nfs4_file(fp); |
236 | dp->dl_file = fp; | 309 | dp->dl_file = fp; |
237 | dp->dl_type = type; | 310 | dp->dl_type = type; |
238 | dp->dl_stateid.si_boot = boot_time; | ||
239 | dp->dl_stateid.si_stateownerid = current_delegid++; | ||
240 | dp->dl_stateid.si_fileid = 0; | ||
241 | dp->dl_stateid.si_generation = 0; | ||
242 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); | 311 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); |
243 | dp->dl_time = 0; | 312 | dp->dl_time = 0; |
244 | atomic_set(&dp->dl_count, 1); | 313 | atomic_set(&dp->dl_count, 1); |
@@ -267,10 +336,18 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) | |||
267 | } | 336 | } |
268 | } | 337 | } |
269 | 338 | ||
339 | static void unhash_stid(struct nfs4_stid *s) | ||
340 | { | ||
341 | struct idr *stateids = &s->sc_client->cl_stateids; | ||
342 | |||
343 | idr_remove(stateids, s->sc_stateid.si_opaque.so_id); | ||
344 | } | ||
345 | |||
270 | /* Called under the state lock. */ | 346 | /* Called under the state lock. */ |
271 | static void | 347 | static void |
272 | unhash_delegation(struct nfs4_delegation *dp) | 348 | unhash_delegation(struct nfs4_delegation *dp) |
273 | { | 349 | { |
350 | unhash_stid(&dp->dl_stid); | ||
274 | list_del_init(&dp->dl_perclnt); | 351 | list_del_init(&dp->dl_perclnt); |
275 | spin_lock(&recall_lock); | 352 | spin_lock(&recall_lock); |
276 | list_del_init(&dp->dl_perfile); | 353 | list_del_init(&dp->dl_perfile); |
@@ -292,10 +369,16 @@ static DEFINE_SPINLOCK(client_lock); | |||
292 | #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) | 369 | #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) |
293 | #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) | 370 | #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) |
294 | 371 | ||
295 | #define clientid_hashval(id) \ | 372 | static unsigned int clientid_hashval(u32 id) |
296 | ((id) & CLIENT_HASH_MASK) | 373 | { |
297 | #define clientstr_hashval(name) \ | 374 | return id & CLIENT_HASH_MASK; |
298 | (opaque_hashval((name), 8) & CLIENT_HASH_MASK) | 375 | } |
376 | |||
377 | static unsigned int clientstr_hashval(const char *name) | ||
378 | { | ||
379 | return opaque_hashval(name, 8) & CLIENT_HASH_MASK; | ||
380 | } | ||
381 | |||
299 | /* | 382 | /* |
300 | * reclaim_str_hashtbl[] holds known client info from previous reset/reboot | 383 | * reclaim_str_hashtbl[] holds known client info from previous reset/reboot |
301 | * used in reboot/reset lease grace period processing | 384 | * used in reboot/reset lease grace period processing |
@@ -362,7 +445,7 @@ set_deny(unsigned int *deny, unsigned long bmap) { | |||
362 | } | 445 | } |
363 | 446 | ||
364 | static int | 447 | static int |
365 | test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { | 448 | test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { |
366 | unsigned int access, deny; | 449 | unsigned int access, deny; |
367 | 450 | ||
368 | set_access(&access, stp->st_access_bmap); | 451 | set_access(&access, stp->st_access_bmap); |
@@ -385,14 +468,13 @@ static int nfs4_access_to_omode(u32 access) | |||
385 | BUG(); | 468 | BUG(); |
386 | } | 469 | } |
387 | 470 | ||
388 | static void unhash_generic_stateid(struct nfs4_stateid *stp) | 471 | static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) |
389 | { | 472 | { |
390 | list_del(&stp->st_hash); | ||
391 | list_del(&stp->st_perfile); | 473 | list_del(&stp->st_perfile); |
392 | list_del(&stp->st_perstateowner); | 474 | list_del(&stp->st_perstateowner); |
393 | } | 475 | } |
394 | 476 | ||
395 | static void free_generic_stateid(struct nfs4_stateid *stp) | 477 | static void close_generic_stateid(struct nfs4_ol_stateid *stp) |
396 | { | 478 | { |
397 | int i; | 479 | int i; |
398 | 480 | ||
@@ -401,84 +483,106 @@ static void free_generic_stateid(struct nfs4_stateid *stp) | |||
401 | if (test_bit(i, &stp->st_access_bmap)) | 483 | if (test_bit(i, &stp->st_access_bmap)) |
402 | nfs4_file_put_access(stp->st_file, | 484 | nfs4_file_put_access(stp->st_file, |
403 | nfs4_access_to_omode(i)); | 485 | nfs4_access_to_omode(i)); |
486 | __clear_bit(i, &stp->st_access_bmap); | ||
404 | } | 487 | } |
405 | } | 488 | } |
406 | put_nfs4_file(stp->st_file); | 489 | put_nfs4_file(stp->st_file); |
490 | stp->st_file = NULL; | ||
491 | } | ||
492 | |||
493 | static void free_generic_stateid(struct nfs4_ol_stateid *stp) | ||
494 | { | ||
407 | kmem_cache_free(stateid_slab, stp); | 495 | kmem_cache_free(stateid_slab, stp); |
408 | } | 496 | } |
409 | 497 | ||
410 | static void release_lock_stateid(struct nfs4_stateid *stp) | 498 | static void release_lock_stateid(struct nfs4_ol_stateid *stp) |
411 | { | 499 | { |
412 | struct file *file; | 500 | struct file *file; |
413 | 501 | ||
414 | unhash_generic_stateid(stp); | 502 | unhash_generic_stateid(stp); |
503 | unhash_stid(&stp->st_stid); | ||
415 | file = find_any_file(stp->st_file); | 504 | file = find_any_file(stp->st_file); |
416 | if (file) | 505 | if (file) |
417 | locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); | 506 | locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); |
507 | close_generic_stateid(stp); | ||
418 | free_generic_stateid(stp); | 508 | free_generic_stateid(stp); |
419 | } | 509 | } |
420 | 510 | ||
421 | static void unhash_lockowner(struct nfs4_stateowner *sop) | 511 | static void unhash_lockowner(struct nfs4_lockowner *lo) |
422 | { | 512 | { |
423 | struct nfs4_stateid *stp; | 513 | struct nfs4_ol_stateid *stp; |
424 | 514 | ||
425 | list_del(&sop->so_idhash); | 515 | list_del(&lo->lo_owner.so_strhash); |
426 | list_del(&sop->so_strhash); | 516 | list_del(&lo->lo_perstateid); |
427 | list_del(&sop->so_perstateid); | 517 | while (!list_empty(&lo->lo_owner.so_stateids)) { |
428 | while (!list_empty(&sop->so_stateids)) { | 518 | stp = list_first_entry(&lo->lo_owner.so_stateids, |
429 | stp = list_first_entry(&sop->so_stateids, | 519 | struct nfs4_ol_stateid, st_perstateowner); |
430 | struct nfs4_stateid, st_perstateowner); | ||
431 | release_lock_stateid(stp); | 520 | release_lock_stateid(stp); |
432 | } | 521 | } |
433 | } | 522 | } |
434 | 523 | ||
435 | static void release_lockowner(struct nfs4_stateowner *sop) | 524 | static void release_lockowner(struct nfs4_lockowner *lo) |
436 | { | 525 | { |
437 | unhash_lockowner(sop); | 526 | unhash_lockowner(lo); |
438 | nfs4_put_stateowner(sop); | 527 | nfs4_free_lockowner(lo); |
439 | } | 528 | } |
440 | 529 | ||
441 | static void | 530 | static void |
442 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | 531 | release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) |
443 | { | 532 | { |
444 | struct nfs4_stateowner *lock_sop; | 533 | struct nfs4_lockowner *lo; |
445 | 534 | ||
446 | while (!list_empty(&open_stp->st_lockowners)) { | 535 | while (!list_empty(&open_stp->st_lockowners)) { |
447 | lock_sop = list_entry(open_stp->st_lockowners.next, | 536 | lo = list_entry(open_stp->st_lockowners.next, |
448 | struct nfs4_stateowner, so_perstateid); | 537 | struct nfs4_lockowner, lo_perstateid); |
449 | /* list_del(&open_stp->st_lockowners); */ | 538 | release_lockowner(lo); |
450 | BUG_ON(lock_sop->so_is_open_owner); | ||
451 | release_lockowner(lock_sop); | ||
452 | } | 539 | } |
453 | } | 540 | } |
454 | 541 | ||
455 | static void release_open_stateid(struct nfs4_stateid *stp) | 542 | static void unhash_open_stateid(struct nfs4_ol_stateid *stp) |
456 | { | 543 | { |
457 | unhash_generic_stateid(stp); | 544 | unhash_generic_stateid(stp); |
458 | release_stateid_lockowners(stp); | 545 | release_stateid_lockowners(stp); |
546 | close_generic_stateid(stp); | ||
547 | } | ||
548 | |||
549 | static void release_open_stateid(struct nfs4_ol_stateid *stp) | ||
550 | { | ||
551 | unhash_open_stateid(stp); | ||
552 | unhash_stid(&stp->st_stid); | ||
459 | free_generic_stateid(stp); | 553 | free_generic_stateid(stp); |
460 | } | 554 | } |
461 | 555 | ||
462 | static void unhash_openowner(struct nfs4_stateowner *sop) | 556 | static void unhash_openowner(struct nfs4_openowner *oo) |
463 | { | 557 | { |
464 | struct nfs4_stateid *stp; | 558 | struct nfs4_ol_stateid *stp; |
465 | 559 | ||
466 | list_del(&sop->so_idhash); | 560 | list_del(&oo->oo_owner.so_strhash); |
467 | list_del(&sop->so_strhash); | 561 | list_del(&oo->oo_perclient); |
468 | list_del(&sop->so_perclient); | 562 | while (!list_empty(&oo->oo_owner.so_stateids)) { |
469 | list_del(&sop->so_perstateid); /* XXX: necessary? */ | 563 | stp = list_first_entry(&oo->oo_owner.so_stateids, |
470 | while (!list_empty(&sop->so_stateids)) { | 564 | struct nfs4_ol_stateid, st_perstateowner); |
471 | stp = list_first_entry(&sop->so_stateids, | ||
472 | struct nfs4_stateid, st_perstateowner); | ||
473 | release_open_stateid(stp); | 565 | release_open_stateid(stp); |
474 | } | 566 | } |
475 | } | 567 | } |
476 | 568 | ||
477 | static void release_openowner(struct nfs4_stateowner *sop) | 569 | static void release_last_closed_stateid(struct nfs4_openowner *oo) |
478 | { | 570 | { |
479 | unhash_openowner(sop); | 571 | struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; |
480 | list_del(&sop->so_close_lru); | 572 | |
481 | nfs4_put_stateowner(sop); | 573 | if (s) { |
574 | unhash_stid(&s->st_stid); | ||
575 | free_generic_stateid(s); | ||
576 | oo->oo_last_closed_stid = NULL; | ||
577 | } | ||
578 | } | ||
579 | |||
580 | static void release_openowner(struct nfs4_openowner *oo) | ||
581 | { | ||
582 | unhash_openowner(oo); | ||
583 | list_del(&oo->oo_close_lru); | ||
584 | release_last_closed_stateid(oo); | ||
585 | nfs4_free_openowner(oo); | ||
482 | } | 586 | } |
483 | 587 | ||
484 | #define SESSION_HASH_SIZE 512 | 588 | #define SESSION_HASH_SIZE 512 |
@@ -843,9 +947,6 @@ renew_client_locked(struct nfs4_client *clp) | |||
843 | return; | 947 | return; |
844 | } | 948 | } |
845 | 949 | ||
846 | /* | ||
847 | * Move client to the end to the LRU list. | ||
848 | */ | ||
849 | dprintk("renewing client (clientid %08x/%08x)\n", | 950 | dprintk("renewing client (clientid %08x/%08x)\n", |
850 | clp->cl_clientid.cl_boot, | 951 | clp->cl_clientid.cl_boot, |
851 | clp->cl_clientid.cl_id); | 952 | clp->cl_clientid.cl_id); |
@@ -943,7 +1044,7 @@ unhash_client_locked(struct nfs4_client *clp) | |||
943 | static void | 1044 | static void |
944 | expire_client(struct nfs4_client *clp) | 1045 | expire_client(struct nfs4_client *clp) |
945 | { | 1046 | { |
946 | struct nfs4_stateowner *sop; | 1047 | struct nfs4_openowner *oo; |
947 | struct nfs4_delegation *dp; | 1048 | struct nfs4_delegation *dp; |
948 | struct list_head reaplist; | 1049 | struct list_head reaplist; |
949 | 1050 | ||
@@ -961,8 +1062,8 @@ expire_client(struct nfs4_client *clp) | |||
961 | unhash_delegation(dp); | 1062 | unhash_delegation(dp); |
962 | } | 1063 | } |
963 | while (!list_empty(&clp->cl_openowners)) { | 1064 | while (!list_empty(&clp->cl_openowners)) { |
964 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); | 1065 | oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); |
965 | release_openowner(sop); | 1066 | release_openowner(oo); |
966 | } | 1067 | } |
967 | nfsd4_shutdown_callback(clp); | 1068 | nfsd4_shutdown_callback(clp); |
968 | if (clp->cl_cb_conn.cb_xprt) | 1069 | if (clp->cl_cb_conn.cb_xprt) |
@@ -1038,6 +1139,23 @@ static void gen_confirm(struct nfs4_client *clp) | |||
1038 | *p++ = i++; | 1139 | *p++ = i++; |
1039 | } | 1140 | } |
1040 | 1141 | ||
1142 | static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) | ||
1143 | { | ||
1144 | return idr_find(&cl->cl_stateids, t->si_opaque.so_id); | ||
1145 | } | ||
1146 | |||
1147 | static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) | ||
1148 | { | ||
1149 | struct nfs4_stid *s; | ||
1150 | |||
1151 | s = find_stateid(cl, t); | ||
1152 | if (!s) | ||
1153 | return NULL; | ||
1154 | if (typemask & s->sc_type) | ||
1155 | return s; | ||
1156 | return NULL; | ||
1157 | } | ||
1158 | |||
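[Editorial aside, not part of the patch] find_stateid_by_type() takes a bitmask rather than a single type, so one lookup helper can accept any of several stateid classes in a single call. Below is a minimal user-space sketch of the same accept test; the type values, struct, and helper name are illustrative assumptions, not the kernel's definitions.

#include <stdio.h>

#define OPEN_STID  0x01	/* assumed values for illustration only */
#define LOCK_STID  0x02
#define DELEG_STID 0x04

struct stid { unsigned char sc_type; };

/* Same test as the patch: any overlap between mask and type accepts. */
static struct stid *filter_by_type(struct stid *s, unsigned char typemask)
{
	return (s && (typemask & s->sc_type)) ? s : NULL;
}

int main(void)
{
	struct stid lock = { .sc_type = LOCK_STID };

	/* accepted: the caller will take either an open or a lock stateid */
	printf("%d\n", filter_by_type(&lock, OPEN_STID | LOCK_STID) != NULL);
	/* rejected: the caller wants delegations only */
	printf("%d\n", filter_by_type(&lock, DELEG_STID) != NULL);
	return 0;
}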
1041 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | 1159 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, |
1042 | struct svc_rqst *rqstp, nfs4_verifier *verf) | 1160 | struct svc_rqst *rqstp, nfs4_verifier *verf) |
1043 | { | 1161 | { |
@@ -1060,6 +1178,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
1060 | } | 1178 | } |
1061 | } | 1179 | } |
1062 | 1180 | ||
1181 | idr_init(&clp->cl_stateids); | ||
1063 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | 1182 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); |
1064 | atomic_set(&clp->cl_refcount, 0); | 1183 | atomic_set(&clp->cl_refcount, 0); |
1065 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; | 1184 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; |
@@ -1083,17 +1202,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
1083 | return clp; | 1202 | return clp; |
1084 | } | 1203 | } |
1085 | 1204 | ||
1086 | static int check_name(struct xdr_netobj name) | ||
1087 | { | ||
1088 | if (name.len == 0) | ||
1089 | return 0; | ||
1090 | if (name.len > NFS4_OPAQUE_LIMIT) { | ||
1091 | dprintk("NFSD: check_name: name too long(%d)!\n", name.len); | ||
1092 | return 0; | ||
1093 | } | ||
1094 | return 1; | ||
1095 | } | ||
1096 | |||
1097 | static void | 1205 | static void |
1098 | add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) | 1206 | add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) |
1099 | { | 1207 | { |
@@ -1125,8 +1233,10 @@ find_confirmed_client(clientid_t *clid) | |||
1125 | unsigned int idhashval = clientid_hashval(clid->cl_id); | 1233 | unsigned int idhashval = clientid_hashval(clid->cl_id); |
1126 | 1234 | ||
1127 | list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { | 1235 | list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { |
1128 | if (same_clid(&clp->cl_clientid, clid)) | 1236 | if (same_clid(&clp->cl_clientid, clid)) { |
1237 | renew_client(clp); | ||
1129 | return clp; | 1238 | return clp; |
1239 | } | ||
1130 | } | 1240 | } |
1131 | return NULL; | 1241 | return NULL; |
1132 | } | 1242 | } |
@@ -1173,20 +1283,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) | |||
1173 | return NULL; | 1283 | return NULL; |
1174 | } | 1284 | } |
1175 | 1285 | ||
1176 | static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr) | ||
1177 | { | ||
1178 | switch (family) { | ||
1179 | case AF_INET: | ||
1180 | ((struct sockaddr_in *)sa)->sin_family = AF_INET; | ||
1181 | ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr; | ||
1182 | return; | ||
1183 | case AF_INET6: | ||
1184 | ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6; | ||
1185 | ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6; | ||
1186 | return; | ||
1187 | } | ||
1188 | } | ||
1189 | |||
1190 | static void | 1286 | static void |
1191 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) | 1287 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) |
1192 | { | 1288 | { |
@@ -1218,7 +1314,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r | |||
1218 | 1314 | ||
1219 | conn->cb_prog = se->se_callback_prog; | 1315 | conn->cb_prog = se->se_callback_prog; |
1220 | conn->cb_ident = se->se_callback_ident; | 1316 | conn->cb_ident = se->se_callback_ident; |
1221 | rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr); | 1317 | memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); |
1222 | return; | 1318 | return; |
1223 | out_err: | 1319 | out_err: |
1224 | conn->cb_addr.ss_family = AF_UNSPEC; | 1320 | conn->cb_addr.ss_family = AF_UNSPEC; |
@@ -1350,7 +1446,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1350 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, | 1446 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, |
1351 | addr_str, exid->flags, exid->spa_how); | 1447 | addr_str, exid->flags, exid->spa_how); |
1352 | 1448 | ||
1353 | if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) | 1449 | if (exid->flags & ~EXCHGID4_FLAG_MASK_A) |
1354 | return nfserr_inval; | 1450 | return nfserr_inval; |
1355 | 1451 | ||
1356 | /* Currently only support SP4_NONE */ | 1452 | /* Currently only support SP4_NONE */ |
@@ -1849,8 +1945,16 @@ out: | |||
1849 | 1945 | ||
1850 | nfsd4_get_session(cstate->session); | 1946 | nfsd4_get_session(cstate->session); |
1851 | atomic_inc(&clp->cl_refcount); | 1947 | atomic_inc(&clp->cl_refcount); |
1852 | if (clp->cl_cb_state == NFSD4_CB_DOWN) | 1948 | switch (clp->cl_cb_state) { |
1853 | seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN; | 1949 | case NFSD4_CB_DOWN: |
1950 | seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; | ||
1951 | break; | ||
1952 | case NFSD4_CB_FAULT: | ||
1953 | seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; | ||
1954 | break; | ||
1955 | default: | ||
1956 | seq->status_flags = 0; | ||
1957 | } | ||
1854 | } | 1958 | } |
1855 | kfree(conn); | 1959 | kfree(conn); |
1856 | spin_unlock(&client_lock); | 1960 | spin_unlock(&client_lock); |
@@ -1858,6 +1962,50 @@ out: | |||
1858 | return status; | 1962 | return status; |
1859 | } | 1963 | } |
1860 | 1964 | ||
1965 | static inline bool has_resources(struct nfs4_client *clp) | ||
1966 | { | ||
1967 | return !list_empty(&clp->cl_openowners) | ||
1968 | || !list_empty(&clp->cl_delegations) | ||
1969 | || !list_empty(&clp->cl_sessions); | ||
1970 | } | ||
1971 | |||
1972 | __be32 | ||
1973 | nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) | ||
1974 | { | ||
1975 | struct nfs4_client *conf, *unconf, *clp; | ||
1976 | int status = 0; | ||
1977 | |||
1978 | nfs4_lock_state(); | ||
1979 | unconf = find_unconfirmed_client(&dc->clientid); | ||
1980 | conf = find_confirmed_client(&dc->clientid); | ||
1981 | |||
1982 | if (conf) { | ||
1983 | clp = conf; | ||
1984 | |||
1985 | if (!is_client_expired(conf) && has_resources(conf)) { | ||
1986 | status = nfserr_clientid_busy; | ||
1987 | goto out; | ||
1988 | } | ||
1989 | |||
1990 | /* rfc5661 18.50.3 */ | ||
1991 | if (cstate->session && conf == cstate->session->se_client) { | ||
1992 | status = nfserr_clientid_busy; | ||
1993 | goto out; | ||
1994 | } | ||
1995 | } else if (unconf) | ||
1996 | clp = unconf; | ||
1997 | else { | ||
1998 | status = nfserr_stale_clientid; | ||
1999 | goto out; | ||
2000 | } | ||
2001 | |||
2002 | expire_client(clp); | ||
2003 | out: | ||
2004 | nfs4_unlock_state(); | ||
2005 | dprintk("%s return %d\n", __func__, ntohl(status)); | ||
2006 | return status; | ||
2007 | } | ||
2008 | |||
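[Editorial aside, not part of the patch] nfsd4_destroy_clientid() above resolves to one of three results: the clientid is busy (a confirmed client still holds openowners, delegations, or sessions, or it is the client whose session carries this request, per RFC 5661 section 18.50.3), it is unknown, or it is destroyed. A hypothetical user-space reduction of that precedence:

#include <stdbool.h>
#include <stdio.h>

enum dc_result { DC_DESTROYED, DC_BUSY, DC_STALE };

static enum dc_result destroy_clientid(bool confirmed, bool unconfirmed,
				       bool expired, bool has_resources,
				       bool is_current_session_client)
{
	if (confirmed) {
		/* a live confirmed client still holding state is busy */
		if (!expired && has_resources)
			return DC_BUSY;
		/* so is the client whose session carries this request */
		if (is_current_session_client)
			return DC_BUSY;
		return DC_DESTROYED;
	}
	if (unconfirmed)
		return DC_DESTROYED;
	return DC_STALE;	/* neither table knows this clientid */
}

int main(void)
{
	printf("%d\n", destroy_clientid(true, false, false, true, false));	/* DC_BUSY */
	printf("%d\n", destroy_clientid(false, true, false, false, false));	/* DC_DESTROYED */
	printf("%d\n", destroy_clientid(false, false, false, false, false));	/* DC_STALE */
	return 0;
}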
1861 | __be32 | 2009 | __be32 |
1862 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) | 2010 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) |
1863 | { | 2011 | { |
@@ -1900,19 +2048,13 @@ __be32 | |||
1900 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 2048 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
1901 | struct nfsd4_setclientid *setclid) | 2049 | struct nfsd4_setclientid *setclid) |
1902 | { | 2050 | { |
1903 | struct xdr_netobj clname = { | 2051 | struct xdr_netobj clname = setclid->se_name; |
1904 | .len = setclid->se_namelen, | ||
1905 | .data = setclid->se_name, | ||
1906 | }; | ||
1907 | nfs4_verifier clverifier = setclid->se_verf; | 2052 | nfs4_verifier clverifier = setclid->se_verf; |
1908 | unsigned int strhashval; | 2053 | unsigned int strhashval; |
1909 | struct nfs4_client *conf, *unconf, *new; | 2054 | struct nfs4_client *conf, *unconf, *new; |
1910 | __be32 status; | 2055 | __be32 status; |
1911 | char dname[HEXDIR_LEN]; | 2056 | char dname[HEXDIR_LEN]; |
1912 | 2057 | ||
1913 | if (!check_name(clname)) | ||
1914 | return nfserr_inval; | ||
1915 | |||
1916 | status = nfs4_make_rec_clidname(dname, &clname); | 2058 | status = nfs4_make_rec_clidname(dname, &clname); |
1917 | if (status) | 2059 | if (status) |
1918 | return status; | 2060 | return status; |
@@ -1946,7 +2088,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1946 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | 2088 | * of 5 bullet points, labeled as CASE0 - CASE4 below. |
1947 | */ | 2089 | */ |
1948 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 2090 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
1949 | status = nfserr_resource; | 2091 | status = nfserr_jukebox; |
1950 | if (!conf) { | 2092 | if (!conf) { |
1951 | /* | 2093 | /* |
1952 | * RFC 3530 14.2.33 CASE 4: | 2094 | * RFC 3530 14.2.33 CASE 4: |
@@ -2116,31 +2258,28 @@ out: | |||
2116 | return status; | 2258 | return status; |
2117 | } | 2259 | } |
2118 | 2260 | ||
2261 | static struct nfs4_file *nfsd4_alloc_file(void) | ||
2262 | { | ||
2263 | return kmem_cache_alloc(file_slab, GFP_KERNEL); | ||
2264 | } | ||
2265 | |||
2119 | /* OPEN Share state helper functions */ | 2266 | /* OPEN Share state helper functions */ |
2120 | static inline struct nfs4_file * | 2267 | static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) |
2121 | alloc_init_file(struct inode *ino) | ||
2122 | { | 2268 | { |
2123 | struct nfs4_file *fp; | ||
2124 | unsigned int hashval = file_hashval(ino); | 2269 | unsigned int hashval = file_hashval(ino); |
2125 | 2270 | ||
2126 | fp = kmem_cache_alloc(file_slab, GFP_KERNEL); | 2271 | atomic_set(&fp->fi_ref, 1); |
2127 | if (fp) { | 2272 | INIT_LIST_HEAD(&fp->fi_hash); |
2128 | atomic_set(&fp->fi_ref, 1); | 2273 | INIT_LIST_HEAD(&fp->fi_stateids); |
2129 | INIT_LIST_HEAD(&fp->fi_hash); | 2274 | INIT_LIST_HEAD(&fp->fi_delegations); |
2130 | INIT_LIST_HEAD(&fp->fi_stateids); | 2275 | fp->fi_inode = igrab(ino); |
2131 | INIT_LIST_HEAD(&fp->fi_delegations); | 2276 | fp->fi_had_conflict = false; |
2132 | fp->fi_inode = igrab(ino); | 2277 | fp->fi_lease = NULL; |
2133 | fp->fi_id = current_fileid++; | 2278 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); |
2134 | fp->fi_had_conflict = false; | 2279 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); |
2135 | fp->fi_lease = NULL; | 2280 | spin_lock(&recall_lock); |
2136 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); | 2281 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
2137 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); | 2282 | spin_unlock(&recall_lock); |
2138 | spin_lock(&recall_lock); | ||
2139 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | ||
2140 | spin_unlock(&recall_lock); | ||
2141 | return fp; | ||
2142 | } | ||
2143 | return NULL; | ||
2144 | } | 2283 | } |
2145 | 2284 | ||
2146 | static void | 2285 | static void |
@@ -2155,7 +2294,8 @@ nfsd4_free_slab(struct kmem_cache **slab) | |||
2155 | void | 2294 | void |
2156 | nfsd4_free_slabs(void) | 2295 | nfsd4_free_slabs(void) |
2157 | { | 2296 | { |
2158 | nfsd4_free_slab(&stateowner_slab); | 2297 | nfsd4_free_slab(&openowner_slab); |
2298 | nfsd4_free_slab(&lockowner_slab); | ||
2159 | nfsd4_free_slab(&file_slab); | 2299 | nfsd4_free_slab(&file_slab); |
2160 | nfsd4_free_slab(&stateid_slab); | 2300 | nfsd4_free_slab(&stateid_slab); |
2161 | nfsd4_free_slab(&deleg_slab); | 2301 | nfsd4_free_slab(&deleg_slab); |
@@ -2164,16 +2304,20 @@ nfsd4_free_slabs(void) | |||
2164 | static int | 2304 | static int |
2165 | nfsd4_init_slabs(void) | 2305 | nfsd4_init_slabs(void) |
2166 | { | 2306 | { |
2167 | stateowner_slab = kmem_cache_create("nfsd4_stateowners", | 2307 | openowner_slab = kmem_cache_create("nfsd4_openowners", |
2168 | sizeof(struct nfs4_stateowner), 0, 0, NULL); | 2308 | sizeof(struct nfs4_openowner), 0, 0, NULL); |
2169 | if (stateowner_slab == NULL) | 2309 | if (openowner_slab == NULL) |
2310 | goto out_nomem; | ||
2311 | lockowner_slab = kmem_cache_create("nfsd4_lockowners", | ||
2312 | sizeof(struct nfs4_lockowner), 0, 0, NULL); | ||
2313 | if (lockowner_slab == NULL) | ||
2170 | goto out_nomem; | 2314 | goto out_nomem; |
2171 | file_slab = kmem_cache_create("nfsd4_files", | 2315 | file_slab = kmem_cache_create("nfsd4_files", |
2172 | sizeof(struct nfs4_file), 0, 0, NULL); | 2316 | sizeof(struct nfs4_file), 0, 0, NULL); |
2173 | if (file_slab == NULL) | 2317 | if (file_slab == NULL) |
2174 | goto out_nomem; | 2318 | goto out_nomem; |
2175 | stateid_slab = kmem_cache_create("nfsd4_stateids", | 2319 | stateid_slab = kmem_cache_create("nfsd4_stateids", |
2176 | sizeof(struct nfs4_stateid), 0, 0, NULL); | 2320 | sizeof(struct nfs4_ol_stateid), 0, 0, NULL); |
2177 | if (stateid_slab == NULL) | 2321 | if (stateid_slab == NULL) |
2178 | goto out_nomem; | 2322 | goto out_nomem; |
2179 | deleg_slab = kmem_cache_create("nfsd4_delegations", | 2323 | deleg_slab = kmem_cache_create("nfsd4_delegations", |
@@ -2187,97 +2331,94 @@ out_nomem: | |||
2187 | return -ENOMEM; | 2331 | return -ENOMEM; |
2188 | } | 2332 | } |
2189 | 2333 | ||
2190 | void | 2334 | void nfs4_free_openowner(struct nfs4_openowner *oo) |
2191 | nfs4_free_stateowner(struct kref *kref) | ||
2192 | { | 2335 | { |
2193 | struct nfs4_stateowner *sop = | 2336 | kfree(oo->oo_owner.so_owner.data); |
2194 | container_of(kref, struct nfs4_stateowner, so_ref); | 2337 | kmem_cache_free(openowner_slab, oo); |
2195 | kfree(sop->so_owner.data); | ||
2196 | kmem_cache_free(stateowner_slab, sop); | ||
2197 | } | 2338 | } |
2198 | 2339 | ||
2199 | static inline struct nfs4_stateowner * | 2340 | void nfs4_free_lockowner(struct nfs4_lockowner *lo) |
2200 | alloc_stateowner(struct xdr_netobj *owner) | ||
2201 | { | 2341 | { |
2202 | struct nfs4_stateowner *sop; | 2342 | kfree(lo->lo_owner.so_owner.data); |
2343 | kmem_cache_free(lockowner_slab, lo); | ||
2344 | } | ||
2203 | 2345 | ||
2204 | if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { | 2346 | static void init_nfs4_replay(struct nfs4_replay *rp) |
2205 | if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { | 2347 | { |
2206 | memcpy(sop->so_owner.data, owner->data, owner->len); | 2348 | rp->rp_status = nfserr_serverfault; |
2207 | sop->so_owner.len = owner->len; | 2349 | rp->rp_buflen = 0; |
2208 | kref_init(&sop->so_ref); | 2350 | rp->rp_buf = rp->rp_ibuf; |
2209 | return sop; | ||
2210 | } | ||
2211 | kmem_cache_free(stateowner_slab, sop); | ||
2212 | } | ||
2213 | return NULL; | ||
2214 | } | 2351 | } |
2215 | 2352 | ||
2216 | static struct nfs4_stateowner * | 2353 | static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) |
2217 | alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { | 2354 | { |
2218 | struct nfs4_stateowner *sop; | 2355 | struct nfs4_stateowner *sop; |
2219 | struct nfs4_replay *rp; | ||
2220 | unsigned int idhashval; | ||
2221 | 2356 | ||
2222 | if (!(sop = alloc_stateowner(&open->op_owner))) | 2357 | sop = kmem_cache_alloc(slab, GFP_KERNEL); |
2358 | if (!sop) | ||
2359 | return NULL; | ||
2360 | |||
2361 | sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); | ||
2362 | if (!sop->so_owner.data) { | ||
2363 | kmem_cache_free(slab, sop); | ||
2223 | return NULL; | 2364 | return NULL; |
2224 | idhashval = ownerid_hashval(current_ownerid); | 2365 | } |
2225 | INIT_LIST_HEAD(&sop->so_idhash); | 2366 | sop->so_owner.len = owner->len; |
2226 | INIT_LIST_HEAD(&sop->so_strhash); | 2367 | |
2227 | INIT_LIST_HEAD(&sop->so_perclient); | ||
2228 | INIT_LIST_HEAD(&sop->so_stateids); | 2368 | INIT_LIST_HEAD(&sop->so_stateids); |
2229 | INIT_LIST_HEAD(&sop->so_perstateid); /* not used */ | ||
2230 | INIT_LIST_HEAD(&sop->so_close_lru); | ||
2231 | sop->so_time = 0; | ||
2232 | list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); | ||
2233 | list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); | ||
2234 | list_add(&sop->so_perclient, &clp->cl_openowners); | ||
2235 | sop->so_is_open_owner = 1; | ||
2236 | sop->so_id = current_ownerid++; | ||
2237 | sop->so_client = clp; | 2369 | sop->so_client = clp; |
2238 | sop->so_seqid = open->op_seqid; | 2370 | init_nfs4_replay(&sop->so_replay); |
2239 | sop->so_confirmed = 0; | ||
2240 | rp = &sop->so_replay; | ||
2241 | rp->rp_status = nfserr_serverfault; | ||
2242 | rp->rp_buflen = 0; | ||
2243 | rp->rp_buf = rp->rp_ibuf; | ||
2244 | return sop; | 2371 | return sop; |
2245 | } | 2372 | } |
2246 | 2373 | ||
2247 | static inline void | 2374 | static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) |
2248 | init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { | 2375 | { |
2249 | struct nfs4_stateowner *sop = open->op_stateowner; | 2376 | list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); |
2250 | unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); | 2377 | list_add(&oo->oo_perclient, &clp->cl_openowners); |
2378 | } | ||
2251 | 2379 | ||
2252 | INIT_LIST_HEAD(&stp->st_hash); | 2380 | static struct nfs4_openowner * |
2253 | INIT_LIST_HEAD(&stp->st_perstateowner); | 2381 | alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { |
2382 | struct nfs4_openowner *oo; | ||
2383 | |||
2384 | oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); | ||
2385 | if (!oo) | ||
2386 | return NULL; | ||
2387 | oo->oo_owner.so_is_open_owner = 1; | ||
2388 | oo->oo_owner.so_seqid = open->op_seqid; | ||
2389 | oo->oo_flags = NFS4_OO_NEW; | ||
2390 | oo->oo_time = 0; | ||
2391 | oo->oo_last_closed_stid = NULL; | ||
2392 | INIT_LIST_HEAD(&oo->oo_close_lru); | ||
2393 | hash_openowner(oo, clp, strhashval); | ||
2394 | return oo; | ||
2395 | } | ||
2396 | |||
2397 | static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { | ||
2398 | struct nfs4_openowner *oo = open->op_openowner; | ||
2399 | struct nfs4_client *clp = oo->oo_owner.so_client; | ||
2400 | |||
2401 | init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); | ||
2254 | INIT_LIST_HEAD(&stp->st_lockowners); | 2402 | INIT_LIST_HEAD(&stp->st_lockowners); |
2255 | INIT_LIST_HEAD(&stp->st_perfile); | 2403 | list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); |
2256 | list_add(&stp->st_hash, &stateid_hashtbl[hashval]); | ||
2257 | list_add(&stp->st_perstateowner, &sop->so_stateids); | ||
2258 | list_add(&stp->st_perfile, &fp->fi_stateids); | 2404 | list_add(&stp->st_perfile, &fp->fi_stateids); |
2259 | stp->st_stateowner = sop; | 2405 | stp->st_stateowner = &oo->oo_owner; |
2260 | get_nfs4_file(fp); | 2406 | get_nfs4_file(fp); |
2261 | stp->st_file = fp; | 2407 | stp->st_file = fp; |
2262 | stp->st_stateid.si_boot = boot_time; | ||
2263 | stp->st_stateid.si_stateownerid = sop->so_id; | ||
2264 | stp->st_stateid.si_fileid = fp->fi_id; | ||
2265 | stp->st_stateid.si_generation = 0; | ||
2266 | stp->st_access_bmap = 0; | 2408 | stp->st_access_bmap = 0; |
2267 | stp->st_deny_bmap = 0; | 2409 | stp->st_deny_bmap = 0; |
2268 | __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, | 2410 | __set_bit(open->op_share_access, &stp->st_access_bmap); |
2269 | &stp->st_access_bmap); | ||
2270 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2411 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
2271 | stp->st_openstp = NULL; | 2412 | stp->st_openstp = NULL; |
2272 | } | 2413 | } |
2273 | 2414 | ||
2274 | static void | 2415 | static void |
2275 | move_to_close_lru(struct nfs4_stateowner *sop) | 2416 | move_to_close_lru(struct nfs4_openowner *oo) |
2276 | { | 2417 | { |
2277 | dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); | 2418 | dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); |
2278 | 2419 | ||
2279 | list_move_tail(&sop->so_close_lru, &close_lru); | 2420 | list_move_tail(&oo->oo_close_lru, &close_lru); |
2280 | sop->so_time = get_seconds(); | 2421 | oo->oo_time = get_seconds(); |
2281 | } | 2422 | } |
2282 | 2423 | ||
2283 | static int | 2424 | static int |
@@ -2289,14 +2430,18 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, | |||
2289 | (sop->so_client->cl_clientid.cl_id == clid->cl_id); | 2430 | (sop->so_client->cl_clientid.cl_id == clid->cl_id); |
2290 | } | 2431 | } |
2291 | 2432 | ||
2292 | static struct nfs4_stateowner * | 2433 | static struct nfs4_openowner * |
2293 | find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) | 2434 | find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) |
2294 | { | 2435 | { |
2295 | struct nfs4_stateowner *so = NULL; | 2436 | struct nfs4_stateowner *so; |
2437 | struct nfs4_openowner *oo; | ||
2296 | 2438 | ||
2297 | list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { | 2439 | list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { |
2298 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) | 2440 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { |
2299 | return so; | 2441 | oo = openowner(so); |
2442 | renew_client(oo->oo_owner.so_client); | ||
2443 | return oo; | ||
2444 | } | ||
2300 | } | 2445 | } |
2301 | return NULL; | 2446 | return NULL; |
2302 | } | 2447 | } |
@@ -2320,31 +2465,6 @@ find_file(struct inode *ino) | |||
2320 | return NULL; | 2465 | return NULL; |
2321 | } | 2466 | } |
2322 | 2467 | ||
2323 | static inline int access_valid(u32 x, u32 minorversion) | ||
2324 | { | ||
2325 | if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ) | ||
2326 | return 0; | ||
2327 | if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH) | ||
2328 | return 0; | ||
2329 | x &= ~NFS4_SHARE_ACCESS_MASK; | ||
2330 | if (minorversion && x) { | ||
2331 | if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL) | ||
2332 | return 0; | ||
2333 | if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED) | ||
2334 | return 0; | ||
2335 | x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK); | ||
2336 | } | ||
2337 | if (x) | ||
2338 | return 0; | ||
2339 | return 1; | ||
2340 | } | ||
2341 | |||
2342 | static inline int deny_valid(u32 x) | ||
2343 | { | ||
2344 | /* Note: unlike access bits, deny bits may be zero. */ | ||
2345 | return x <= NFS4_SHARE_DENY_BOTH; | ||
2346 | } | ||
2347 | |||
2348 | /* | 2468 | /* |
2349 | * Called to check deny when READ with all zero stateid or | 2469 | * Called to check deny when READ with all zero stateid or |
2350 | * WRITE with all zero or all one stateid | 2470 | * WRITE with all zero or all one stateid |
@@ -2354,7 +2474,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) | |||
2354 | { | 2474 | { |
2355 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2475 | struct inode *ino = current_fh->fh_dentry->d_inode; |
2356 | struct nfs4_file *fp; | 2476 | struct nfs4_file *fp; |
2357 | struct nfs4_stateid *stp; | 2477 | struct nfs4_ol_stateid *stp; |
2358 | __be32 ret; | 2478 | __be32 ret; |
2359 | 2479 | ||
2360 | dprintk("NFSD: nfs4_share_conflict\n"); | 2480 | dprintk("NFSD: nfs4_share_conflict\n"); |
@@ -2429,6 +2549,16 @@ static const struct lock_manager_operations nfsd_lease_mng_ops = { | |||
2429 | .lm_change = nfsd_change_deleg_cb, | 2549 | .lm_change = nfsd_change_deleg_cb, |
2430 | }; | 2550 | }; |
2431 | 2551 | ||
2552 | static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) | ||
2553 | { | ||
2554 | if (nfsd4_has_session(cstate)) | ||
2555 | return nfs_ok; | ||
2556 | if (seqid == so->so_seqid - 1) | ||
2557 | return nfserr_replay_me; | ||
2558 | if (seqid == so->so_seqid) | ||
2559 | return nfs_ok; | ||
2560 | return nfserr_bad_seqid; | ||
2561 | } | ||
2432 | 2562 | ||
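[Editorial aside, not part of the patch] nfsd4_check_seqid() condenses the NFSv4.0 seqid protocol: the owner's previous seqid marks a retransmission to be answered from the replay cache, the current seqid is the expected next request, and anything else is out of sequence; with NFSv4.1 sessions the check is skipped because sessions do their own replay handling. A hedged sketch of the same rule, with hypothetical names:

#include <stdio.h>

enum seq_result { SEQ_OK, SEQ_REPLAY, SEQ_BAD };

static enum seq_result check_seqid(int has_session, unsigned int seqid,
				   unsigned int so_seqid)
{
	if (has_session)
		return SEQ_OK;		/* v4.1+: sessions handle replay */
	if (seqid == so_seqid - 1)
		return SEQ_REPLAY;	/* retransmission of the last request */
	if (seqid == so_seqid)
		return SEQ_OK;		/* the expected next request */
	return SEQ_BAD;			/* out of sequence */
}

int main(void)
{
	printf("%d %d %d\n",
	       check_seqid(0, 7, 7),	/* 0: SEQ_OK */
	       check_seqid(0, 6, 7),	/* 1: SEQ_REPLAY */
	       check_seqid(0, 9, 7));	/* 2: SEQ_BAD */
	return 0;
}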
2433 | __be32 | 2563 | __be32 |
2434 | nfsd4_process_open1(struct nfsd4_compound_state *cstate, | 2564 | nfsd4_process_open1(struct nfsd4_compound_state *cstate, |
@@ -2437,57 +2567,49 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, | |||
2437 | clientid_t *clientid = &open->op_clientid; | 2567 | clientid_t *clientid = &open->op_clientid; |
2438 | struct nfs4_client *clp = NULL; | 2568 | struct nfs4_client *clp = NULL; |
2439 | unsigned int strhashval; | 2569 | unsigned int strhashval; |
2440 | struct nfs4_stateowner *sop = NULL; | 2570 | struct nfs4_openowner *oo = NULL; |
2441 | 2571 | __be32 status; | |
2442 | if (!check_name(open->op_owner)) | ||
2443 | return nfserr_inval; | ||
2444 | 2572 | ||
2445 | if (STALE_CLIENTID(&open->op_clientid)) | 2573 | if (STALE_CLIENTID(&open->op_clientid)) |
2446 | return nfserr_stale_clientid; | 2574 | return nfserr_stale_clientid; |
2575 | /* | ||
2576 | * In case we need it later, after we've already created the | ||
2577 | * file and don't want to risk a further failure: | ||
2578 | */ | ||
2579 | open->op_file = nfsd4_alloc_file(); | ||
2580 | if (open->op_file == NULL) | ||
2581 | return nfserr_jukebox; | ||
2447 | 2582 | ||
2448 | strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); | 2583 | strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); |
2449 | sop = find_openstateowner_str(strhashval, open); | 2584 | oo = find_openstateowner_str(strhashval, open); |
2450 | open->op_stateowner = sop; | 2585 | open->op_openowner = oo; |
2451 | if (!sop) { | 2586 | if (!oo) { |
2452 | /* Make sure the client's lease hasn't expired. */ | ||
2453 | clp = find_confirmed_client(clientid); | 2587 | clp = find_confirmed_client(clientid); |
2454 | if (clp == NULL) | 2588 | if (clp == NULL) |
2455 | return nfserr_expired; | 2589 | return nfserr_expired; |
2456 | goto renew; | 2590 | goto new_owner; |
2457 | } | 2591 | } |
2458 | /* When sessions are used, skip open sequenceid processing */ | 2592 | if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { |
2459 | if (nfsd4_has_session(cstate)) | ||
2460 | goto renew; | ||
2461 | if (!sop->so_confirmed) { | ||
2462 | /* Replace unconfirmed owners without checking for replay. */ | 2593 | /* Replace unconfirmed owners without checking for replay. */ |
2463 | clp = sop->so_client; | 2594 | clp = oo->oo_owner.so_client; |
2464 | release_openowner(sop); | 2595 | release_openowner(oo); |
2465 | open->op_stateowner = NULL; | 2596 | open->op_openowner = NULL; |
2466 | goto renew; | 2597 | goto new_owner; |
2467 | } | ||
2468 | if (open->op_seqid == sop->so_seqid - 1) { | ||
2469 | if (sop->so_replay.rp_buflen) | ||
2470 | return nfserr_replay_me; | ||
2471 | /* The original OPEN failed so spectacularly | ||
2472 | * that we don't even have replay data saved! | ||
2473 | * Therefore, we have no choice but to continue | ||
2474 | * processing this OPEN; presumably, we'll | ||
2475 | * fail again for the same reason. | ||
2476 | */ | ||
2477 | dprintk("nfsd4_process_open1: replay with no replay cache\n"); | ||
2478 | goto renew; | ||
2479 | } | ||
2480 | if (open->op_seqid != sop->so_seqid) | ||
2481 | return nfserr_bad_seqid; | ||
2482 | renew: | ||
2483 | if (open->op_stateowner == NULL) { | ||
2484 | sop = alloc_init_open_stateowner(strhashval, clp, open); | ||
2485 | if (sop == NULL) | ||
2486 | return nfserr_resource; | ||
2487 | open->op_stateowner = sop; | ||
2488 | } | 2598 | } |
2489 | list_del_init(&sop->so_close_lru); | 2599 | status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); |
2490 | renew_client(sop->so_client); | 2600 | if (status) |
2601 | return status; | ||
2602 | clp = oo->oo_owner.so_client; | ||
2603 | goto alloc_stateid; | ||
2604 | new_owner: | ||
2605 | oo = alloc_init_open_stateowner(strhashval, clp, open); | ||
2606 | if (oo == NULL) | ||
2607 | return nfserr_jukebox; | ||
2608 | open->op_openowner = oo; | ||
2609 | alloc_stateid: | ||
2610 | open->op_stp = nfs4_alloc_stateid(clp); | ||
2611 | if (!open->op_stp) | ||
2612 | return nfserr_jukebox; | ||
2491 | return nfs_ok; | 2613 | return nfs_ok; |
2492 | } | 2614 | } |
2493 | 2615 | ||
@@ -2500,36 +2622,37 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) | |||
2500 | return nfs_ok; | 2622 | return nfs_ok; |
2501 | } | 2623 | } |
2502 | 2624 | ||
2503 | static struct nfs4_delegation * | 2625 | static int share_access_to_flags(u32 share_access) |
2504 | find_delegation_file(struct nfs4_file *fp, stateid_t *stid) | ||
2505 | { | 2626 | { |
2506 | struct nfs4_delegation *dp; | 2627 | share_access &= ~NFS4_SHARE_WANT_MASK; |
2507 | 2628 | ||
2508 | spin_lock(&recall_lock); | 2629 | return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; |
2509 | list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) | ||
2510 | if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) { | ||
2511 | spin_unlock(&recall_lock); | ||
2512 | return dp; | ||
2513 | } | ||
2514 | spin_unlock(&recall_lock); | ||
2515 | return NULL; | ||
2516 | } | 2630 | } |
2517 | 2631 | ||
2518 | static int share_access_to_flags(u32 share_access) | 2632 | static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) |
2519 | { | 2633 | { |
2520 | share_access &= ~NFS4_SHARE_WANT_MASK; | 2634 | struct nfs4_stid *ret; |
2521 | 2635 | ||
2522 | return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; | 2636 | ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); |
2637 | if (!ret) | ||
2638 | return NULL; | ||
2639 | return delegstateid(ret); | ||
2640 | } | ||
2641 | |||
2642 | static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) | ||
2643 | { | ||
2644 | return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR || | ||
2645 | open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH; | ||
2523 | } | 2646 | } |
2524 | 2647 | ||
2525 | static __be32 | 2648 | static __be32 |
2526 | nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, | 2649 | nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, |
2527 | struct nfs4_delegation **dp) | 2650 | struct nfs4_delegation **dp) |
2528 | { | 2651 | { |
2529 | int flags; | 2652 | int flags; |
2530 | __be32 status = nfserr_bad_stateid; | 2653 | __be32 status = nfserr_bad_stateid; |
2531 | 2654 | ||
2532 | *dp = find_delegation_file(fp, &open->op_delegate_stateid); | 2655 | *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); |
2533 | if (*dp == NULL) | 2656 | if (*dp == NULL) |
2534 | goto out; | 2657 | goto out; |
2535 | flags = share_access_to_flags(open->op_share_access); | 2658 | flags = share_access_to_flags(open->op_share_access); |
@@ -2537,41 +2660,37 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, | |||
2537 | if (status) | 2660 | if (status) |
2538 | *dp = NULL; | 2661 | *dp = NULL; |
2539 | out: | 2662 | out: |
2540 | if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR) | 2663 | if (!nfsd4_is_deleg_cur(open)) |
2541 | return nfs_ok; | 2664 | return nfs_ok; |
2542 | if (status) | 2665 | if (status) |
2543 | return status; | 2666 | return status; |
2544 | open->op_stateowner->so_confirmed = 1; | 2667 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
2545 | return nfs_ok; | 2668 | return nfs_ok; |
2546 | } | 2669 | } |
2547 | 2670 | ||
2548 | static __be32 | 2671 | static __be32 |
2549 | nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) | 2672 | nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) |
2550 | { | 2673 | { |
2551 | struct nfs4_stateid *local; | 2674 | struct nfs4_ol_stateid *local; |
2552 | __be32 status = nfserr_share_denied; | 2675 | struct nfs4_openowner *oo = open->op_openowner; |
2553 | struct nfs4_stateowner *sop = open->op_stateowner; | ||
2554 | 2676 | ||
2555 | list_for_each_entry(local, &fp->fi_stateids, st_perfile) { | 2677 | list_for_each_entry(local, &fp->fi_stateids, st_perfile) { |
2556 | /* ignore lock owners */ | 2678 | /* ignore lock owners */ |
2557 | if (local->st_stateowner->so_is_open_owner == 0) | 2679 | if (local->st_stateowner->so_is_open_owner == 0) |
2558 | continue; | 2680 | continue; |
2559 | /* remember if we have seen this open owner */ | 2681 | /* remember if we have seen this open owner */ |
2560 | if (local->st_stateowner == sop) | 2682 | if (local->st_stateowner == &oo->oo_owner) |
2561 | *stpp = local; | 2683 | *stpp = local; |
2562 | /* check for conflicting share reservations */ | 2684 | /* check for conflicting share reservations */ |
2563 | if (!test_share(local, open)) | 2685 | if (!test_share(local, open)) |
2564 | goto out; | 2686 | return nfserr_share_denied; |
2565 | } | 2687 | } |
2566 | status = 0; | 2688 | return nfs_ok; |
2567 | out: | ||
2568 | return status; | ||
2569 | } | 2689 | } |
2570 | 2690 | ||
2571 | static inline struct nfs4_stateid * | 2691 | static void nfs4_free_stateid(struct nfs4_ol_stateid *s) |
2572 | nfs4_alloc_stateid(void) | ||
2573 | { | 2692 | { |
2574 | return kmem_cache_alloc(stateid_slab, GFP_KERNEL); | 2693 | kmem_cache_free(stateid_slab, s); |
2575 | } | 2694 | } |
2576 | 2695 | ||
2577 | static inline int nfs4_access_to_access(u32 nfs4_access) | 2696 | static inline int nfs4_access_to_access(u32 nfs4_access) |
@@ -2592,12 +2711,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, | |||
2592 | int oflag = nfs4_access_to_omode(open->op_share_access); | 2711 | int oflag = nfs4_access_to_omode(open->op_share_access); |
2593 | int access = nfs4_access_to_access(open->op_share_access); | 2712 | int access = nfs4_access_to_access(open->op_share_access); |
2594 | 2713 | ||
2595 | /* CLAIM_DELEGATE_CUR is used in response to a broken lease; | ||
2596 | * allowing it to break the lease and return EAGAIN leaves the | ||
2597 | * client unable to make progress in returning the delegation */ | ||
2598 | if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) | ||
2599 | access |= NFSD_MAY_NOT_BREAK_LEASE; | ||
2600 | |||
2601 | if (!fp->fi_fds[oflag]) { | 2714 | if (!fp->fi_fds[oflag]) { |
2602 | status = nfsd_open(rqstp, cur_fh, S_IFREG, access, | 2715 | status = nfsd_open(rqstp, cur_fh, S_IFREG, access, |
2603 | &fp->fi_fds[oflag]); | 2716 | &fp->fi_fds[oflag]); |
@@ -2609,27 +2722,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, | |||
2609 | return nfs_ok; | 2722 | return nfs_ok; |
2610 | } | 2723 | } |
2611 | 2724 | ||
2612 | static __be32 | ||
2613 | nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, | ||
2614 | struct nfs4_file *fp, struct svc_fh *cur_fh, | ||
2615 | struct nfsd4_open *open) | ||
2616 | { | ||
2617 | struct nfs4_stateid *stp; | ||
2618 | __be32 status; | ||
2619 | |||
2620 | stp = nfs4_alloc_stateid(); | ||
2621 | if (stp == NULL) | ||
2622 | return nfserr_resource; | ||
2623 | |||
2624 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); | ||
2625 | if (status) { | ||
2626 | kmem_cache_free(stateid_slab, stp); | ||
2627 | return status; | ||
2628 | } | ||
2629 | *stpp = stp; | ||
2630 | return 0; | ||
2631 | } | ||
2632 | |||
2633 | static inline __be32 | 2725 | static inline __be32 |
2634 | nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, | 2726 | nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, |
2635 | struct nfsd4_open *open) | 2727 | struct nfsd4_open *open) |
@@ -2646,9 +2738,9 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, | |||
2646 | } | 2738 | } |
2647 | 2739 | ||
2648 | static __be32 | 2740 | static __be32 |
2649 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) | 2741 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) |
2650 | { | 2742 | { |
2651 | u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; | 2743 | u32 op_share_access = open->op_share_access; |
2652 | bool new_access; | 2744 | bool new_access; |
2653 | __be32 status; | 2745 | __be32 status; |
2654 | 2746 | ||
@@ -2677,8 +2769,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c | |||
2677 | static void | 2769 | static void |
2678 | nfs4_set_claim_prev(struct nfsd4_open *open) | 2770 | nfs4_set_claim_prev(struct nfsd4_open *open) |
2679 | { | 2771 | { |
2680 | open->op_stateowner->so_confirmed = 1; | 2772 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
2681 | open->op_stateowner->so_client->cl_firststate = 1; | 2773 | open->op_openowner->oo_owner.so_client->cl_firststate = 1; |
2682 | } | 2774 | } |
2683 | 2775 | ||
2684 | /* Should we give out recallable state?: */ | 2776 | /* Should we give out recallable state?: */ |
@@ -2721,7 +2813,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | |||
2721 | if (!fl) | 2813 | if (!fl) |
2722 | return -ENOMEM; | 2814 | return -ENOMEM; |
2723 | fl->fl_file = find_readable_file(fp); | 2815 | fl->fl_file = find_readable_file(fp); |
2724 | list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); | 2816 | list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); |
2725 | status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); | 2817 | status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); |
2726 | if (status) { | 2818 | if (status) { |
2727 | list_del_init(&dp->dl_perclnt); | 2819 | list_del_init(&dp->dl_perclnt); |
@@ -2750,7 +2842,7 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) | |||
2750 | atomic_inc(&fp->fi_delegees); | 2842 | atomic_inc(&fp->fi_delegees); |
2751 | list_add(&dp->dl_perfile, &fp->fi_delegations); | 2843 | list_add(&dp->dl_perfile, &fp->fi_delegations); |
2752 | spin_unlock(&recall_lock); | 2844 | spin_unlock(&recall_lock); |
2753 | list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); | 2845 | list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); |
2754 | return 0; | 2846 | return 0; |
2755 | } | 2847 | } |
2756 | 2848 | ||
@@ -2758,14 +2850,14 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) | |||
2758 | * Attempt to hand out a delegation. | 2850 | * Attempt to hand out a delegation. |
2759 | */ | 2851 | */ |
2760 | static void | 2852 | static void |
2761 | nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp) | 2853 | nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp) |
2762 | { | 2854 | { |
2763 | struct nfs4_delegation *dp; | 2855 | struct nfs4_delegation *dp; |
2764 | struct nfs4_stateowner *sop = stp->st_stateowner; | 2856 | struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); |
2765 | int cb_up; | 2857 | int cb_up; |
2766 | int status, flag = 0; | 2858 | int status, flag = 0; |
2767 | 2859 | ||
2768 | cb_up = nfsd4_cb_channel_good(sop->so_client); | 2860 | cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); |
2769 | flag = NFS4_OPEN_DELEGATE_NONE; | 2861 | flag = NFS4_OPEN_DELEGATE_NONE; |
2770 | open->op_recall = 0; | 2862 | open->op_recall = 0; |
2771 | switch (open->op_claim_type) { | 2863 | switch (open->op_claim_type) { |
@@ -2781,7 +2873,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2781 | * had the chance to reclaim theirs.... */ | 2873 | * had the chance to reclaim theirs.... */ |
2782 | if (locks_in_grace()) | 2874 | if (locks_in_grace()) |
2783 | goto out; | 2875 | goto out; |
2784 | if (!cb_up || !sop->so_confirmed) | 2876 | if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) |
2785 | goto out; | 2877 | goto out; |
2786 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) | 2878 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) |
2787 | flag = NFS4_OPEN_DELEGATE_WRITE; | 2879 | flag = NFS4_OPEN_DELEGATE_WRITE; |
@@ -2792,17 +2884,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2792 | goto out; | 2884 | goto out; |
2793 | } | 2885 | } |
2794 | 2886 | ||
2795 | dp = alloc_init_deleg(sop->so_client, stp, fh, flag); | 2887 | dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); |
2796 | if (dp == NULL) | 2888 | if (dp == NULL) |
2797 | goto out_no_deleg; | 2889 | goto out_no_deleg; |
2798 | status = nfs4_set_delegation(dp, flag); | 2890 | status = nfs4_set_delegation(dp, flag); |
2799 | if (status) | 2891 | if (status) |
2800 | goto out_free; | 2892 | goto out_free; |
2801 | 2893 | ||
2802 | memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); | 2894 | memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); |
2803 | 2895 | ||
2804 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", | 2896 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", |
2805 | STATEID_VAL(&dp->dl_stateid)); | 2897 | STATEID_VAL(&dp->dl_stid.sc_stateid)); |
2806 | out: | 2898 | out: |
2807 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS | 2899 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS |
2808 | && flag == NFS4_OPEN_DELEGATE_NONE | 2900 | && flag == NFS4_OPEN_DELEGATE_NONE |
@@ -2824,16 +2916,13 @@ __be32 | |||
2824 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) | 2916 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) |
2825 | { | 2917 | { |
2826 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | 2918 | struct nfsd4_compoundres *resp = rqstp->rq_resp; |
2919 | struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; | ||
2827 | struct nfs4_file *fp = NULL; | 2920 | struct nfs4_file *fp = NULL; |
2828 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2921 | struct inode *ino = current_fh->fh_dentry->d_inode; |
2829 | struct nfs4_stateid *stp = NULL; | 2922 | struct nfs4_ol_stateid *stp = NULL; |
2830 | struct nfs4_delegation *dp = NULL; | 2923 | struct nfs4_delegation *dp = NULL; |
2831 | __be32 status; | 2924 | __be32 status; |
2832 | 2925 | ||
2833 | status = nfserr_inval; | ||
2834 | if (!access_valid(open->op_share_access, resp->cstate.minorversion) | ||
2835 | || !deny_valid(open->op_share_deny)) | ||
2836 | goto out; | ||
2837 | /* | 2926 | /* |
2838 | * Lookup file; if found, lookup stateid and check open request, | 2927 | * Lookup file; if found, lookup stateid and check open request, |
2839 | * and check for delegations in the process of being recalled. | 2928 | * and check for delegations in the process of being recalled. |
@@ -2843,17 +2932,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
2843 | if (fp) { | 2932 | if (fp) { |
2844 | if ((status = nfs4_check_open(fp, open, &stp))) | 2933 | if ((status = nfs4_check_open(fp, open, &stp))) |
2845 | goto out; | 2934 | goto out; |
2846 | status = nfs4_check_deleg(fp, open, &dp); | 2935 | status = nfs4_check_deleg(cl, fp, open, &dp); |
2847 | if (status) | 2936 | if (status) |
2848 | goto out; | 2937 | goto out; |
2849 | } else { | 2938 | } else { |
2850 | status = nfserr_bad_stateid; | 2939 | status = nfserr_bad_stateid; |
2851 | if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) | 2940 | if (nfsd4_is_deleg_cur(open)) |
2852 | goto out; | ||
2853 | status = nfserr_resource; | ||
2854 | fp = alloc_init_file(ino); | ||
2855 | if (fp == NULL) | ||
2856 | goto out; | 2941 | goto out; |
2942 | status = nfserr_jukebox; | ||
2943 | fp = open->op_file; | ||
2944 | open->op_file = NULL; | ||
2945 | nfsd4_init_file(fp, ino); | ||
2857 | } | 2946 | } |
2858 | 2947 | ||
2859 | /* | 2948 | /* |
@@ -2865,24 +2954,24 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
2865 | status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); | 2954 | status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); |
2866 | if (status) | 2955 | if (status) |
2867 | goto out; | 2956 | goto out; |
2868 | update_stateid(&stp->st_stateid); | ||
2869 | } else { | 2957 | } else { |
2870 | status = nfs4_new_open(rqstp, &stp, fp, current_fh, open); | 2958 | status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); |
2871 | if (status) | 2959 | if (status) |
2872 | goto out; | 2960 | goto out; |
2873 | init_stateid(stp, fp, open); | 2961 | stp = open->op_stp; |
2962 | open->op_stp = NULL; | ||
2963 | init_open_stateid(stp, fp, open); | ||
2874 | status = nfsd4_truncate(rqstp, current_fh, open); | 2964 | status = nfsd4_truncate(rqstp, current_fh, open); |
2875 | if (status) { | 2965 | if (status) { |
2876 | release_open_stateid(stp); | 2966 | release_open_stateid(stp); |
2877 | goto out; | 2967 | goto out; |
2878 | } | 2968 | } |
2879 | if (nfsd4_has_session(&resp->cstate)) | ||
2880 | update_stateid(&stp->st_stateid); | ||
2881 | } | 2969 | } |
2882 | memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); | 2970 | update_stateid(&stp->st_stid.sc_stateid); |
2971 | memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | ||
2883 | 2972 | ||
2884 | if (nfsd4_has_session(&resp->cstate)) | 2973 | if (nfsd4_has_session(&resp->cstate)) |
2885 | open->op_stateowner->so_confirmed = 1; | 2974 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
2886 | 2975 | ||
2887 | /* | 2976 | /* |
2888 | * Attempt to hand out a delegation. No error return, because the | 2977 | * Attempt to hand out a delegation. No error return, because the |
@@ -2893,7 +2982,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
2893 | status = nfs_ok; | 2982 | status = nfs_ok; |
2894 | 2983 | ||
2895 | dprintk("%s: stateid=" STATEID_FMT "\n", __func__, | 2984 | dprintk("%s: stateid=" STATEID_FMT "\n", __func__, |
2896 | STATEID_VAL(&stp->st_stateid)); | 2985 | STATEID_VAL(&stp->st_stid.sc_stateid)); |
2897 | out: | 2986 | out: |
2898 | if (fp) | 2987 | if (fp) |
2899 | put_nfs4_file(fp); | 2988 | put_nfs4_file(fp); |
@@ -2903,13 +2992,34 @@ out: | |||
2903 | * To finish the open response, we just need to set the rflags. | 2992 | * To finish the open response, we just need to set the rflags. |
2904 | */ | 2993 | */ |
2905 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; | 2994 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; |
2906 | if (!open->op_stateowner->so_confirmed && | 2995 | if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && |
2907 | !nfsd4_has_session(&resp->cstate)) | 2996 | !nfsd4_has_session(&resp->cstate)) |
2908 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; | 2997 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; |
2909 | 2998 | ||
2910 | return status; | 2999 | return status; |
2911 | } | 3000 | } |
2912 | 3001 | ||
3002 | void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) | ||
3003 | { | ||
3004 | if (open->op_openowner) { | ||
3005 | struct nfs4_openowner *oo = open->op_openowner; | ||
3006 | |||
3007 | if (!list_empty(&oo->oo_owner.so_stateids)) | ||
3008 | list_del_init(&oo->oo_close_lru); | ||
3009 | if (oo->oo_flags & NFS4_OO_NEW) { | ||
3010 | if (status) { | ||
3011 | release_openowner(oo); | ||
3012 | open->op_openowner = NULL; | ||
3013 | } else | ||
3014 | oo->oo_flags &= ~NFS4_OO_NEW; | ||
3015 | } | ||
3016 | } | ||
3017 | if (open->op_file) | ||
3018 | nfsd4_free_file(open->op_file); | ||
3019 | if (open->op_stp) | ||
3020 | nfs4_free_stateid(open->op_stp); | ||
3021 | } | ||
3022 | |||
2913 | __be32 | 3023 | __be32 |
2914 | nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3024 | nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
2915 | clientid_t *clid) | 3025 | clientid_t *clid) |
@@ -2930,7 +3040,6 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2930 | dprintk("nfsd4_renew: clientid not found!\n"); | 3040 | dprintk("nfsd4_renew: clientid not found!\n"); |
2931 | goto out; | 3041 | goto out; |
2932 | } | 3042 | } |
2933 | renew_client(clp); | ||
2934 | status = nfserr_cb_path_down; | 3043 | status = nfserr_cb_path_down; |
2935 | if (!list_empty(&clp->cl_delegations) | 3044 | if (!list_empty(&clp->cl_delegations) |
2936 | && clp->cl_cb_state != NFSD4_CB_UP) | 3045 | && clp->cl_cb_state != NFSD4_CB_UP) |
@@ -2962,7 +3071,7 @@ static time_t | |||
2962 | nfs4_laundromat(void) | 3071 | nfs4_laundromat(void) |
2963 | { | 3072 | { |
2964 | struct nfs4_client *clp; | 3073 | struct nfs4_client *clp; |
2965 | struct nfs4_stateowner *sop; | 3074 | struct nfs4_openowner *oo; |
2966 | struct nfs4_delegation *dp; | 3075 | struct nfs4_delegation *dp; |
2967 | struct list_head *pos, *next, reaplist; | 3076 | struct list_head *pos, *next, reaplist; |
2968 | time_t cutoff = get_seconds() - nfsd4_lease; | 3077 | time_t cutoff = get_seconds() - nfsd4_lease; |
@@ -3019,16 +3128,14 @@ nfs4_laundromat(void) | |||
3019 | } | 3128 | } |
3020 | test_val = nfsd4_lease; | 3129 | test_val = nfsd4_lease; |
3021 | list_for_each_safe(pos, next, &close_lru) { | 3130 | list_for_each_safe(pos, next, &close_lru) { |
3022 | sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); | 3131 | oo = container_of(pos, struct nfs4_openowner, oo_close_lru); |
3023 | if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { | 3132 | if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { |
3024 | u = sop->so_time - cutoff; | 3133 | u = oo->oo_time - cutoff; |
3025 | if (test_val > u) | 3134 | if (test_val > u) |
3026 | test_val = u; | 3135 | test_val = u; |
3027 | break; | 3136 | break; |
3028 | } | 3137 | } |
3029 | dprintk("NFSD: purging unused open stateowner (so_id %d)\n", | 3138 | release_openowner(oo); |
3030 | sop->so_id); | ||
3031 | release_openowner(sop); | ||
3032 | } | 3139 | } |
3033 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) | 3140 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) |
3034 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; | 3141 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; |
@@ -3050,30 +3157,17 @@ laundromat_main(struct work_struct *not_used) | |||
3050 | queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); | 3157 | queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); |
3051 | } | 3158 | } |
3052 | 3159 | ||
3053 | static struct nfs4_stateowner * | 3160 | static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) |
3054 | search_close_lru(u32 st_id, int flags) | ||
3055 | { | 3161 | { |
3056 | struct nfs4_stateowner *local = NULL; | 3162 | if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) |
3057 | 3163 | return nfserr_bad_stateid; | |
3058 | if (flags & CLOSE_STATE) { | 3164 | return nfs_ok; |
3059 | list_for_each_entry(local, &close_lru, so_close_lru) { | ||
3060 | if (local->so_id == st_id) | ||
3061 | return local; | ||
3062 | } | ||
3063 | } | ||
3064 | return NULL; | ||
3065 | } | ||
3066 | |||
3067 | static inline int | ||
3068 | nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) | ||
3069 | { | ||
3070 | return fhp->fh_dentry->d_inode != stp->st_file->fi_inode; | ||
3071 | } | 3165 | } |
3072 | 3166 | ||
3073 | static int | 3167 | static int |
3074 | STALE_STATEID(stateid_t *stateid) | 3168 | STALE_STATEID(stateid_t *stateid) |
3075 | { | 3169 | { |
3076 | if (stateid->si_boot == boot_time) | 3170 | if (stateid->si_opaque.so_clid.cl_boot == boot_time) |
3077 | return 0; | 3171 | return 0; |
3078 | dprintk("NFSD: stale stateid " STATEID_FMT "!\n", | 3172 | dprintk("NFSD: stale stateid " STATEID_FMT "!\n", |
3079 | STATEID_VAL(stateid)); | 3173 | STATEID_VAL(stateid)); |
@@ -3096,7 +3190,7 @@ access_permit_write(unsigned long access_bmap) | |||
3096 | } | 3190 | } |
3097 | 3191 | ||
3098 | static | 3192 | static |
3099 | __be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) | 3193 | __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) |
3100 | { | 3194 | { |
3101 | __be32 status = nfserr_openmode; | 3195 | __be32 status = nfserr_openmode; |
3102 | 3196 | ||
@@ -3139,68 +3233,80 @@ grace_disallows_io(struct inode *inode) | |||
3139 | return locks_in_grace() && mandatory_lock(inode); | 3233 | return locks_in_grace() && mandatory_lock(inode); |
3140 | } | 3234 | } |
3141 | 3235 | ||
3142 | static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) | 3236 | /* Returns true iff a is later than b: */ |
3237 | static bool stateid_generation_after(stateid_t *a, stateid_t *b) | ||
3238 | { | ||
3239 | return (s32)a->si_generation - (s32)b->si_generation > 0; | ||
3240 | } | ||
3241 | |||
3242 | static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) | ||
3143 | { | 3243 | { |
3144 | /* | 3244 | /* |
3145 | * When sessions are used the stateid generation number is ignored | 3245 | * When sessions are used the stateid generation number is ignored |
3146 | * when it is zero. | 3246 | * when it is zero. |
3147 | */ | 3247 | */ |
3148 | if ((flags & HAS_SESSION) && in->si_generation == 0) | 3248 | if (has_session && in->si_generation == 0) |
3149 | goto out; | 3249 | return nfs_ok; |
3250 | |||
3251 | if (in->si_generation == ref->si_generation) | ||
3252 | return nfs_ok; | ||
3150 | 3253 | ||
3151 | /* If the client sends us a stateid from the future, it's buggy: */ | 3254 | /* If the client sends us a stateid from the future, it's buggy: */ |
3152 | if (in->si_generation > ref->si_generation) | 3255 | if (stateid_generation_after(in, ref)) |
3153 | return nfserr_bad_stateid; | 3256 | return nfserr_bad_stateid; |
3154 | /* | 3257 | /* |
3155 | * The following, however, can happen. For example, if the | 3258 | * However, we could see a stateid from the past, even from a |
3156 | * client sends an open and some IO at the same time, the open | 3259 | * non-buggy client. For example, if the client sends a lock |
3157 | * may bump si_generation while the IO is still in flight. | 3260 | * while some IO is outstanding, the lock may bump si_generation |
3158 | * Thanks to hard links and renames, the client never knows what | 3261 | * while the IO is still in flight. The client could avoid that |
3159 | * file an open will affect. So it could avoid that situation | 3262 | * situation by waiting for responses on all the IO requests, |
3160 | * only by serializing all opens and IO from the same open | 3263 | * but better performance may result in retrying IO that |
3161 | * owner. To recover from the old_stateid error, the client | 3264 | * receives an old_stateid error if requests are rarely |
3162 | * will just have to retry the IO: | 3265 | * reordered in flight: |
3163 | */ | 3266 | */ |
3164 | if (in->si_generation < ref->si_generation) | 3267 | return nfserr_old_stateid; |
3165 | return nfserr_old_stateid; | ||
3166 | out: | ||
3167 | return nfs_ok; | ||
3168 | } | 3268 | } |
3169 | 3269 | ||
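The signed subtraction in stateid_generation_after() is what keeps the ordering test correct once si_generation wraps past 2^31; the old unsigned compare would misorder stateids around the wrap point. A minimal standalone sketch of the idiom, with hypothetical values (the kernel version casts each operand to s32 before subtracting, which is equivalent in effect):

#include <stdio.h>
#include <stdint.h>

/* Serial-number comparison, as in stateid_generation_after():
 * nonzero iff a is later than b, even across u32 wraparound. */
static int generation_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	printf("%d\n", generation_after(5, 3));           /* 1: ordinary case */
	printf("%d\n", generation_after(1, 0xfffffffeu)); /* 1: wrapped past 0 */
	printf("%d\n", generation_after(3, 5));           /* 0: a is older */
	return 0;
}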
3170 | static int is_delegation_stateid(stateid_t *stateid) | 3270 | __be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) |
3171 | { | 3271 | { |
3172 | return stateid->si_fileid == 0; | 3272 | struct nfs4_stid *s; |
3173 | } | 3273 | struct nfs4_ol_stateid *ols; |
3274 | __be32 status; | ||
3174 | 3275 | ||
3175 | static int is_open_stateid(struct nfs4_stateid *stateid) | 3276 | if (STALE_STATEID(stateid)) |
3176 | { | 3277 | return nfserr_stale_stateid; |
3177 | return stateid->st_openstp == NULL; | 3278 | |
3279 | s = find_stateid(cl, stateid); | ||
3280 | if (!s) | ||
3281 | return nfserr_stale_stateid; | ||
3282 | status = check_stateid_generation(stateid, &s->sc_stateid, 1); | ||
3283 | if (status) | ||
3284 | return status; | ||
3285 | if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) | ||
3286 | return nfs_ok; | ||
3287 | ols = openlockstateid(s); | ||
3288 | if (ols->st_stateowner->so_is_open_owner | ||
3289 | && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) | ||
3290 | return nfserr_bad_stateid; | ||
3291 | return nfs_ok; | ||
3178 | } | 3292 | } |
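Much of the renaming in this file follows from one structural move: the fields common to every stateid were pulled into struct nfs4_stid, which the open/lock and delegation variants embed. An abridged sketch of the layering this code assumes (full definitions live in the state header; fields beyond these are omitted):

struct nfs4_stid {
	unsigned char		sc_type;	/* NFS4_{OPEN,LOCK,DELEG,CLOSED}_STID */
	stateid_t		sc_stateid;
	struct nfs4_client	*sc_client;
};

struct nfs4_ol_stateid {	/* open and lock stateids */
	struct nfs4_stid	st_stid;
	/* ... st_file, st_stateowner, st_access_bmap, st_openstp ... */
};

/* openlockstateid() and delegstateid() are container_of()-style
 * downcasts, safe only after sc_type has been checked. */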
3179 | 3293 | ||
3180 | __be32 nfs4_validate_stateid(stateid_t *stateid, int flags) | 3294 | static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) |
3181 | { | 3295 | { |
3182 | struct nfs4_stateid *stp = NULL; | 3296 | struct nfs4_client *cl; |
3183 | __be32 status = nfserr_stale_stateid; | ||
3184 | 3297 | ||
3298 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | ||
3299 | return nfserr_bad_stateid; | ||
3185 | if (STALE_STATEID(stateid)) | 3300 | if (STALE_STATEID(stateid)) |
3186 | goto out; | 3301 | return nfserr_stale_stateid; |
3187 | 3302 | cl = find_confirmed_client(&stateid->si_opaque.so_clid); | |
3188 | status = nfserr_expired; | 3303 | if (!cl) |
3189 | stp = search_for_stateid(stateid); | 3304 | return nfserr_expired; |
3190 | if (!stp) | 3305 | *s = find_stateid_by_type(cl, stateid, typemask); |
3191 | goto out; | 3306 | if (!*s) |
3192 | status = nfserr_bad_stateid; | 3307 | return nfserr_bad_stateid; |
3193 | 3308 | return nfs_ok; | |
3194 | if (!stp->st_stateowner->so_confirmed) | ||
3195 | goto out; | ||
3196 | |||
3197 | status = check_stateid_generation(stateid, &stp->st_stateid, flags); | ||
3198 | if (status) | ||
3199 | goto out; | ||
3200 | 3309 | ||
3201 | status = nfs_ok; | ||
3202 | out: | ||
3203 | return status; | ||
3204 | } | 3310 | } |
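nfsd4_lookup_stateid() becomes the single place that tells the failure modes apart (special stateid, stale boot time, expired client, unknown id), while the typemask lets each operation declare up front which stid flavors it accepts. A sketch of a caller, modeled on the nfsd4_delegreturn() hunk further down:

struct nfs4_stid *s;
struct nfs4_delegation *dp;
__be32 status;

/* DELEGRETURN accepts only delegation stateids: */
status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s);
if (status)
	return status;		/* bad/stale/expired, already distinguished */
dp = delegstateid(s);		/* downcast is safe: the typemask matched */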
3205 | 3311 | ||
3206 | /* | 3312 | /* |
@@ -3210,7 +3316,8 @@ __be32 | |||
3210 | nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | 3316 | nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, |
3211 | stateid_t *stateid, int flags, struct file **filpp) | 3317 | stateid_t *stateid, int flags, struct file **filpp) |
3212 | { | 3318 | { |
3213 | struct nfs4_stateid *stp = NULL; | 3319 | struct nfs4_stid *s; |
3320 | struct nfs4_ol_stateid *stp = NULL; | ||
3214 | struct nfs4_delegation *dp = NULL; | 3321 | struct nfs4_delegation *dp = NULL; |
3215 | struct svc_fh *current_fh = &cstate->current_fh; | 3322 | struct svc_fh *current_fh = &cstate->current_fh; |
3216 | struct inode *ino = current_fh->fh_dentry->d_inode; | 3323 | struct inode *ino = current_fh->fh_dentry->d_inode; |
@@ -3222,60 +3329,47 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
3222 | if (grace_disallows_io(ino)) | 3329 | if (grace_disallows_io(ino)) |
3223 | return nfserr_grace; | 3330 | return nfserr_grace; |
3224 | 3331 | ||
3225 | if (nfsd4_has_session(cstate)) | ||
3226 | flags |= HAS_SESSION; | ||
3227 | |||
3228 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | 3332 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
3229 | return check_special_stateids(current_fh, stateid, flags); | 3333 | return check_special_stateids(current_fh, stateid, flags); |
3230 | 3334 | ||
3231 | status = nfserr_stale_stateid; | 3335 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); |
3232 | if (STALE_STATEID(stateid)) | 3336 | if (status) |
3337 | return status; | ||
3338 | status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); | ||
3339 | if (status) | ||
3233 | goto out; | 3340 | goto out; |
3234 | 3341 | switch (s->sc_type) { | |
3235 | /* | 3342 | case NFS4_DELEG_STID: |
3236 | * We assume that any stateid that has the current boot time, | 3343 | dp = delegstateid(s); |
3237 | * but that we can't find, is expired: | ||
3238 | */ | ||
3239 | status = nfserr_expired; | ||
3240 | if (is_delegation_stateid(stateid)) { | ||
3241 | dp = find_delegation_stateid(ino, stateid); | ||
3242 | if (!dp) | ||
3243 | goto out; | ||
3244 | status = check_stateid_generation(stateid, &dp->dl_stateid, | ||
3245 | flags); | ||
3246 | if (status) | ||
3247 | goto out; | ||
3248 | status = nfs4_check_delegmode(dp, flags); | 3344 | status = nfs4_check_delegmode(dp, flags); |
3249 | if (status) | 3345 | if (status) |
3250 | goto out; | 3346 | goto out; |
3251 | renew_client(dp->dl_client); | ||
3252 | if (filpp) { | 3347 | if (filpp) { |
3253 | *filpp = dp->dl_file->fi_deleg_file; | 3348 | *filpp = dp->dl_file->fi_deleg_file; |
3254 | BUG_ON(!*filpp); | 3349 | BUG_ON(!*filpp); |
3255 | } | 3350 | } |
3256 | } else { /* open or lock stateid */ | 3351 | break; |
3257 | stp = find_stateid(stateid, flags); | 3352 | case NFS4_OPEN_STID: |
3258 | if (!stp) | 3353 | case NFS4_LOCK_STID: |
3259 | goto out; | 3354 | stp = openlockstateid(s); |
3260 | status = nfserr_bad_stateid; | 3355 | status = nfs4_check_fh(current_fh, stp); |
3261 | if (nfs4_check_fh(current_fh, stp)) | ||
3262 | goto out; | ||
3263 | if (!stp->st_stateowner->so_confirmed) | ||
3264 | goto out; | ||
3265 | status = check_stateid_generation(stateid, &stp->st_stateid, | ||
3266 | flags); | ||
3267 | if (status) | 3356 | if (status) |
3268 | goto out; | 3357 | goto out; |
3358 | if (stp->st_stateowner->so_is_open_owner | ||
3359 | && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) | ||
3360 | goto out; | ||
3269 | status = nfs4_check_openmode(stp, flags); | 3361 | status = nfs4_check_openmode(stp, flags); |
3270 | if (status) | 3362 | if (status) |
3271 | goto out; | 3363 | goto out; |
3272 | renew_client(stp->st_stateowner->so_client); | ||
3273 | if (filpp) { | 3364 | if (filpp) { |
3274 | if (flags & RD_STATE) | 3365 | if (flags & RD_STATE) |
3275 | *filpp = find_readable_file(stp->st_file); | 3366 | *filpp = find_readable_file(stp->st_file); |
3276 | else | 3367 | else |
3277 | *filpp = find_writeable_file(stp->st_file); | 3368 | *filpp = find_writeable_file(stp->st_file); |
3278 | } | 3369 | } |
3370 | break; | ||
3371 | default: | ||
3372 | return nfserr_bad_stateid; | ||
3279 | } | 3373 | } |
3280 | status = nfs_ok; | 3374 | status = nfs_ok; |
3281 | out: | 3375 | out: |
@@ -3283,18 +3377,9 @@ out: | |||
3283 | } | 3377 | } |
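For context, a sketch of how a read-side consumer calls into this (modeled on the READ op; the actual caller is outside this hunk). The flags select both the open mode to enforce and which struct file to hand back:

struct file *filp = NULL;
__be32 status;

status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid,
				    RD_STATE, &filp);
/* On nfs_ok, filp is a readable file regardless of whether the client
 * sent an open stateid, a lock stateid, or a delegation stateid. */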
3284 | 3378 | ||
3285 | static __be32 | 3379 | static __be32 |
3286 | nfsd4_free_delegation_stateid(stateid_t *stateid) | 3380 | nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) |
3287 | { | 3381 | { |
3288 | struct nfs4_delegation *dp = search_for_delegation(stateid); | 3382 | if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) |
3289 | if (dp) | ||
3290 | return nfserr_locks_held; | ||
3291 | return nfserr_bad_stateid; | ||
3292 | } | ||
3293 | |||
3294 | static __be32 | ||
3295 | nfsd4_free_lock_stateid(struct nfs4_stateid *stp) | ||
3296 | { | ||
3297 | if (check_for_locks(stp->st_file, stp->st_stateowner)) | ||
3298 | return nfserr_locks_held; | 3383 | return nfserr_locks_held; |
3299 | release_lock_stateid(stp); | 3384 | release_lock_stateid(stp); |
3300 | return nfs_ok; | 3385 | return nfs_ok; |
@@ -3307,51 +3392,40 @@ __be32 | |||
3307 | nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3392 | nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
3308 | struct nfsd4_test_stateid *test_stateid) | 3393 | struct nfsd4_test_stateid *test_stateid) |
3309 | { | 3394 | { |
3310 | test_stateid->ts_has_session = nfsd4_has_session(cstate); | 3395 | /* real work is done during encoding */ |
3311 | return nfs_ok; | 3396 | return nfs_ok; |
3312 | } | 3397 | } |
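The TEST_STATEID proc body can be empty because each submitted stateid has to be checked while the reply is built; nfs4_validate_stateid() above is the per-stateid worker the XDR encoder is expected to call. Roughly (the loop variable and result-array names here are illustrative, not the actual XDR structs):

for (i = 0; i < test_stateid->ts_num_ids; i++)
	/* one of nfs_ok, nfserr_bad_stateid, nfserr_stale_stateid,
	 * or nfserr_old_stateid per tested id: */
	results[i] = nfs4_validate_stateid(cl, &ids[i]);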
3313 | 3398 | ||
3314 | /* | ||
3315 | * Free a state id | ||
3316 | */ | ||
3317 | __be32 | 3399 | __be32 |
3318 | nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3400 | nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
3319 | struct nfsd4_free_stateid *free_stateid) | 3401 | struct nfsd4_free_stateid *free_stateid) |
3320 | { | 3402 | { |
3321 | stateid_t *stateid = &free_stateid->fr_stateid; | 3403 | stateid_t *stateid = &free_stateid->fr_stateid; |
3322 | struct nfs4_stateid *stp; | 3404 | struct nfs4_stid *s; |
3323 | __be32 ret; | 3405 | struct nfs4_client *cl = cstate->session->se_client; |
3406 | __be32 ret = nfserr_bad_stateid; | ||
3324 | 3407 | ||
3325 | nfs4_lock_state(); | 3408 | nfs4_lock_state(); |
3326 | if (is_delegation_stateid(stateid)) { | 3409 | s = find_stateid(cl, stateid); |
3327 | ret = nfsd4_free_delegation_stateid(stateid); | 3410 | if (!s) |
3328 | goto out; | ||
3329 | } | ||
3330 | |||
3331 | stp = search_for_stateid(stateid); | ||
3332 | if (!stp) { | ||
3333 | ret = nfserr_bad_stateid; | ||
3334 | goto out; | 3411 | goto out; |
3335 | } | 3412 | switch (s->sc_type) { |
3336 | if (stateid->si_generation != 0) { | 3413 | case NFS4_DELEG_STID: |
3337 | if (stateid->si_generation < stp->st_stateid.si_generation) { | ||
3338 | ret = nfserr_old_stateid; | ||
3339 | goto out; | ||
3340 | } | ||
3341 | if (stateid->si_generation > stp->st_stateid.si_generation) { | ||
3342 | ret = nfserr_bad_stateid; | ||
3343 | goto out; | ||
3344 | } | ||
3345 | } | ||
3346 | |||
3347 | if (is_open_stateid(stp)) { | ||
3348 | ret = nfserr_locks_held; | 3414 | ret = nfserr_locks_held; |
3349 | goto out; | 3415 | goto out; |
3350 | } else { | 3416 | case NFS4_OPEN_STID: |
3351 | ret = nfsd4_free_lock_stateid(stp); | 3417 | case NFS4_LOCK_STID: |
3352 | goto out; | 3418 | ret = check_stateid_generation(stateid, &s->sc_stateid, 1); |
3419 | if (ret) | ||
3420 | goto out; | ||
3421 | if (s->sc_type == NFS4_LOCK_STID) | ||
3422 | ret = nfsd4_free_lock_stateid(openlockstateid(s)); | ||
3423 | else | ||
3424 | ret = nfserr_locks_held; | ||
3425 | break; | ||
3426 | default: | ||
3427 | ret = nfserr_bad_stateid; | ||
3353 | } | 3428 | } |
3354 | |||
3355 | out: | 3429 | out: |
3356 | nfs4_unlock_state(); | 3430 | nfs4_unlock_state(); |
3357 | return ret; | 3431 | return ret; |
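Summarized, the FREE_STATEID outcomes after this rework (a reading aid; every case is visible in the switch above):

/*
 *   stateid not found for this client   -> nfserr_bad_stateid
 *   NFS4_DELEG_STID                     -> nfserr_locks_held (use DELEGRETURN)
 *   NFS4_OPEN_STID                      -> nfserr_locks_held (use CLOSE)
 *   NFS4_LOCK_STID, locks outstanding   -> nfserr_locks_held
 *   NFS4_LOCK_STID, no locks            -> stateid released, nfs_ok
 *   generation mismatch                 -> nfserr_old_stateid / nfserr_bad_stateid
 *   any other type                      -> nfserr_bad_stateid
 */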
@@ -3364,124 +3438,64 @@ setlkflg (int type) | |||
3364 | RD_STATE : WR_STATE; | 3438 | RD_STATE : WR_STATE; |
3365 | } | 3439 | } |
3366 | 3440 | ||
3441 | static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) | ||
3442 | { | ||
3443 | struct svc_fh *current_fh = &cstate->current_fh; | ||
3444 | struct nfs4_stateowner *sop = stp->st_stateowner; | ||
3445 | __be32 status; | ||
3446 | |||
3447 | status = nfsd4_check_seqid(cstate, sop, seqid); | ||
3448 | if (status) | ||
3449 | return status; | ||
3450 | if (stp->st_stid.sc_type == NFS4_CLOSED_STID) | ||
3451 | /* | ||
3452 | * "Closed" stateid's exist *only* to return | ||
3453 | * nfserr_replay_me from the previous step. | ||
3454 | */ | ||
3455 | return nfserr_bad_stateid; | ||
3456 | status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); | ||
3457 | if (status) | ||
3458 | return status; | ||
3459 | return nfs4_check_fh(current_fh, stp); | ||
3460 | } | ||
3461 | |||
3367 | /* | 3462 | /* |
3368 | * Checks for sequence id mutating operations. | 3463 | * Checks for sequence id mutating operations. |
3369 | */ | 3464 | */ |
3370 | static __be32 | 3465 | static __be32 |
3371 | nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, | 3466 | nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, |
3372 | stateid_t *stateid, int flags, | 3467 | stateid_t *stateid, char typemask, |
3373 | struct nfs4_stateowner **sopp, | 3468 | struct nfs4_ol_stateid **stpp) |
3374 | struct nfs4_stateid **stpp, struct nfsd4_lock *lock) | ||
3375 | { | 3469 | { |
3376 | struct nfs4_stateid *stp; | ||
3377 | struct nfs4_stateowner *sop; | ||
3378 | struct svc_fh *current_fh = &cstate->current_fh; | ||
3379 | __be32 status; | 3470 | __be32 status; |
3471 | struct nfs4_stid *s; | ||
3380 | 3472 | ||
3381 | dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, | 3473 | dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, |
3382 | seqid, STATEID_VAL(stateid)); | 3474 | seqid, STATEID_VAL(stateid)); |
3383 | 3475 | ||
3384 | *stpp = NULL; | 3476 | *stpp = NULL; |
3385 | *sopp = NULL; | 3477 | status = nfsd4_lookup_stateid(stateid, typemask, &s); |
3386 | 3478 | if (status) | |
3387 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { | 3479 | return status; |
3388 | dprintk("NFSD: preprocess_seqid_op: magic stateid!\n"); | 3480 | *stpp = openlockstateid(s); |
3389 | return nfserr_bad_stateid; | 3481 | cstate->replay_owner = (*stpp)->st_stateowner; |
3390 | } | ||
3391 | |||
3392 | if (STALE_STATEID(stateid)) | ||
3393 | return nfserr_stale_stateid; | ||
3394 | |||
3395 | if (nfsd4_has_session(cstate)) | ||
3396 | flags |= HAS_SESSION; | ||
3397 | |||
3398 | /* | ||
3399 | * We return BAD_STATEID if filehandle doesn't match stateid, | ||
3400 | * the confirmed flag is incorrectly set, or the generation | ||
3401 | * number is incorrect. | ||
3402 | */ | ||
3403 | stp = find_stateid(stateid, flags); | ||
3404 | if (stp == NULL) { | ||
3405 | /* | ||
3406 | * Also, we should make sure this isn't just the result of | ||
3407 | * a replayed close: | ||
3408 | */ | ||
3409 | sop = search_close_lru(stateid->si_stateownerid, flags); | ||
3410 | /* It's not stale; let's assume it's expired: */ | ||
3411 | if (sop == NULL) | ||
3412 | return nfserr_expired; | ||
3413 | *sopp = sop; | ||
3414 | goto check_replay; | ||
3415 | } | ||
3416 | |||
3417 | *stpp = stp; | ||
3418 | *sopp = sop = stp->st_stateowner; | ||
3419 | |||
3420 | if (lock) { | ||
3421 | clientid_t *lockclid = &lock->v.new.clientid; | ||
3422 | struct nfs4_client *clp = sop->so_client; | ||
3423 | int lkflg = 0; | ||
3424 | __be32 status; | ||
3425 | |||
3426 | lkflg = setlkflg(lock->lk_type); | ||
3427 | |||
3428 | if (lock->lk_is_new) { | ||
3429 | if (!sop->so_is_open_owner) | ||
3430 | return nfserr_bad_stateid; | ||
3431 | if (!(flags & HAS_SESSION) && | ||
3432 | !same_clid(&clp->cl_clientid, lockclid)) | ||
3433 | return nfserr_bad_stateid; | ||
3434 | /* stp is the open stateid */ | ||
3435 | status = nfs4_check_openmode(stp, lkflg); | ||
3436 | if (status) | ||
3437 | return status; | ||
3438 | } else { | ||
3439 | /* stp is the lock stateid */ | ||
3440 | status = nfs4_check_openmode(stp->st_openstp, lkflg); | ||
3441 | if (status) | ||
3442 | return status; | ||
3443 | } | ||
3444 | } | ||
3445 | 3482 | ||
3446 | if (nfs4_check_fh(current_fh, stp)) { | 3483 | return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); |
3447 | dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); | 3484 | } |
3448 | return nfserr_bad_stateid; | ||
3449 | } | ||
3450 | 3485 | ||
3451 | /* | 3486 | static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) |
3452 | * We now validate the seqid and stateid generation numbers. | 3487 | { |
3453 | * For the moment, we ignore the possibility of | 3488 | __be32 status; |
3454 | * generation number wraparound. | 3489 | struct nfs4_openowner *oo; |
3455 | */ | ||
3456 | if (!(flags & HAS_SESSION) && seqid != sop->so_seqid) | ||
3457 | goto check_replay; | ||
3458 | 3490 | ||
3459 | if (sop->so_confirmed && flags & CONFIRM) { | 3491 | status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, |
3460 | dprintk("NFSD: preprocess_seqid_op: expected" | 3492 | NFS4_OPEN_STID, stpp); |
3461 | " unconfirmed stateowner!\n"); | ||
3462 | return nfserr_bad_stateid; | ||
3463 | } | ||
3464 | if (!sop->so_confirmed && !(flags & CONFIRM)) { | ||
3465 | dprintk("NFSD: preprocess_seqid_op: stateowner not" | ||
3466 | " confirmed yet!\n"); | ||
3467 | return nfserr_bad_stateid; | ||
3468 | } | ||
3469 | status = check_stateid_generation(stateid, &stp->st_stateid, flags); | ||
3470 | if (status) | 3493 | if (status) |
3471 | return status; | 3494 | return status; |
3472 | renew_client(sop->so_client); | 3495 | oo = openowner((*stpp)->st_stateowner); |
3496 | if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) | ||
3497 | return nfserr_bad_stateid; | ||
3473 | return nfs_ok; | 3498 | return nfs_ok; |
3474 | |||
3475 | check_replay: | ||
3476 | if (seqid == sop->so_seqid - 1) { | ||
3477 | dprintk("NFSD: preprocess_seqid_op: retransmission?\n"); | ||
3478 | /* indicate replay to calling function */ | ||
3479 | return nfserr_replay_me; | ||
3480 | } | ||
3481 | dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", | ||
3482 | sop->so_seqid, seqid); | ||
3483 | *sopp = NULL; | ||
3484 | return nfserr_bad_seqid; | ||
3485 | } | 3499 | } |
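The retransmission handling that used to live in the check_replay: tail is now centralized in nfsd4_check_seqid(), defined earlier in this series. Its assumed shape, for reference:

static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate,
				struct nfs4_stateowner *so, u32 seqid)
{
	if (nfsd4_has_session(cstate))
		return nfs_ok;		 /* sessions handle replay themselves */
	if (seqid == so->so_seqid - 1)
		return nfserr_replay_me; /* retransmission: replay the reply */
	if (seqid == so->so_seqid)
		return nfs_ok;		 /* expected next request */
	return nfserr_bad_seqid;
}

Note that cstate->replay_owner is set before the checks run, so an nfserr_replay_me return can be answered from the owner's replay buffer; the NFS4_CLOSED_STID test exists only so a replayed CLOSE takes that path instead of finding live state.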
3486 | 3500 | ||
3487 | __be32 | 3501 | __be32 |
@@ -3489,8 +3503,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3489 | struct nfsd4_open_confirm *oc) | 3503 | struct nfsd4_open_confirm *oc) |
3490 | { | 3504 | { |
3491 | __be32 status; | 3505 | __be32 status; |
3492 | struct nfs4_stateowner *sop; | 3506 | struct nfs4_openowner *oo; |
3493 | struct nfs4_stateid *stp; | 3507 | struct nfs4_ol_stateid *stp; |
3494 | 3508 | ||
3495 | dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", | 3509 | dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", |
3496 | (int)cstate->current_fh.fh_dentry->d_name.len, | 3510 | (int)cstate->current_fh.fh_dentry->d_name.len, |
@@ -3502,38 +3516,52 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3502 | 3516 | ||
3503 | nfs4_lock_state(); | 3517 | nfs4_lock_state(); |
3504 | 3518 | ||
3505 | if ((status = nfs4_preprocess_seqid_op(cstate, | 3519 | status = nfs4_preprocess_seqid_op(cstate, |
3506 | oc->oc_seqid, &oc->oc_req_stateid, | 3520 | oc->oc_seqid, &oc->oc_req_stateid, |
3507 | CONFIRM | OPEN_STATE, | 3521 | NFS4_OPEN_STID, &stp); |
3508 | &oc->oc_stateowner, &stp, NULL))) | 3522 | if (status) |
3509 | goto out; | 3523 | goto out; |
3510 | 3524 | oo = openowner(stp->st_stateowner); | |
3511 | sop = oc->oc_stateowner; | 3525 | status = nfserr_bad_stateid; |
3512 | sop->so_confirmed = 1; | 3526 | if (oo->oo_flags & NFS4_OO_CONFIRMED) |
3513 | update_stateid(&stp->st_stateid); | 3527 | goto out; |
3514 | memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); | 3528 | oo->oo_flags |= NFS4_OO_CONFIRMED; |
3529 | update_stateid(&stp->st_stid.sc_stateid); | ||
3530 | memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | ||
3515 | dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", | 3531 | dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", |
3516 | __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid)); | 3532 | __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); |
3517 | 3533 | ||
3518 | nfsd4_create_clid_dir(sop->so_client); | 3534 | nfsd4_create_clid_dir(oo->oo_owner.so_client); |
3535 | status = nfs_ok; | ||
3519 | out: | 3536 | out: |
3520 | if (oc->oc_stateowner) { | 3537 | if (!cstate->replay_owner) |
3521 | nfs4_get_stateowner(oc->oc_stateowner); | 3538 | nfs4_unlock_state(); |
3522 | cstate->replay_owner = oc->oc_stateowner; | ||
3523 | } | ||
3524 | nfs4_unlock_state(); | ||
3525 | return status; | 3539 | return status; |
3526 | } | 3540 | } |
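Two behavioural points hide in this hunk: confirming an already-confirmed open now fails with nfserr_bad_stateid rather than silently succeeding, and the state lock is deliberately left held when a replay owner was recorded. The convention shared by all the seqid-mutating ops below is roughly:

out:
	/* If preprocessing stashed cstate->replay_owner, the reply encoder
	 * finishes the replay and drops the state lock afterwards: */
	if (!cstate->replay_owner)
		nfs4_unlock_state();
	return status;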
3527 | 3541 | ||
3528 | static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to_access) | 3542 | static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) |
3529 | { | 3543 | { |
3530 | int i; | 3544 | if (!test_bit(access, &stp->st_access_bmap)) |
3545 | return; | ||
3546 | nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); | ||
3547 | __clear_bit(access, &stp->st_access_bmap); | ||
3548 | } | ||
3531 | 3549 | ||
3532 | for (i = 1; i < 4; i++) { | 3550 | static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) |
3533 | if (test_bit(i, &stp->st_access_bmap) && !(i & to_access)) { | 3551 | { |
3534 | nfs4_file_put_access(stp->st_file, i); | 3552 | switch (to_access) { |
3535 | __clear_bit(i, &stp->st_access_bmap); | 3553 | case NFS4_SHARE_ACCESS_READ: |
3536 | } | 3554 | nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); |
3555 | nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); | ||
3556 | break; | ||
3557 | case NFS4_SHARE_ACCESS_WRITE: | ||
3558 | nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); | ||
3559 | nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); | ||
3560 | break; | ||
3561 | case NFS4_SHARE_ACCESS_BOTH: | ||
3562 | break; | ||
3563 | default: | ||
3564 | BUG(); | ||
3537 | } | 3565 | } |
3538 | } | 3566 | } |
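Concretely, with the NFS4_SHARE_ACCESS_* values (READ=1, WRITE=2, BOTH=3) doubling as bit indices into st_access_bmap, a downgrade of a READ|WRITE open to READ-only plays out like this:

/* stp->st_access_bmap may have the READ, WRITE and/or BOTH bits set. */
nfs4_stateid_downgrade(stp, NFS4_SHARE_ACCESS_READ);
/* -> clears the WRITE and BOTH bits if set, dropping one file
 *    reference per cleared bit via nfs4_file_put_access();
 *    a set READ bit survives untouched. */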
3539 | 3567 | ||
@@ -3553,24 +3581,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
3553 | struct nfsd4_open_downgrade *od) | 3581 | struct nfsd4_open_downgrade *od) |
3554 | { | 3582 | { |
3555 | __be32 status; | 3583 | __be32 status; |
3556 | struct nfs4_stateid *stp; | 3584 | struct nfs4_ol_stateid *stp; |
3557 | 3585 | ||
3558 | dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", | 3586 | dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", |
3559 | (int)cstate->current_fh.fh_dentry->d_name.len, | 3587 | (int)cstate->current_fh.fh_dentry->d_name.len, |
3560 | cstate->current_fh.fh_dentry->d_name.name); | 3588 | cstate->current_fh.fh_dentry->d_name.name); |
3561 | 3589 | ||
3562 | if (!access_valid(od->od_share_access, cstate->minorversion) | 3590 | /* We don't yet support WANT bits: */ |
3563 | || !deny_valid(od->od_share_deny)) | 3591 | od->od_share_access &= NFS4_SHARE_ACCESS_MASK; |
3564 | return nfserr_inval; | ||
3565 | 3592 | ||
3566 | nfs4_lock_state(); | 3593 | nfs4_lock_state(); |
3567 | if ((status = nfs4_preprocess_seqid_op(cstate, | 3594 | status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, |
3568 | od->od_seqid, | 3595 | &od->od_stateid, &stp); |
3569 | &od->od_stateid, | 3596 | if (status) |
3570 | OPEN_STATE, | ||
3571 | &od->od_stateowner, &stp, NULL))) | ||
3572 | goto out; | 3597 | goto out; |
3573 | |||
3574 | status = nfserr_inval; | 3598 | status = nfserr_inval; |
3575 | if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { | 3599 | if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { |
3576 | dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", | 3600 | dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", |
@@ -3582,22 +3606,45 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
3582 | stp->st_deny_bmap, od->od_share_deny); | 3606 | stp->st_deny_bmap, od->od_share_deny); |
3583 | goto out; | 3607 | goto out; |
3584 | } | 3608 | } |
3585 | nfs4_file_downgrade(stp, od->od_share_access); | 3609 | nfs4_stateid_downgrade(stp, od->od_share_access); |
3586 | 3610 | ||
3587 | reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); | 3611 | reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); |
3588 | 3612 | ||
3589 | update_stateid(&stp->st_stateid); | 3613 | update_stateid(&stp->st_stid.sc_stateid); |
3590 | memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); | 3614 | memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
3591 | status = nfs_ok; | 3615 | status = nfs_ok; |
3592 | out: | 3616 | out: |
3593 | if (od->od_stateowner) { | 3617 | if (!cstate->replay_owner) |
3594 | nfs4_get_stateowner(od->od_stateowner); | 3618 | nfs4_unlock_state(); |
3595 | cstate->replay_owner = od->od_stateowner; | ||
3596 | } | ||
3597 | nfs4_unlock_state(); | ||
3598 | return status; | 3619 | return status; |
3599 | } | 3620 | } |
3600 | 3621 | ||
3622 | void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) | ||
3623 | { | ||
3624 | struct nfs4_openowner *oo; | ||
3625 | struct nfs4_ol_stateid *s; | ||
3626 | |||
3627 | if (!so->so_is_open_owner) | ||
3628 | return; | ||
3629 | oo = openowner(so); | ||
3630 | s = oo->oo_last_closed_stid; | ||
3631 | if (!s) | ||
3632 | return; | ||
3633 | if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { | ||
3634 | /* Release the last_closed_stid on the next seqid bump: */ | ||
3635 | oo->oo_flags |= NFS4_OO_PURGE_CLOSE; | ||
3636 | return; | ||
3637 | } | ||
3638 | oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; | ||
3639 | release_last_closed_stateid(oo); | ||
3640 | } | ||
3641 | |||
3642 | static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) | ||
3643 | { | ||
3644 | unhash_open_stateid(s); | ||
3645 | s->st_stid.sc_type = NFS4_CLOSED_STID; | ||
3646 | } | ||
3647 | |||
3601 | /* | 3648 | /* |
3602 | * nfs4_unlock_state() called after encode | 3649 | * nfs4_unlock_state() called after encode |
3603 | */ | 3650 | */ |
@@ -3606,39 +3653,37 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3606 | struct nfsd4_close *close) | 3653 | struct nfsd4_close *close) |
3607 | { | 3654 | { |
3608 | __be32 status; | 3655 | __be32 status; |
3609 | struct nfs4_stateid *stp; | 3656 | struct nfs4_openowner *oo; |
3657 | struct nfs4_ol_stateid *stp; | ||
3610 | 3658 | ||
3611 | dprintk("NFSD: nfsd4_close on file %.*s\n", | 3659 | dprintk("NFSD: nfsd4_close on file %.*s\n", |
3612 | (int)cstate->current_fh.fh_dentry->d_name.len, | 3660 | (int)cstate->current_fh.fh_dentry->d_name.len, |
3613 | cstate->current_fh.fh_dentry->d_name.name); | 3661 | cstate->current_fh.fh_dentry->d_name.name); |
3614 | 3662 | ||
3615 | nfs4_lock_state(); | 3663 | nfs4_lock_state(); |
3616 | /* check close_lru for replay */ | 3664 | status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, |
3617 | if ((status = nfs4_preprocess_seqid_op(cstate, | 3665 | &close->cl_stateid, |
3618 | close->cl_seqid, | 3666 | NFS4_OPEN_STID|NFS4_CLOSED_STID, |
3619 | &close->cl_stateid, | 3667 | &stp); |
3620 | OPEN_STATE | CLOSE_STATE, | 3668 | if (status) |
3621 | &close->cl_stateowner, &stp, NULL))) | ||
3622 | goto out; | 3669 | goto out; |
3670 | oo = openowner(stp->st_stateowner); | ||
3623 | status = nfs_ok; | 3671 | status = nfs_ok; |
3624 | update_stateid(&stp->st_stateid); | 3672 | update_stateid(&stp->st_stid.sc_stateid); |
3625 | memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); | 3673 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
3626 | 3674 | ||
3627 | /* release_stateid() calls nfsd_close() if needed */ | 3675 | nfsd4_close_open_stateid(stp); |
3628 | release_open_stateid(stp); | 3676 | oo->oo_last_closed_stid = stp; |
3629 | 3677 | ||
3630 | /* place unused nfs4_stateowners on so_close_lru list to be | 3678 | /* place unused nfs4_stateowners on so_close_lru list to be |
3631 | * released by the laundromat service after the lease period | 3679 | * released by the laundromat service after the lease period |
3632 | * to enable us to handle CLOSE replay | 3680 | * to enable us to handle CLOSE replay |
3633 | */ | 3681 | */ |
3634 | if (list_empty(&close->cl_stateowner->so_stateids)) | 3682 | if (list_empty(&oo->oo_owner.so_stateids)) |
3635 | move_to_close_lru(close->cl_stateowner); | 3683 | move_to_close_lru(oo); |
3636 | out: | 3684 | out: |
3637 | if (close->cl_stateowner) { | 3685 | if (!cstate->replay_owner) |
3638 | nfs4_get_stateowner(close->cl_stateowner); | 3686 | nfs4_unlock_state(); |
3639 | cstate->replay_owner = close->cl_stateowner; | ||
3640 | } | ||
3641 | nfs4_unlock_state(); | ||
3642 | return status; | 3687 | return status; |
3643 | } | 3688 | } |
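Taken together with nfsd4_purge_closed_stateid() above, the new CLOSE path keeps the stateid itself around just long enough to service a retransmission, where the old code could only find the openowner on the close_lru. The intended timeline, as a sketch:

/* CLOSE           -> oo_last_closed_stid = stp, sc_type = NFS4_CLOSED_STID */
/* next seqid bump -> purge pass 1: arm NFS4_OO_PURGE_CLOSE, keep stp       */
/* CLOSE replay    -> CLOSED stid found; the seqid check answers it with    */
/*                    nfserr_replay_me from the replay buffer               */
/* next seqid bump -> purge pass 2: release_last_closed_stateid(oo)         */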
3644 | 3689 | ||
@@ -3648,34 +3693,22 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3648 | { | 3693 | { |
3649 | struct nfs4_delegation *dp; | 3694 | struct nfs4_delegation *dp; |
3650 | stateid_t *stateid = &dr->dr_stateid; | 3695 | stateid_t *stateid = &dr->dr_stateid; |
3696 | struct nfs4_stid *s; | ||
3651 | struct inode *inode; | 3697 | struct inode *inode; |
3652 | __be32 status; | 3698 | __be32 status; |
3653 | int flags = 0; | ||
3654 | 3699 | ||
3655 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) | 3700 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) |
3656 | return status; | 3701 | return status; |
3657 | inode = cstate->current_fh.fh_dentry->d_inode; | 3702 | inode = cstate->current_fh.fh_dentry->d_inode; |
3658 | 3703 | ||
3659 | if (nfsd4_has_session(cstate)) | ||
3660 | flags |= HAS_SESSION; | ||
3661 | nfs4_lock_state(); | 3704 | nfs4_lock_state(); |
3662 | status = nfserr_bad_stateid; | 3705 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s); |
3663 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | 3706 | if (status) |
3664 | goto out; | ||
3665 | status = nfserr_stale_stateid; | ||
3666 | if (STALE_STATEID(stateid)) | ||
3667 | goto out; | ||
3668 | status = nfserr_bad_stateid; | ||
3669 | if (!is_delegation_stateid(stateid)) | ||
3670 | goto out; | ||
3671 | status = nfserr_expired; | ||
3672 | dp = find_delegation_stateid(inode, stateid); | ||
3673 | if (!dp) | ||
3674 | goto out; | 3707 | goto out; |
3675 | status = check_stateid_generation(stateid, &dp->dl_stateid, flags); | 3708 | dp = delegstateid(s); |
3709 | status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); | ||
3676 | if (status) | 3710 | if (status) |
3677 | goto out; | 3711 | goto out; |
3678 | renew_client(dp->dl_client); | ||
3679 | 3712 | ||
3680 | unhash_delegation(dp); | 3713 | unhash_delegation(dp); |
3681 | out: | 3714 | out: |
@@ -3713,9 +3746,6 @@ last_byte_offset(u64 start, u64 len) | |||
3713 | return end > start ? end - 1: NFS4_MAX_UINT64; | 3746 | return end > start ? end - 1: NFS4_MAX_UINT64; |
3714 | } | 3747 | } |
3715 | 3748 | ||
3716 | #define lockownerid_hashval(id) \ | ||
3717 | ((id) & LOCK_HASH_MASK) | ||
3718 | |||
3719 | static inline unsigned int | 3749 | static inline unsigned int |
3720 | lock_ownerstr_hashval(struct inode *inode, u32 cl_id, | 3750 | lock_ownerstr_hashval(struct inode *inode, u32 cl_id, |
3721 | struct xdr_netobj *ownername) | 3751 | struct xdr_netobj *ownername) |
@@ -3725,101 +3755,7 @@ lock_ownerstr_hashval(struct inode *inode, u32 cl_id, | |||
3725 | & LOCK_HASH_MASK; | 3755 | & LOCK_HASH_MASK; |
3726 | } | 3756 | } |
3727 | 3757 | ||
3728 | static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; | ||
3729 | static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; | 3758 | static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; |
3730 | static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; | ||
3731 | |||
3732 | static int | ||
3733 | same_stateid(stateid_t *id_one, stateid_t *id_two) | ||
3734 | { | ||
3735 | if (id_one->si_stateownerid != id_two->si_stateownerid) | ||
3736 | return 0; | ||
3737 | return id_one->si_fileid == id_two->si_fileid; | ||
3738 | } | ||
3739 | |||
3740 | static struct nfs4_stateid * | ||
3741 | find_stateid(stateid_t *stid, int flags) | ||
3742 | { | ||
3743 | struct nfs4_stateid *local; | ||
3744 | u32 st_id = stid->si_stateownerid; | ||
3745 | u32 f_id = stid->si_fileid; | ||
3746 | unsigned int hashval; | ||
3747 | |||
3748 | dprintk("NFSD: find_stateid flags 0x%x\n",flags); | ||
3749 | if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) { | ||
3750 | hashval = stateid_hashval(st_id, f_id); | ||
3751 | list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { | ||
3752 | if ((local->st_stateid.si_stateownerid == st_id) && | ||
3753 | (local->st_stateid.si_fileid == f_id)) | ||
3754 | return local; | ||
3755 | } | ||
3756 | } | ||
3757 | |||
3758 | if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) { | ||
3759 | hashval = stateid_hashval(st_id, f_id); | ||
3760 | list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { | ||
3761 | if ((local->st_stateid.si_stateownerid == st_id) && | ||
3762 | (local->st_stateid.si_fileid == f_id)) | ||
3763 | return local; | ||
3764 | } | ||
3765 | } | ||
3766 | return NULL; | ||
3767 | } | ||
3768 | |||
3769 | static struct nfs4_stateid * | ||
3770 | search_for_stateid(stateid_t *stid) | ||
3771 | { | ||
3772 | struct nfs4_stateid *local; | ||
3773 | unsigned int hashval = stateid_hashval(stid->si_stateownerid, stid->si_fileid); | ||
3774 | |||
3775 | list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { | ||
3776 | if (same_stateid(&local->st_stateid, stid)) | ||
3777 | return local; | ||
3778 | } | ||
3779 | |||
3780 | list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { | ||
3781 | if (same_stateid(&local->st_stateid, stid)) | ||
3782 | return local; | ||
3783 | } | ||
3784 | return NULL; | ||
3785 | } | ||
3786 | |||
3787 | static struct nfs4_delegation * | ||
3788 | search_for_delegation(stateid_t *stid) | ||
3789 | { | ||
3790 | struct nfs4_file *fp; | ||
3791 | struct nfs4_delegation *dp; | ||
3792 | struct list_head *pos; | ||
3793 | int i; | ||
3794 | |||
3795 | for (i = 0; i < FILE_HASH_SIZE; i++) { | ||
3796 | list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { | ||
3797 | list_for_each(pos, &fp->fi_delegations) { | ||
3798 | dp = list_entry(pos, struct nfs4_delegation, dl_perfile); | ||
3799 | if (same_stateid(&dp->dl_stateid, stid)) | ||
3800 | return dp; | ||
3801 | } | ||
3802 | } | ||
3803 | } | ||
3804 | return NULL; | ||
3805 | } | ||
3806 | |||
3807 | static struct nfs4_delegation * | ||
3808 | find_delegation_stateid(struct inode *ino, stateid_t *stid) | ||
3809 | { | ||
3810 | struct nfs4_file *fp; | ||
3811 | struct nfs4_delegation *dl; | ||
3812 | |||
3813 | dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__, | ||
3814 | STATEID_VAL(stid)); | ||
3815 | |||
3816 | fp = find_file(ino); | ||
3817 | if (!fp) | ||
3818 | return NULL; | ||
3819 | dl = find_delegation_file(fp, stid); | ||
3820 | put_nfs4_file(fp); | ||
3821 | return dl; | ||
3822 | } | ||
3823 | 3759 | ||
3824 | /* | 3760 | /* |
3825 | * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that | 3761 | * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that |
@@ -3846,15 +3782,21 @@ static const struct lock_manager_operations nfsd_posix_mng_ops = { | |||
3846 | static inline void | 3782 | static inline void |
3847 | nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) | 3783 | nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) |
3848 | { | 3784 | { |
3849 | struct nfs4_stateowner *sop; | 3785 | struct nfs4_lockowner *lo; |
3850 | 3786 | ||
3851 | if (fl->fl_lmops == &nfsd_posix_mng_ops) { | 3787 | if (fl->fl_lmops == &nfsd_posix_mng_ops) { |
3852 | sop = (struct nfs4_stateowner *) fl->fl_owner; | 3788 | lo = (struct nfs4_lockowner *) fl->fl_owner; |
3853 | kref_get(&sop->so_ref); | 3789 | deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, |
3854 | deny->ld_sop = sop; | 3790 | lo->lo_owner.so_owner.len, GFP_KERNEL); |
3855 | deny->ld_clientid = sop->so_client->cl_clientid; | 3791 | if (!deny->ld_owner.data) |
3792 | /* We just don't care that much */ | ||
3793 | goto nevermind; | ||
3794 | deny->ld_owner.len = lo->lo_owner.so_owner.len; | ||
3795 | deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; | ||
3856 | } else { | 3796 | } else { |
3857 | deny->ld_sop = NULL; | 3797 | nevermind: |
3798 | deny->ld_owner.len = 0; | ||
3799 | deny->ld_owner.data = NULL; | ||
3858 | deny->ld_clientid.cl_boot = 0; | 3800 | deny->ld_clientid.cl_boot = 0; |
3859 | deny->ld_clientid.cl_id = 0; | 3801 | deny->ld_clientid.cl_id = 0; |
3860 | } | 3802 | } |
@@ -3867,8 +3809,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) | |||
3867 | deny->ld_type = NFS4_WRITE_LT; | 3809 | deny->ld_type = NFS4_WRITE_LT; |
3868 | } | 3810 | } |
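The denied-lock response used to pin the whole nfs4_stateowner with a kref just to carry its name; now only the name bytes are duplicated, and an allocation failure degrades to an anonymous owner instead of adding a failure mode to a LOCK that is already being denied. The corollary for the XDR layer (an assumption about code outside this hunk) is that the encoder owns the copy:

/* encoder-side cleanup this change implies: */
kfree(deny->ld_owner.data);	/* kfree(NULL) is a no-op */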
3869 | 3811 | ||
3870 | static struct nfs4_stateowner * | 3812 | static struct nfs4_lockowner * |
3871 | find_lockstateowner_str(struct inode *inode, clientid_t *clid, | 3813 | find_lockowner_str(struct inode *inode, clientid_t *clid, |
3872 | struct xdr_netobj *owner) | 3814 | struct xdr_netobj *owner) |
3873 | { | 3815 | { |
3874 | unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); | 3816 | unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); |
@@ -3876,11 +3818,17 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid, | |||
3876 | 3818 | ||
3877 | list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { | 3819 | list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { |
3878 | if (same_owner_str(op, owner, clid)) | 3820 | if (same_owner_str(op, owner, clid)) |
3879 | return op; | 3821 | return lockowner(op); |
3880 | } | 3822 | } |
3881 | return NULL; | 3823 | return NULL; |
3882 | } | 3824 | } |
3883 | 3825 | ||
3826 | static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) | ||
3827 | { | ||
3828 | list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); | ||
3829 | list_add(&lo->lo_perstateid, &open_stp->st_lockowners); | ||
3830 | } | ||
3831 | |||
3884 | /* | 3832 | /* |
3885 | * Alloc a lock owner structure. | 3833 | * Alloc a lock owner structure. |
3886 | * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has | 3834 | * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has |
@@ -3889,67 +3837,40 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid, | |||
3889 | * strhashval = lock_ownerstr_hashval | 3837 | * strhashval = lock_ownerstr_hashval |
3890 | */ | 3838 | */ |
3891 | 3839 | ||
3892 | static struct nfs4_stateowner * | 3840 | static struct nfs4_lockowner * |
3893 | alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { | 3841 | alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { |
3894 | struct nfs4_stateowner *sop; | 3842 | struct nfs4_lockowner *lo; |
3895 | struct nfs4_replay *rp; | ||
3896 | unsigned int idhashval; | ||
3897 | 3843 | ||
3898 | if (!(sop = alloc_stateowner(&lock->lk_new_owner))) | 3844 | lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); |
3845 | if (!lo) | ||
3899 | return NULL; | 3846 | return NULL; |
3900 | idhashval = lockownerid_hashval(current_ownerid); | 3847 | INIT_LIST_HEAD(&lo->lo_owner.so_stateids); |
3901 | INIT_LIST_HEAD(&sop->so_idhash); | 3848 | lo->lo_owner.so_is_open_owner = 0; |
3902 | INIT_LIST_HEAD(&sop->so_strhash); | ||
3903 | INIT_LIST_HEAD(&sop->so_perclient); | ||
3904 | INIT_LIST_HEAD(&sop->so_stateids); | ||
3905 | INIT_LIST_HEAD(&sop->so_perstateid); | ||
3906 | INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ | ||
3907 | sop->so_time = 0; | ||
3908 | list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); | ||
3909 | list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); | ||
3910 | list_add(&sop->so_perstateid, &open_stp->st_lockowners); | ||
3911 | sop->so_is_open_owner = 0; | ||
3912 | sop->so_id = current_ownerid++; | ||
3913 | sop->so_client = clp; | ||
3914 | /* It is the openowner seqid that will be incremented in encode in the | 3849 | /* It is the openowner seqid that will be incremented in encode in the |
3915 | * case of new lockowners; so increment the lock seqid manually: */ | 3850 | * case of new lockowners; so increment the lock seqid manually: */ |
3916 | sop->so_seqid = lock->lk_new_lock_seqid + 1; | 3851 | lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; |
3917 | sop->so_confirmed = 1; | 3852 | hash_lockowner(lo, strhashval, clp, open_stp); |
3918 | rp = &sop->so_replay; | 3853 | return lo; |
3919 | rp->rp_status = nfserr_serverfault; | ||
3920 | rp->rp_buflen = 0; | ||
3921 | rp->rp_buf = rp->rp_ibuf; | ||
3922 | return sop; | ||
3923 | } | 3854 | } |
3924 | 3855 | ||
3925 | static struct nfs4_stateid * | 3856 | static struct nfs4_ol_stateid * |
3926 | alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) | 3857 | alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) |
3927 | { | 3858 | { |
3928 | struct nfs4_stateid *stp; | 3859 | struct nfs4_ol_stateid *stp; |
3929 | unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); | 3860 | struct nfs4_client *clp = lo->lo_owner.so_client; |
3930 | 3861 | ||
3931 | stp = nfs4_alloc_stateid(); | 3862 | stp = nfs4_alloc_stateid(clp); |
3932 | if (stp == NULL) | 3863 | if (stp == NULL) |
3933 | goto out; | 3864 | return NULL; |
3934 | INIT_LIST_HEAD(&stp->st_hash); | 3865 | init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); |
3935 | INIT_LIST_HEAD(&stp->st_perfile); | ||
3936 | INIT_LIST_HEAD(&stp->st_perstateowner); | ||
3937 | INIT_LIST_HEAD(&stp->st_lockowners); /* not used */ | ||
3938 | list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); | ||
3939 | list_add(&stp->st_perfile, &fp->fi_stateids); | 3866 | list_add(&stp->st_perfile, &fp->fi_stateids); |
3940 | list_add(&stp->st_perstateowner, &sop->so_stateids); | 3867 | list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); |
3941 | stp->st_stateowner = sop; | 3868 | stp->st_stateowner = &lo->lo_owner; |
3942 | get_nfs4_file(fp); | 3869 | get_nfs4_file(fp); |
3943 | stp->st_file = fp; | 3870 | stp->st_file = fp; |
3944 | stp->st_stateid.si_boot = boot_time; | ||
3945 | stp->st_stateid.si_stateownerid = sop->so_id; | ||
3946 | stp->st_stateid.si_fileid = fp->fi_id; | ||
3947 | stp->st_stateid.si_generation = 0; | ||
3948 | stp->st_access_bmap = 0; | 3871 | stp->st_access_bmap = 0; |
3949 | stp->st_deny_bmap = open_stp->st_deny_bmap; | 3872 | stp->st_deny_bmap = open_stp->st_deny_bmap; |
3950 | stp->st_openstp = open_stp; | 3873 | stp->st_openstp = open_stp; |
3951 | |||
3952 | out: | ||
3953 | return stp; | 3874 | return stp; |
3954 | } | 3875 | } |
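alloc_init_lock_stateid() no longer hand-rolls the four stateid fields; init_stid(), added earlier in this patch, is assumed to stamp the common part from the client and hash the stid so find_stateid() can see it. A hedged sketch of its likely shape (the so_id allocation detail is a guess):

static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl,
		      unsigned char type)
{
	stid->sc_type = type;
	stid->sc_client = cl;
	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
	stid->sc_stateid.si_generation = 0;
	/* si_opaque.so_id presumably comes from a per-client counter;
	 * carrying so_clid is what makes STALE_STATEID()'s cl_boot
	 * check above work. */
}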
3955 | 3876 | ||
@@ -3960,7 +3881,7 @@ check_lock_length(u64 offset, u64 length) | |||
3960 | LOFF_OVERFLOW(offset, length))); | 3881 | LOFF_OVERFLOW(offset, length))); |
3961 | } | 3882 | } |
3962 | 3883 | ||
3963 | static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access) | 3884 | static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) |
3964 | { | 3885 | { |
3965 | struct nfs4_file *fp = lock_stp->st_file; | 3886 | struct nfs4_file *fp = lock_stp->st_file; |
3966 | int oflag = nfs4_access_to_omode(access); | 3887 | int oflag = nfs4_access_to_omode(access); |
@@ -3978,15 +3899,16 @@ __be32 | |||
3978 | nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3899 | nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
3979 | struct nfsd4_lock *lock) | 3900 | struct nfsd4_lock *lock) |
3980 | { | 3901 | { |
3981 | struct nfs4_stateowner *open_sop = NULL; | 3902 | struct nfs4_openowner *open_sop = NULL; |
3982 | struct nfs4_stateowner *lock_sop = NULL; | 3903 | struct nfs4_lockowner *lock_sop = NULL; |
3983 | struct nfs4_stateid *lock_stp; | 3904 | struct nfs4_ol_stateid *lock_stp; |
3984 | struct nfs4_file *fp; | 3905 | struct nfs4_file *fp; |
3985 | struct file *filp = NULL; | 3906 | struct file *filp = NULL; |
3986 | struct file_lock file_lock; | 3907 | struct file_lock file_lock; |
3987 | struct file_lock conflock; | 3908 | struct file_lock conflock; |
3988 | __be32 status = 0; | 3909 | __be32 status = 0; |
3989 | unsigned int strhashval; | 3910 | unsigned int strhashval; |
3911 | int lkflg; | ||
3990 | int err; | 3912 | int err; |
3991 | 3913 | ||
3992 | dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", | 3914 | dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", |
@@ -4010,7 +3932,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4010 | * Use open owner and open stateid to create lock owner and | 3932 | * Use open owner and open stateid to create lock owner and |
4011 | * lock stateid. | 3933 | * lock stateid. |
4012 | */ | 3934 | */ |
4013 | struct nfs4_stateid *open_stp = NULL; | 3935 | struct nfs4_ol_stateid *open_stp = NULL; |
4014 | 3936 | ||
4015 | status = nfserr_stale_clientid; | 3937 | status = nfserr_stale_clientid; |
4016 | if (!nfsd4_has_session(cstate) && | 3938 | if (!nfsd4_has_session(cstate) && |
@@ -4018,26 +3940,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4018 | goto out; | 3940 | goto out; |
4019 | 3941 | ||
4020 | /* validate and update open stateid and open seqid */ | 3942 | /* validate and update open stateid and open seqid */ |
4021 | status = nfs4_preprocess_seqid_op(cstate, | 3943 | status = nfs4_preprocess_confirmed_seqid_op(cstate, |
4022 | lock->lk_new_open_seqid, | 3944 | lock->lk_new_open_seqid, |
4023 | &lock->lk_new_open_stateid, | 3945 | &lock->lk_new_open_stateid, |
4024 | OPEN_STATE, | 3946 | &open_stp); |
4025 | &lock->lk_replay_owner, &open_stp, | ||
4026 | lock); | ||
4027 | if (status) | 3947 | if (status) |
4028 | goto out; | 3948 | goto out; |
4029 | open_sop = lock->lk_replay_owner; | 3949 | open_sop = openowner(open_stp->st_stateowner); |
3950 | status = nfserr_bad_stateid; | ||
3951 | if (!nfsd4_has_session(cstate) && | ||
3952 | !same_clid(&open_sop->oo_owner.so_client->cl_clientid, | ||
3953 | &lock->v.new.clientid)) | ||
3954 | goto out; | ||
4030 | /* create lockowner and lock stateid */ | 3955 | /* create lockowner and lock stateid */ |
4031 | fp = open_stp->st_file; | 3956 | fp = open_stp->st_file; |
4032 | strhashval = lock_ownerstr_hashval(fp->fi_inode, | 3957 | strhashval = lock_ownerstr_hashval(fp->fi_inode, |
4033 | open_sop->so_client->cl_clientid.cl_id, | 3958 | open_sop->oo_owner.so_client->cl_clientid.cl_id, |
4034 | &lock->v.new.owner); | 3959 | &lock->v.new.owner); |
4035 | /* XXX: Do we need to check for duplicate stateowners on | 3960 | /* XXX: Do we need to check for duplicate stateowners on |
4036 | * the same file, or should they just be allowed (and | 3961 | * the same file, or should they just be allowed (and |
4037 | * create new stateids)? */ | 3962 | * create new stateids)? */ |
4038 | status = nfserr_resource; | 3963 | status = nfserr_jukebox; |
4039 | lock_sop = alloc_init_lock_stateowner(strhashval, | 3964 | lock_sop = alloc_init_lock_stateowner(strhashval, |
4040 | open_sop->so_client, open_stp, lock); | 3965 | open_sop->oo_owner.so_client, open_stp, lock); |
4041 | if (lock_sop == NULL) | 3966 | if (lock_sop == NULL) |
4042 | goto out; | 3967 | goto out; |
4043 | lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); | 3968 | lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); |
@@ -4046,16 +3971,20 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4046 | } else { | 3971 | } else { |
4047 | /* lock (lock owner + lock stateid) already exists */ | 3972 | /* lock (lock owner + lock stateid) already exists */ |
4048 | status = nfs4_preprocess_seqid_op(cstate, | 3973 | status = nfs4_preprocess_seqid_op(cstate, |
4049 | lock->lk_old_lock_seqid, | 3974 | lock->lk_old_lock_seqid, |
4050 | &lock->lk_old_lock_stateid, | 3975 | &lock->lk_old_lock_stateid, |
4051 | LOCK_STATE, | 3976 | NFS4_LOCK_STID, &lock_stp); |
4052 | &lock->lk_replay_owner, &lock_stp, lock); | ||
4053 | if (status) | 3977 | if (status) |
4054 | goto out; | 3978 | goto out; |
4055 | lock_sop = lock->lk_replay_owner; | 3979 | lock_sop = lockowner(lock_stp->st_stateowner); |
4056 | fp = lock_stp->st_file; | 3980 | fp = lock_stp->st_file; |
4057 | } | 3981 | } |
4058 | /* lock->lk_replay_owner and lock_stp have been created or found */ | 3982 | /* lock_sop and lock_stp have been created or found */ |
3983 | |||
3984 | lkflg = setlkflg(lock->lk_type); | ||
3985 | status = nfs4_check_openmode(lock_stp, lkflg); | ||
3986 | if (status) | ||
3987 | goto out; | ||
4059 | 3988 | ||
4060 | status = nfserr_grace; | 3989 | status = nfserr_grace; |
4061 | if (locks_in_grace() && !lock->lk_reclaim) | 3990 | if (locks_in_grace() && !lock->lk_reclaim) |
@@ -4106,8 +4035,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4106 | err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); | 4035 | err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); |
4107 | switch (-err) { | 4036 | switch (-err) { |
4108 | case 0: /* success! */ | 4037 | case 0: /* success! */ |
4109 | update_stateid(&lock_stp->st_stateid); | 4038 | update_stateid(&lock_stp->st_stid.sc_stateid); |
4110 | memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, | 4039 | memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, |
4111 | sizeof(stateid_t)); | 4040 | sizeof(stateid_t)); |
4112 | status = 0; | 4041 | status = 0; |
4113 | break; | 4042 | break; |
@@ -4119,19 +4048,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4119 | case (EDEADLK): | 4048 | case (EDEADLK): |
4120 | status = nfserr_deadlock; | 4049 | status = nfserr_deadlock; |
4121 | break; | 4050 | break; |
4122 | default: | 4051 | default: |
4123 | dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); | 4052 | dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); |
4124 | status = nfserr_resource; | 4053 | status = nfserrno(err); |
4125 | break; | 4054 | break; |
4126 | } | 4055 | } |
4127 | out: | 4056 | out: |
4128 | if (status && lock->lk_is_new && lock_sop) | 4057 | if (status && lock->lk_is_new && lock_sop) |
4129 | release_lockowner(lock_sop); | 4058 | release_lockowner(lock_sop); |
4130 | if (lock->lk_replay_owner) { | 4059 | if (!cstate->replay_owner) |
4131 | nfs4_get_stateowner(lock->lk_replay_owner); | 4060 | nfs4_unlock_state(); |
4132 | cstate->replay_owner = lock->lk_replay_owner; | ||
4133 | } | ||
4134 | nfs4_unlock_state(); | ||
4135 | return status; | 4061 | return status; |
4136 | } | 4062 | } |
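The error plumbing changes in two small but client-visible ways: running out of memory now surfaces as the NFSv4 "try again later" error, and unexpected VFS failures are translated faithfully instead of being collapsed. In summary:

/*
 *   lockowner/stateid allocation failure -> nfserr_jukebox (retryable)
 *   vfs_lock_file() == -EDEADLK          -> nfserr_deadlock
 *   other vfs_lock_file() errors         -> nfserrno(err), replacing the
 *                                           old catch-all nfserr_resource
 */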
4137 | 4063 | ||
@@ -4163,6 +4089,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4163 | { | 4089 | { |
4164 | struct inode *inode; | 4090 | struct inode *inode; |
4165 | struct file_lock file_lock; | 4091 | struct file_lock file_lock; |
4092 | struct nfs4_lockowner *lo; | ||
4166 | int error; | 4093 | int error; |
4167 | __be32 status; | 4094 | __be32 status; |
4168 | 4095 | ||
@@ -4172,19 +4099,14 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4172 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) | 4099 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) |
4173 | return nfserr_inval; | 4100 | return nfserr_inval; |
4174 | 4101 | ||
4175 | lockt->lt_stateowner = NULL; | ||
4176 | nfs4_lock_state(); | 4102 | nfs4_lock_state(); |
4177 | 4103 | ||
4178 | status = nfserr_stale_clientid; | 4104 | status = nfserr_stale_clientid; |
4179 | if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) | 4105 | if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) |
4180 | goto out; | 4106 | goto out; |
4181 | 4107 | ||
4182 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { | 4108 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) |
4183 | dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); | ||
4184 | if (status == nfserr_symlink) | ||
4185 | status = nfserr_inval; | ||
4186 | goto out; | 4109 | goto out; |
4187 | } | ||
4188 | 4110 | ||
4189 | inode = cstate->current_fh.fh_dentry->d_inode; | 4111 | inode = cstate->current_fh.fh_dentry->d_inode; |
4190 | locks_init_lock(&file_lock); | 4112 | locks_init_lock(&file_lock); |
@@ -4203,10 +4125,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4203 | goto out; | 4125 | goto out; |
4204 | } | 4126 | } |
4205 | 4127 | ||
4206 | lockt->lt_stateowner = find_lockstateowner_str(inode, | 4128 | lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); |
4207 | &lockt->lt_clientid, &lockt->lt_owner); | 4129 | if (lo) |
4208 | if (lockt->lt_stateowner) | 4130 | file_lock.fl_owner = (fl_owner_t)lo; |
4209 | file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; | ||
4210 | file_lock.fl_pid = current->tgid; | 4131 | file_lock.fl_pid = current->tgid; |
4211 | file_lock.fl_flags = FL_POSIX; | 4132 | file_lock.fl_flags = FL_POSIX; |
4212 | 4133 | ||
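When no matching lockowner exists, LOCKT now simply tests with the NULL fl_owner left by locks_init_lock(), and the dropped fh_verify() special case lets symlink errors propagate unchanged rather than being rewritten to nfserr_inval. The test still reports conflicts correctly:

/* fl_owner semantics in LOCKT after this hunk:
 *   known lockowner -> its own locks are not reported as conflicts
 *   unknown owner   -> fl_owner == NULL, any overlapping lock conflicts
 */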
@@ -4234,7 +4155,7 @@ __be32 | |||
4234 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 4155 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
4235 | struct nfsd4_locku *locku) | 4156 | struct nfsd4_locku *locku) |
4236 | { | 4157 | { |
4237 | struct nfs4_stateid *stp; | 4158 | struct nfs4_ol_stateid *stp; |
4238 | struct file *filp = NULL; | 4159 | struct file *filp = NULL; |
4239 | struct file_lock file_lock; | 4160 | struct file_lock file_lock; |
4240 | __be32 status; | 4161 | __be32 status; |
@@ -4249,13 +4170,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4249 | 4170 | ||
4250 | nfs4_lock_state(); | 4171 | nfs4_lock_state(); |
4251 | 4172 | ||
4252 | if ((status = nfs4_preprocess_seqid_op(cstate, | 4173 | status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, |
4253 | locku->lu_seqid, | 4174 | &locku->lu_stateid, NFS4_LOCK_STID, &stp); |
4254 | &locku->lu_stateid, | 4175 | if (status) |
4255 | LOCK_STATE, | ||
4256 | &locku->lu_stateowner, &stp, NULL))) | ||
4257 | goto out; | 4176 | goto out; |
4258 | |||
4259 | filp = find_any_file(stp->st_file); | 4177 | filp = find_any_file(stp->st_file); |
4260 | if (!filp) { | 4178 | if (!filp) { |
4261 | status = nfserr_lock_range; | 4179 | status = nfserr_lock_range; |
@@ -4264,7 +4182,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4264 | BUG_ON(!filp); | 4182 | BUG_ON(!filp); |
4265 | locks_init_lock(&file_lock); | 4183 | locks_init_lock(&file_lock); |
4266 | file_lock.fl_type = F_UNLCK; | 4184 | file_lock.fl_type = F_UNLCK; |
4267 | file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner; | 4185 | file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); |
4268 | file_lock.fl_pid = current->tgid; | 4186 | file_lock.fl_pid = current->tgid; |
4269 | file_lock.fl_file = filp; | 4187 | file_lock.fl_file = filp; |
4270 | file_lock.fl_flags = FL_POSIX; | 4188 | file_lock.fl_flags = FL_POSIX; |
@@ -4285,15 +4203,12 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4285 | /* | 4203 | /* |
4286 | * OK, unlock succeeded; the only thing left to do is update the stateid. | 4204 | * OK, unlock succeeded; the only thing left to do is update the stateid. |
4287 | */ | 4205 | */ |
4288 | update_stateid(&stp->st_stateid); | 4206 | update_stateid(&stp->st_stid.sc_stateid); |
4289 | memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); | 4207 | memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
4290 | 4208 | ||
4291 | out: | 4209 | out: |
4292 | if (locku->lu_stateowner) { | 4210 | if (!cstate->replay_owner) |
4293 | nfs4_get_stateowner(locku->lu_stateowner); | 4211 | nfs4_unlock_state(); |
4294 | cstate->replay_owner = locku->lu_stateowner; | ||
4295 | } | ||
4296 | nfs4_unlock_state(); | ||
4297 | return status; | 4212 | return status; |
4298 | 4213 | ||
4299 | out_nfserr: | 4214 | out_nfserr: |
@@ -4307,7 +4222,7 @@ out_nfserr: | |||
4307 | * 0: no locks held by lockowner | 4222 | * 0: no locks held by lockowner |
4308 | */ | 4223 | */ |
4309 | static int | 4224 | static int |
4310 | check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) | 4225 | check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) |
4311 | { | 4226 | { |
4312 | struct file_lock **flpp; | 4227 | struct file_lock **flpp; |
4313 | struct inode *inode = filp->fi_inode; | 4228 | struct inode *inode = filp->fi_inode; |
@@ -4332,7 +4247,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
4332 | { | 4247 | { |
4333 | clientid_t *clid = &rlockowner->rl_clientid; | 4248 | clientid_t *clid = &rlockowner->rl_clientid; |
4334 | struct nfs4_stateowner *sop; | 4249 | struct nfs4_stateowner *sop; |
4335 | struct nfs4_stateid *stp; | 4250 | struct nfs4_lockowner *lo; |
4251 | struct nfs4_ol_stateid *stp; | ||
4336 | struct xdr_netobj *owner = &rlockowner->rl_owner; | 4252 | struct xdr_netobj *owner = &rlockowner->rl_owner; |
4337 | struct list_head matches; | 4253 | struct list_head matches; |
4338 | int i; | 4254 | int i; |
@@ -4356,16 +4272,15 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
4356 | * data structures. */ | 4272 | * data structures. */ |
4357 | INIT_LIST_HEAD(&matches); | 4273 | INIT_LIST_HEAD(&matches); |
4358 | for (i = 0; i < LOCK_HASH_SIZE; i++) { | 4274 | for (i = 0; i < LOCK_HASH_SIZE; i++) { |
4359 | list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { | 4275 | list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { |
4360 | if (!same_owner_str(sop, owner, clid)) | 4276 | if (!same_owner_str(sop, owner, clid)) |
4361 | continue; | 4277 | continue; |
4362 | list_for_each_entry(stp, &sop->so_stateids, | 4278 | list_for_each_entry(stp, &sop->so_stateids, |
4363 | st_perstateowner) { | 4279 | st_perstateowner) { |
4364 | if (check_for_locks(stp->st_file, sop)) | 4280 | lo = lockowner(sop); |
4281 | if (check_for_locks(stp->st_file, lo)) | ||
4365 | goto out; | 4282 | goto out; |
4366 | /* Note: so_perclient unused for lockowners, | 4283 | list_add(&lo->lo_list, &matches); |
4367 | * so it's OK to fool with here. */ | ||
4368 | list_add(&sop->so_perclient, &matches); | ||
4369 | } | 4284 | } |
4370 | } | 4285 | } |
4371 | } | 4286 | } |
@@ -4374,12 +4289,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
4374 | * have been checked. */ | 4289 | * have been checked. */ |
4375 | status = nfs_ok; | 4290 | status = nfs_ok; |
4376 | while (!list_empty(&matches)) { | 4291 | while (!list_empty(&matches)) { |
4377 | sop = list_entry(matches.next, struct nfs4_stateowner, | 4292 | lo = list_entry(matches.next, struct nfs4_lockowner, |
4378 | so_perclient); | 4293 | lo_list); |
4379 | /* unhash_stateowner deletes so_perclient only | 4294 | /* unhash_stateowner deletes so_perclient only |
4380 | * for openowners. */ | 4295 | * for openowners. */ |
4381 | list_del(&sop->so_perclient); | 4296 | list_del(&lo->lo_list); |
4382 | release_lockowner(sop); | 4297 | release_lockowner(lo); |
4383 | } | 4298 | } |
4384 | out: | 4299 | out: |
4385 | nfs4_unlock_state(); | 4300 | nfs4_unlock_state(); |
@@ -4501,16 +4416,10 @@ nfs4_state_init(void) | |||
4501 | for (i = 0; i < FILE_HASH_SIZE; i++) { | 4416 | for (i = 0; i < FILE_HASH_SIZE; i++) { |
4502 | INIT_LIST_HEAD(&file_hashtbl[i]); | 4417 | INIT_LIST_HEAD(&file_hashtbl[i]); |
4503 | } | 4418 | } |
4504 | for (i = 0; i < OWNER_HASH_SIZE; i++) { | 4419 | for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { |
4505 | INIT_LIST_HEAD(&ownerstr_hashtbl[i]); | 4420 | INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); |
4506 | INIT_LIST_HEAD(&ownerid_hashtbl[i]); | ||
4507 | } | ||
4508 | for (i = 0; i < STATEID_HASH_SIZE; i++) { | ||
4509 | INIT_LIST_HEAD(&stateid_hashtbl[i]); | ||
4510 | INIT_LIST_HEAD(&lockstateid_hashtbl[i]); | ||
4511 | } | 4421 | } |
4512 | for (i = 0; i < LOCK_HASH_SIZE; i++) { | 4422 | for (i = 0; i < LOCK_HASH_SIZE; i++) { |
4513 | INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); | ||
4514 | INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); | 4423 | INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); |
4515 | } | 4424 | } |
4516 | memset(&onestateid, ~0, sizeof(stateid_t)); | 4425 | memset(&onestateid, ~0, sizeof(stateid_t)); |
@@ -4527,7 +4436,7 @@ nfsd4_load_reboot_recovery_data(void) | |||
4527 | int status; | 4436 | int status; |
4528 | 4437 | ||
4529 | nfs4_lock_state(); | 4438 | nfs4_lock_state(); |
4530 | nfsd4_init_recdir(user_recovery_dirname); | 4439 | nfsd4_init_recdir(); |
4531 | status = nfsd4_recdir_load(); | 4440 | status = nfsd4_recdir_load(); |
4532 | nfs4_unlock_state(); | 4441 | nfs4_unlock_state(); |
4533 | if (status) | 4442 | if (status) |
@@ -4636,40 +4545,3 @@ nfs4_state_shutdown(void) | |||
4636 | nfs4_unlock_state(); | 4545 | nfs4_unlock_state(); |
4637 | nfsd4_destroy_callback_queue(); | 4546 | nfsd4_destroy_callback_queue(); |
4638 | } | 4547 | } |
4639 | |||
4640 | /* | ||
4641 | * user_recovery_dirname is protected by the nfsd_mutex since it's only | ||
4642 | * accessed when nfsd is starting. | ||
4643 | */ | ||
4644 | static void | ||
4645 | nfs4_set_recdir(char *recdir) | ||
4646 | { | ||
4647 | strcpy(user_recovery_dirname, recdir); | ||
4648 | } | ||
4649 | |||
4650 | /* | ||
4651 | * Change the NFSv4 recovery directory to recdir. | ||
4652 | */ | ||
4653 | int | ||
4654 | nfs4_reset_recoverydir(char *recdir) | ||
4655 | { | ||
4656 | int status; | ||
4657 | struct path path; | ||
4658 | |||
4659 | status = kern_path(recdir, LOOKUP_FOLLOW, &path); | ||
4660 | if (status) | ||
4661 | return status; | ||
4662 | status = -ENOTDIR; | ||
4663 | if (S_ISDIR(path.dentry->d_inode->i_mode)) { | ||
4664 | nfs4_set_recdir(recdir); | ||
4665 | status = 0; | ||
4666 | } | ||
4667 | path_put(&path); | ||
4668 | return status; | ||
4669 | } | ||
4670 | |||
4671 | char * | ||
4672 | nfs4_recoverydir(void) | ||
4673 | { | ||
4674 | return user_recovery_dirname; | ||
4675 | } | ||
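
The nfs4state.c rework above splits the old multi-purpose nfs4_stateowner into typed nfs4_openowner and nfs4_lockowner wrappers (see the state.h hunks later in this diff); the common struct must sit first so a generic pointer can be downcast with container_of(). Below is a minimal userspace sketch of that embedding pattern; the names echo the kernel structs, but the program is illustrative, not kernel code.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct stateowner {                  /* common part, like nfs4_stateowner */
	int so_is_open_owner;        /* discriminator */
	unsigned int so_seqid;
};

struct lockowner {                   /* like nfs4_lockowner */
	struct stateowner lo_owner;  /* must be first field */
	int lo_extra;
};

static struct lockowner *to_lockowner(struct stateowner *so)
{
	return container_of(so, struct lockowner, lo_owner);
}

int main(void)
{
	struct lockowner lo = { .lo_owner = { 0, 7 }, .lo_extra = 42 };
	struct stateowner *so = &lo.lo_owner;  /* generic view */

	/* Because lo_owner is the first member, the round trip is exact. */
	printf("seqid=%u extra=%d\n", so->so_seqid, to_lockowner(so)->lo_extra);
	return 0;
}
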
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c8bf405d19de..66d095d7955e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -456,7 +456,6 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) | |||
456 | { | 456 | { |
457 | DECODE_HEAD; | 457 | DECODE_HEAD; |
458 | 458 | ||
459 | close->cl_stateowner = NULL; | ||
460 | READ_BUF(4); | 459 | READ_BUF(4); |
461 | READ32(close->cl_seqid); | 460 | READ32(close->cl_seqid); |
462 | return nfsd4_decode_stateid(argp, &close->cl_stateid); | 461 | return nfsd4_decode_stateid(argp, &close->cl_stateid); |
@@ -551,7 +550,6 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) | |||
551 | { | 550 | { |
552 | DECODE_HEAD; | 551 | DECODE_HEAD; |
553 | 552 | ||
554 | lock->lk_replay_owner = NULL; | ||
555 | /* | 553 | /* |
556 | * type, reclaim(boolean), offset, length, new_lock_owner(boolean) | 554 | * type, reclaim(boolean), offset, length, new_lock_owner(boolean) |
557 | */ | 555 | */ |
@@ -611,7 +609,6 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) | |||
611 | { | 609 | { |
612 | DECODE_HEAD; | 610 | DECODE_HEAD; |
613 | 611 | ||
614 | locku->lu_stateowner = NULL; | ||
615 | READ_BUF(8); | 612 | READ_BUF(8); |
616 | READ32(locku->lu_type); | 613 | READ32(locku->lu_type); |
617 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) | 614 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) |
@@ -642,6 +639,83 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup | |||
642 | DECODE_TAIL; | 639 | DECODE_TAIL; |
643 | } | 640 | } |
644 | 641 | ||
642 | static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) | ||
643 | { | ||
644 | __be32 *p; | ||
645 | u32 w; | ||
646 | |||
647 | READ_BUF(4); | ||
648 | READ32(w); | ||
649 | *x = w; | ||
650 | switch (w & NFS4_SHARE_ACCESS_MASK) { | ||
651 | case NFS4_SHARE_ACCESS_READ: | ||
652 | case NFS4_SHARE_ACCESS_WRITE: | ||
653 | case NFS4_SHARE_ACCESS_BOTH: | ||
654 | break; | ||
655 | default: | ||
656 | return nfserr_bad_xdr; | ||
657 | } | ||
658 | w &= ~NFS4_SHARE_ACCESS_MASK; | ||
659 | if (!w) | ||
660 | return nfs_ok; | ||
661 | if (!argp->minorversion) | ||
662 | return nfserr_bad_xdr; | ||
663 | switch (w & NFS4_SHARE_WANT_MASK) { | ||
664 | case NFS4_SHARE_WANT_NO_PREFERENCE: | ||
665 | case NFS4_SHARE_WANT_READ_DELEG: | ||
666 | case NFS4_SHARE_WANT_WRITE_DELEG: | ||
667 | case NFS4_SHARE_WANT_ANY_DELEG: | ||
668 | case NFS4_SHARE_WANT_NO_DELEG: | ||
669 | case NFS4_SHARE_WANT_CANCEL: | ||
670 | break; | ||
671 | default: | ||
672 | return nfserr_bad_xdr; | ||
673 | } | ||
674 | w &= ~NFS4_SHARE_WANT_MASK; | ||
675 | if (!w) | ||
676 | return nfs_ok; | ||
677 | switch (w) { | ||
678 | case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: | ||
679 | case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: | ||
680 | case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL | | ||
681 | NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): | ||
682 | return nfs_ok; | ||
683 | } | ||
684 | xdr_error: | ||
685 | return nfserr_bad_xdr; | ||
686 | } | ||
687 | |||
688 | static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) | ||
689 | { | ||
690 | __be32 *p; | ||
691 | |||
692 | READ_BUF(4); | ||
693 | READ32(*x); | ||
694 | /* Note: unlike access bits, deny bits may be zero. */ | ||
695 | if (*x & ~NFS4_SHARE_DENY_BOTH) | ||
696 | return nfserr_bad_xdr; | ||
697 | return nfs_ok; | ||
698 | xdr_error: | ||
699 | return nfserr_bad_xdr; | ||
700 | } | ||
701 | |||
702 | static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) | ||
703 | { | ||
704 | __be32 *p; | ||
705 | |||
706 | READ_BUF(4); | ||
707 | READ32(o->len); | ||
708 | |||
709 | if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) | ||
710 | return nfserr_bad_xdr; | ||
711 | |||
712 | READ_BUF(o->len); | ||
713 | SAVEMEM(o->data, o->len); | ||
714 | return nfs_ok; | ||
715 | xdr_error: | ||
716 | return nfserr_bad_xdr; | ||
717 | } | ||
718 | |||
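
The new share_access decoder validates in layers: the mandatory low access bits, then the v4.1-only "want" bits, then the two delegation-signalling flags, rejecting anything left over. Here is a hedged standalone sketch of the same masking discipline, with made-up bit values standing in for the real NFS4_SHARE_* constants; it also shows why the clearing step needs the bitwise ~ operator rather than logical negation.

#include <stdio.h>

/* Illustrative bit layout only; the real NFS4_SHARE_* values live in
 * include/linux/nfs4.h and differ from these. */
#define ACCESS_MASK  0x0003u  /* stands in for NFS4_SHARE_ACCESS_MASK */
#define WANT_MASK    0x0070u  /* stands in for NFS4_SHARE_WANT_MASK */
#define SIGNAL_BITS  0x0180u  /* the two WHEN_* delegation flags */

static int validate(unsigned int w, int minorversion)
{
	unsigned int access = w & ACCESS_MASK;

	if (access < 1 || access > 3)  /* READ=1, WRITE=2, BOTH=3 */
		return -1;
	w &= ~ACCESS_MASK;             /* bitwise ~; logical ! would zero w */
	if (!w)
		return 0;
	if (!minorversion)             /* extra bits are v4.1+ only */
		return -1;
	/* The kernel additionally whitelists the specific want values;
	 * this sketch just strips them. */
	w &= ~WANT_MASK;
	if (w & ~SIGNAL_BITS)          /* unknown bits remain: reject */
		return -1;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       validate(0x1, 0),          /* plain READ under v4.0: ok */
	       validate(0x1 | 0x10, 0),   /* want bit under v4.0: rejected */
	       validate(0x1 | 0x10, 1));  /* same word under v4.1: ok */
	return 0;
}
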
645 | static __be32 | 719 | static __be32 |
646 | nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | 720 | nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) |
647 | { | 721 | { |
@@ -649,19 +723,23 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
649 | 723 | ||
650 | memset(open->op_bmval, 0, sizeof(open->op_bmval)); | 724 | memset(open->op_bmval, 0, sizeof(open->op_bmval)); |
651 | open->op_iattr.ia_valid = 0; | 725 | open->op_iattr.ia_valid = 0; |
652 | open->op_stateowner = NULL; | 726 | open->op_openowner = NULL; |
653 | 727 | ||
654 | /* seqid, share_access, share_deny, clientid, ownerlen */ | 728 | /* seqid, share_access, share_deny, clientid, ownerlen */ |
655 | READ_BUF(16 + sizeof(clientid_t)); | 729 | READ_BUF(4); |
656 | READ32(open->op_seqid); | 730 | READ32(open->op_seqid); |
657 | READ32(open->op_share_access); | 731 | status = nfsd4_decode_share_access(argp, &open->op_share_access); |
658 | READ32(open->op_share_deny); | 732 | if (status) |
733 | goto xdr_error; | ||
734 | status = nfsd4_decode_share_deny(argp, &open->op_share_deny); | ||
735 | if (status) | ||
736 | goto xdr_error; | ||
737 | READ_BUF(sizeof(clientid_t)); | ||
659 | COPYMEM(&open->op_clientid, sizeof(clientid_t)); | 738 | COPYMEM(&open->op_clientid, sizeof(clientid_t)); |
660 | READ32(open->op_owner.len); | 739 | status = nfsd4_decode_opaque(argp, &open->op_owner); |
661 | 740 | if (status) | |
662 | /* owner, open_flag */ | 741 | goto xdr_error; |
663 | READ_BUF(open->op_owner.len + 4); | 742 | READ_BUF(4); |
664 | SAVEMEM(open->op_owner.data, open->op_owner.len); | ||
665 | READ32(open->op_create); | 743 | READ32(open->op_create); |
666 | switch (open->op_create) { | 744 | switch (open->op_create) { |
667 | case NFS4_OPEN_NOCREATE: | 745 | case NFS4_OPEN_NOCREATE: |
@@ -727,6 +805,19 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
727 | if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) | 805 | if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) |
728 | return status; | 806 | return status; |
729 | break; | 807 | break; |
808 | case NFS4_OPEN_CLAIM_FH: | ||
809 | case NFS4_OPEN_CLAIM_DELEG_PREV_FH: | ||
810 | if (argp->minorversion < 1) | ||
811 | goto xdr_error; | ||
812 | /* void */ | ||
813 | break; | ||
814 | case NFS4_OPEN_CLAIM_DELEG_CUR_FH: | ||
815 | if (argp->minorversion < 1) | ||
816 | goto xdr_error; | ||
817 | status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); | ||
818 | if (status) | ||
819 | return status; | ||
820 | break; | ||
730 | default: | 821 | default: |
731 | goto xdr_error; | 822 | goto xdr_error; |
732 | } | 823 | } |
@@ -739,7 +830,6 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con | |||
739 | { | 830 | { |
740 | DECODE_HEAD; | 831 | DECODE_HEAD; |
741 | 832 | ||
742 | open_conf->oc_stateowner = NULL; | ||
743 | status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); | 833 | status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); |
744 | if (status) | 834 | if (status) |
745 | return status; | 835 | return status; |
@@ -754,15 +844,17 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d | |||
754 | { | 844 | { |
755 | DECODE_HEAD; | 845 | DECODE_HEAD; |
756 | 846 | ||
757 | open_down->od_stateowner = NULL; | ||
758 | status = nfsd4_decode_stateid(argp, &open_down->od_stateid); | 847 | status = nfsd4_decode_stateid(argp, &open_down->od_stateid); |
759 | if (status) | 848 | if (status) |
760 | return status; | 849 | return status; |
761 | READ_BUF(12); | 850 | READ_BUF(4); |
762 | READ32(open_down->od_seqid); | 851 | READ32(open_down->od_seqid); |
763 | READ32(open_down->od_share_access); | 852 | status = nfsd4_decode_share_access(argp, &open_down->od_share_access); |
764 | READ32(open_down->od_share_deny); | 853 | if (status) |
765 | 854 | return status; | |
855 | status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); | ||
856 | if (status) | ||
857 | return status; | ||
766 | DECODE_TAIL; | 858 | DECODE_TAIL; |
767 | } | 859 | } |
768 | 860 | ||
@@ -903,12 +995,13 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient | |||
903 | { | 995 | { |
904 | DECODE_HEAD; | 996 | DECODE_HEAD; |
905 | 997 | ||
906 | READ_BUF(12); | 998 | READ_BUF(8); |
907 | COPYMEM(setclientid->se_verf.data, 8); | 999 | COPYMEM(setclientid->se_verf.data, 8); |
908 | READ32(setclientid->se_namelen); | ||
909 | 1000 | ||
910 | READ_BUF(setclientid->se_namelen + 8); | 1001 | status = nfsd4_decode_opaque(argp, &setclientid->se_name); |
911 | SAVEMEM(setclientid->se_name, setclientid->se_namelen); | 1002 | if (status) |
1003 | return nfserr_bad_xdr; | ||
1004 | READ_BUF(8); | ||
912 | READ32(setclientid->se_callback_prog); | 1005 | READ32(setclientid->se_callback_prog); |
913 | READ32(setclientid->se_callback_netid_len); | 1006 | READ32(setclientid->se_callback_netid_len); |
914 | 1007 | ||
@@ -1051,11 +1144,9 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, | |||
1051 | READ_BUF(NFS4_VERIFIER_SIZE); | 1144 | READ_BUF(NFS4_VERIFIER_SIZE); |
1052 | COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); | 1145 | COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); |
1053 | 1146 | ||
1054 | READ_BUF(4); | 1147 | status = nfsd4_decode_opaque(argp, &exid->clname); |
1055 | READ32(exid->clname.len); | 1148 | if (status) |
1056 | 1149 | return nfserr_bad_xdr; | |
1057 | READ_BUF(exid->clname.len); | ||
1058 | SAVEMEM(exid->clname.data, exid->clname.len); | ||
1059 | 1150 | ||
1060 | READ_BUF(4); | 1151 | READ_BUF(4); |
1061 | READ32(exid->flags); | 1152 | READ32(exid->flags); |
@@ -1326,6 +1417,16 @@ xdr_error: | |||
1326 | goto out; | 1417 | goto out; |
1327 | } | 1418 | } |
1328 | 1419 | ||
1420 | static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) | ||
1421 | { | ||
1422 | DECODE_HEAD; | ||
1423 | |||
1424 | READ_BUF(8); | ||
1425 | COPYMEM(&dc->clientid, 8); | ||
1426 | |||
1427 | DECODE_TAIL; | ||
1428 | } | ||
1429 | |||
1329 | static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) | 1430 | static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) |
1330 | { | 1431 | { |
1331 | DECODE_HEAD; | 1432 | DECODE_HEAD; |
@@ -1447,7 +1548,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { | |||
1447 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, | 1548 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, |
1448 | [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, | 1549 | [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, |
1449 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, | 1550 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, |
1450 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, | 1551 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, |
1451 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, | 1552 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, |
1452 | }; | 1553 | }; |
1453 | 1554 | ||
@@ -1630,15 +1731,20 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) | |||
1630 | * we know whether the error to be returned is a sequence id mutating error. | 1731 | * we know whether the error to be returned is a sequence id mutating error. |
1631 | */ | 1732 | */ |
1632 | 1733 | ||
1633 | #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ | 1734 | static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) |
1634 | if (seqid_mutating_err(nfserr) && stateowner) { \ | 1735 | { |
1635 | stateowner->so_seqid++; \ | 1736 | struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; |
1636 | stateowner->so_replay.rp_status = nfserr; \ | 1737 | |
1637 | stateowner->so_replay.rp_buflen = \ | 1738 | if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { |
1638 | (((char *)(resp)->p - (char *)save)); \ | 1739 | stateowner->so_seqid++; |
1639 | memcpy(stateowner->so_replay.rp_buf, save, \ | 1740 | stateowner->so_replay.rp_status = nfserr; |
1640 | stateowner->so_replay.rp_buflen); \ | 1741 | stateowner->so_replay.rp_buflen = |
1641 | } } while (0); | 1742 | (char *)resp->p - (char *)save; |
1743 | memcpy(stateowner->so_replay.rp_buf, save, | ||
1744 | stateowner->so_replay.rp_buflen); | ||
1745 | nfsd4_purge_closed_stateid(stateowner); | ||
1746 | } | ||
1747 | } | ||
1642 | 1748 | ||
1643 | /* Encode as an array of strings the string given with components | 1749 | /* Encode as an array of strings the string given with components |
1644 | * separated @sep. | 1750 | * separated @sep. |
@@ -1697,36 +1803,89 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, | |||
1697 | } | 1803 | } |
1698 | 1804 | ||
1699 | /* | 1805 | /* |
1700 | * Return the path to an export point in the pseudo filesystem namespace | 1806 | * Encode a path in RFC3530 'pathname4' format |
1701 | * Returned string is safe to use as long as the caller holds a reference | ||
1702 | * to @exp. | ||
1703 | */ | 1807 | */ |
1704 | static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) | 1808 | static __be32 nfsd4_encode_path(const struct path *root, |
1809 | const struct path *path, __be32 **pp, int *buflen) | ||
1705 | { | 1810 | { |
1706 | struct svc_fh tmp_fh; | 1811 | struct path cur = { |
1707 | char *path = NULL, *rootpath; | 1812 | .mnt = path->mnt, |
1708 | size_t rootlen; | 1813 | .dentry = path->dentry, |
1814 | }; | ||
1815 | __be32 *p = *pp; | ||
1816 | struct dentry **components = NULL; | ||
1817 | unsigned int ncomponents = 0; | ||
1818 | __be32 err = nfserr_jukebox; | ||
1709 | 1819 | ||
1710 | fh_init(&tmp_fh, NFS4_FHSIZE); | 1820 | dprintk("nfsd4_encode_path(");
1711 | *stat = exp_pseudoroot(rqstp, &tmp_fh); | ||
1712 | if (*stat) | ||
1713 | return NULL; | ||
1714 | rootpath = tmp_fh.fh_export->ex_pathname; | ||
1715 | 1821 | ||
1716 | path = exp->ex_pathname; | 1822 | path_get(&cur); |
1823 | /* First walk the path up to the nfsd root, and store the | ||
1824 | * dentries/path components in an array. | ||
1825 | */ | ||
1826 | for (;;) { | ||
1827 | if (cur.dentry == root->dentry && cur.mnt == root->mnt) | ||
1828 | break; | ||
1829 | if (cur.dentry == cur.mnt->mnt_root) { | ||
1830 | if (follow_up(&cur)) | ||
1831 | continue; | ||
1832 | goto out_free; | ||
1833 | } | ||
1834 | if ((ncomponents & 15) == 0) { | ||
1835 | struct dentry **new; | ||
1836 | new = krealloc(components, | ||
1837 | sizeof(*new) * (ncomponents + 16), | ||
1838 | GFP_KERNEL); | ||
1839 | if (!new) | ||
1840 | goto out_free; | ||
1841 | components = new; | ||
1842 | } | ||
1843 | components[ncomponents++] = cur.dentry; | ||
1844 | cur.dentry = dget_parent(cur.dentry); | ||
1845 | } | ||
1717 | 1846 | ||
1718 | rootlen = strlen(rootpath); | 1847 | *buflen -= 4; |
1719 | if (strncmp(path, rootpath, rootlen)) { | 1848 | if (*buflen < 0) |
1720 | dprintk("nfsd: fs_locations failed;" | 1849 | goto out_free; |
1721 | "%s is not contained in %s\n", path, rootpath); | 1850 | WRITE32(ncomponents); |
1722 | *stat = nfserr_notsupp; | 1851 | |
1723 | path = NULL; | 1852 | while (ncomponents) { |
1724 | goto out; | 1853 | struct dentry *dentry = components[ncomponents - 1]; |
1854 | unsigned int len = dentry->d_name.len; | ||
1855 | |||
1856 | *buflen -= 4 + (XDR_QUADLEN(len) << 2); | ||
1857 | if (*buflen < 0) | ||
1858 | goto out_free; | ||
1859 | WRITE32(len); | ||
1860 | WRITEMEM(dentry->d_name.name, len); | ||
1861 | dprintk("/%s", dentry->d_name.name); | ||
1862 | dput(dentry); | ||
1863 | ncomponents--; | ||
1725 | } | 1864 | } |
1726 | path += rootlen; | 1865 | |
1727 | out: | 1866 | *pp = p; |
1728 | fh_put(&tmp_fh); | 1867 | err = 0; |
1729 | return path; | 1868 | out_free: |
1869 | dprintk(")\n"); | ||
1870 | while (ncomponents) | ||
1871 | dput(components[--ncomponents]); | ||
1872 | kfree(components); | ||
1873 | path_put(&cur); | ||
1874 | return err; | ||
1875 | } | ||
1876 | |||
1877 | static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, | ||
1878 | const struct path *path, __be32 **pp, int *buflen) | ||
1879 | { | ||
1880 | struct svc_export *exp_ps; | ||
1881 | __be32 res; | ||
1882 | |||
1883 | exp_ps = rqst_find_fsidzero_export(rqstp); | ||
1884 | if (IS_ERR(exp_ps)) | ||
1885 | return nfserrno(PTR_ERR(exp_ps)); | ||
1886 | res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); | ||
1887 | exp_put(exp_ps); | ||
1888 | return res; | ||
1730 | } | 1889 | } |
1731 | 1890 | ||
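
nfsd4_encode_path() walks dget_parent() up to the export root, stashing the dentries, then emits them root-first in RFC 3530 'pathname4' form: a component count followed by length-prefixed, 4-byte-padded names. For /export/a/b relative to a root of /export, the wire form is count=2, "a", "b". A small hedged userspace sketch of that XDR layout (not the kernel encoder):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Encode {"a", "b"} as pathname4: count, then per-component
 * length + name bytes padded to a 4-byte boundary. */
int main(void)
{
	const char *comps[] = { "a", "b" };
	unsigned char buf[64] = {0};  /* zero-filled, so pad bytes stay 0 */
	size_t off = 0;
	uint32_t n = htonl(2);

	memcpy(buf + off, &n, 4); off += 4;           /* component count */
	for (int i = 0; i < 2; i++) {
		uint32_t len = strlen(comps[i]);
		uint32_t be = htonl(len);
		memcpy(buf + off, &be, 4); off += 4;  /* length */
		memcpy(buf + off, comps[i], len);     /* name bytes */
		off += (len + 3) & ~3u;               /* XDR pad to 4 */
	}
	printf("encoded %zu bytes\n", off);           /* 4 + 8 + 8 = 20 */
	return 0;
}
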
1732 | /* | 1891 | /* |
@@ -1740,11 +1899,8 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, | |||
1740 | int i; | 1899 | int i; |
1741 | __be32 *p = *pp; | 1900 | __be32 *p = *pp; |
1742 | struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; | 1901 | struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; |
1743 | char *root = nfsd4_path(rqstp, exp, &status); | ||
1744 | 1902 | ||
1745 | if (status) | 1903 | status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); |
1746 | return status; | ||
1747 | status = nfsd4_encode_components('/', root, &p, buflen); | ||
1748 | if (status) | 1904 | if (status) |
1749 | return status; | 1905 | return status; |
1750 | if ((*buflen -= 4) < 0) | 1906 | if ((*buflen -= 4) < 0) |
@@ -1760,12 +1916,19 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, | |||
1760 | return 0; | 1916 | return 0; |
1761 | } | 1917 | } |
1762 | 1918 | ||
1763 | static u32 nfs4_ftypes[16] = { | 1919 | static u32 nfs4_file_type(umode_t mode) |
1764 | NF4BAD, NF4FIFO, NF4CHR, NF4BAD, | 1920 | { |
1765 | NF4DIR, NF4BAD, NF4BLK, NF4BAD, | 1921 | switch (mode & S_IFMT) { |
1766 | NF4REG, NF4BAD, NF4LNK, NF4BAD, | 1922 | case S_IFIFO: return NF4FIFO; |
1767 | NF4SOCK, NF4BAD, NF4LNK, NF4BAD, | 1923 | case S_IFCHR: return NF4CHR; |
1768 | }; | 1924 | case S_IFDIR: return NF4DIR; |
1925 | case S_IFBLK: return NF4BLK; | ||
1926 | case S_IFLNK: return NF4LNK; | ||
1927 | case S_IFREG: return NF4REG; | ||
1928 | case S_IFSOCK: return NF4SOCK; | ||
1929 | default: return NF4BAD; | ||
1930 | } | ||
1931 | } | ||
1769 | 1932 | ||
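
The old nfs4_ftypes[] table was indexed by (stat.mode & S_IFMT) >> 12, leaning on the historical encoding of the S_IF* constants, and it even listed NF4LNK at index 14, which no valid mode produces. The switch keys on the constants directly. A quick check of the shift trick the table depended on, assuming the usual Linux S_IF* values:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* The S_IF* constants occupy the top nibble of a 16-bit mode,
	 * so (mode & S_IFMT) >> 12 yields a small table index. */
	printf("FIFO=%lu CHR=%lu DIR=%lu BLK=%lu REG=%lu LNK=%lu SOCK=%lu\n",
	       (unsigned long)(S_IFIFO >> 12), (unsigned long)(S_IFCHR >> 12),
	       (unsigned long)(S_IFDIR >> 12), (unsigned long)(S_IFBLK >> 12),
	       (unsigned long)(S_IFREG >> 12), (unsigned long)(S_IFLNK >> 12),
	       (unsigned long)(S_IFSOCK >> 12));
	/* Prints 1 2 4 6 8 10 12 on Linux, matching the old table slots;
	 * the switch removes the dependence on this encoding. */
	return 0;
}
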
1770 | static __be32 | 1933 | static __be32 |
1771 | nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, | 1934 | nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, |
@@ -1954,7 +2117,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1954 | if (bmval0 & FATTR4_WORD0_TYPE) { | 2117 | if (bmval0 & FATTR4_WORD0_TYPE) { |
1955 | if ((buflen -= 4) < 0) | 2118 | if ((buflen -= 4) < 0) |
1956 | goto out_resource; | 2119 | goto out_resource; |
1957 | dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12]; | 2120 | dummy = nfs4_file_type(stat.mode); |
1958 | if (dummy == NF4BAD) | 2121 | if (dummy == NF4BAD) |
1959 | goto out_serverfault; | 2122 | goto out_serverfault; |
1960 | WRITE32(dummy); | 2123 | WRITE32(dummy); |
@@ -2488,7 +2651,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c | |||
2488 | if (!nfserr) | 2651 | if (!nfserr) |
2489 | nfsd4_encode_stateid(resp, &close->cl_stateid); | 2652 | nfsd4_encode_stateid(resp, &close->cl_stateid); |
2490 | 2653 | ||
2491 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); | 2654 | encode_seqid_op_tail(resp, save, nfserr); |
2492 | return nfserr; | 2655 | return nfserr; |
2493 | } | 2656 | } |
2494 | 2657 | ||
@@ -2564,17 +2727,18 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh | |||
2564 | static void | 2727 | static void |
2565 | nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) | 2728 | nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) |
2566 | { | 2729 | { |
2730 | struct xdr_netobj *conf = &ld->ld_owner; | ||
2567 | __be32 *p; | 2731 | __be32 *p; |
2568 | 2732 | ||
2569 | RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); | 2733 | RESERVE_SPACE(32 + XDR_LEN(conf->len)); |
2570 | WRITE64(ld->ld_start); | 2734 | WRITE64(ld->ld_start); |
2571 | WRITE64(ld->ld_length); | 2735 | WRITE64(ld->ld_length); |
2572 | WRITE32(ld->ld_type); | 2736 | WRITE32(ld->ld_type); |
2573 | if (ld->ld_sop) { | 2737 | if (conf->len) { |
2574 | WRITEMEM(&ld->ld_clientid, 8); | 2738 | WRITEMEM(&ld->ld_clientid, 8); |
2575 | WRITE32(ld->ld_sop->so_owner.len); | 2739 | WRITE32(conf->len); |
2576 | WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len); | 2740 | WRITEMEM(conf->data, conf->len); |
2577 | kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner); | 2741 | kfree(conf->data); |
2578 | } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ | 2742 | } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ |
2579 | WRITE64((u64)0); /* clientid */ | 2743 | WRITE64((u64)0); /* clientid */ |
2580 | WRITE32(0); /* length of owner name */ | 2744 | WRITE32(0); /* length of owner name */ |
@@ -2592,7 +2756,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo | |||
2592 | else if (nfserr == nfserr_denied) | 2756 | else if (nfserr == nfserr_denied) |
2593 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); | 2757 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); |
2594 | 2758 | ||
2595 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); | 2759 | encode_seqid_op_tail(resp, save, nfserr); |
2596 | return nfserr; | 2760 | return nfserr; |
2597 | } | 2761 | } |
2598 | 2762 | ||
@@ -2612,7 +2776,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l | |||
2612 | if (!nfserr) | 2776 | if (!nfserr) |
2613 | nfsd4_encode_stateid(resp, &locku->lu_stateid); | 2777 | nfsd4_encode_stateid(resp, &locku->lu_stateid); |
2614 | 2778 | ||
2615 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); | 2779 | encode_seqid_op_tail(resp, save, nfserr); |
2616 | return nfserr; | 2780 | return nfserr; |
2617 | } | 2781 | } |
2618 | 2782 | ||
@@ -2693,7 +2857,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
2693 | } | 2857 | } |
2694 | /* XXX save filehandle here */ | 2858 | /* XXX save filehandle here */ |
2695 | out: | 2859 | out: |
2696 | ENCODE_SEQID_OP_TAIL(open->op_stateowner); | 2860 | encode_seqid_op_tail(resp, save, nfserr); |
2697 | return nfserr; | 2861 | return nfserr; |
2698 | } | 2862 | } |
2699 | 2863 | ||
@@ -2705,7 +2869,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct | |||
2705 | if (!nfserr) | 2869 | if (!nfserr) |
2706 | nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); | 2870 | nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); |
2707 | 2871 | ||
2708 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); | 2872 | encode_seqid_op_tail(resp, save, nfserr); |
2709 | return nfserr; | 2873 | return nfserr; |
2710 | } | 2874 | } |
2711 | 2875 | ||
@@ -2717,7 +2881,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc | |||
2717 | if (!nfserr) | 2881 | if (!nfserr) |
2718 | nfsd4_encode_stateid(resp, &od->od_stateid); | 2882 | nfsd4_encode_stateid(resp, &od->od_stateid); |
2719 | 2883 | ||
2720 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); | 2884 | encode_seqid_op_tail(resp, save, nfserr); |
2721 | return nfserr; | 2885 | return nfserr; |
2722 | } | 2886 | } |
2723 | 2887 | ||
@@ -2759,8 +2923,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
2759 | read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, | 2923 | read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, |
2760 | &maxcount); | 2924 | &maxcount); |
2761 | 2925 | ||
2762 | if (nfserr == nfserr_symlink) | ||
2763 | nfserr = nfserr_inval; | ||
2764 | if (nfserr) | 2926 | if (nfserr) |
2765 | return nfserr; | 2927 | return nfserr; |
2766 | eof = (read->rd_offset + maxcount >= | 2928 | eof = (read->rd_offset + maxcount >= |
@@ -2886,8 +3048,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
2886 | readdir->common.err == nfserr_toosmall && | 3048 | readdir->common.err == nfserr_toosmall && |
2887 | readdir->buffer == page) | 3049 | readdir->buffer == page) |
2888 | nfserr = nfserr_toosmall; | 3050 | nfserr = nfserr_toosmall; |
2889 | if (nfserr == nfserr_symlink) | ||
2890 | nfserr = nfserr_notdir; | ||
2891 | if (nfserr) | 3051 | if (nfserr) |
2892 | goto err_no_verf; | 3052 | goto err_no_verf; |
2893 | 3053 | ||
@@ -3218,9 +3378,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | |||
3218 | WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); | 3378 | WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); |
3219 | WRITE32(seq->seqid); | 3379 | WRITE32(seq->seqid); |
3220 | WRITE32(seq->slotid); | 3380 | WRITE32(seq->slotid); |
3221 | WRITE32(seq->maxslots); | 3381 | /* Note slotids are numbered from zero: */ |
3222 | /* For now: target_maxslots = maxslots */ | 3382 | WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ |
3223 | WRITE32(seq->maxslots); | 3383 | WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ |
3224 | WRITE32(seq->status_flags); | 3384 | WRITE32(seq->status_flags); |
3225 | 3385 | ||
3226 | ADJUST_ARGS(); | 3386 | ADJUST_ARGS(); |
@@ -3233,6 +3393,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, | |||
3233 | struct nfsd4_test_stateid *test_stateid) | 3393 | struct nfsd4_test_stateid *test_stateid) |
3234 | { | 3394 | { |
3235 | struct nfsd4_compoundargs *argp; | 3395 | struct nfsd4_compoundargs *argp; |
3396 | struct nfs4_client *cl = resp->cstate.session->se_client; | ||
3236 | stateid_t si; | 3397 | stateid_t si; |
3237 | __be32 *p; | 3398 | __be32 *p; |
3238 | int i; | 3399 | int i; |
@@ -3248,7 +3409,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, | |||
3248 | nfs4_lock_state(); | 3409 | nfs4_lock_state(); |
3249 | for (i = 0; i < test_stateid->ts_num_ids; i++) { | 3410 | for (i = 0; i < test_stateid->ts_num_ids; i++) { |
3250 | nfsd4_decode_stateid(argp, &si); | 3411 | nfsd4_decode_stateid(argp, &si); |
3251 | valid = nfs4_validate_stateid(&si, test_stateid->ts_has_session); | 3412 | valid = nfs4_validate_stateid(cl, &si); |
3252 | RESERVE_SPACE(4); | 3413 | RESERVE_SPACE(4); |
3253 | *p++ = htonl(valid); | 3414 | *p++ = htonl(valid); |
3254 | resp->p = p; | 3415 | resp->p = p; |
@@ -3334,34 +3495,29 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3334 | 3495 | ||
3335 | /* | 3496 | /* |
3336 | * Calculate the total amount of memory that the compound response has taken | 3497 | * Calculate the total amount of memory that the compound response has taken |
3337 | * after encoding the current operation. | 3498 | * after encoding the current operation, plus pad. |
3338 | * | 3499 | * |
3339 | * pad: add on 8 bytes for the next operation's op_code and status so that | 3500 | * pad: if the operation is non-idempotent, pad is calculated by op_rsize_bop() |
3340 | * there is room to cache a failure on the next operation. | 3501 | * as specified in nfsd4_operation; otherwise pad is zero. |
3341 | * | 3502 | * |
3342 | * Compare this length to the session se_fmaxresp_cached. | 3503 | * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. |
3343 | * | 3504 | * |
3344 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so | 3505 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so |
3345 | * will be at least a page and will therefore hold the xdr_buf head. | 3506 | * will be at least a page and will therefore hold the xdr_buf head. |
3346 | */ | 3507 | */ |
3347 | static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) | 3508 | int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) |
3348 | { | 3509 | { |
3349 | int status = 0; | ||
3350 | struct xdr_buf *xb = &resp->rqstp->rq_res; | 3510 | struct xdr_buf *xb = &resp->rqstp->rq_res; |
3351 | struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; | ||
3352 | struct nfsd4_session *session = NULL; | 3511 | struct nfsd4_session *session = NULL; |
3353 | struct nfsd4_slot *slot = resp->cstate.slot; | 3512 | struct nfsd4_slot *slot = resp->cstate.slot; |
3354 | u32 length, tlen = 0, pad = 8; | 3513 | u32 length, tlen = 0; |
3355 | 3514 | ||
3356 | if (!nfsd4_has_session(&resp->cstate)) | 3515 | if (!nfsd4_has_session(&resp->cstate)) |
3357 | return status; | 3516 | return 0; |
3358 | 3517 | ||
3359 | session = resp->cstate.session; | 3518 | session = resp->cstate.session; |
3360 | if (session == NULL || slot->sl_cachethis == 0) | 3519 | if (session == NULL) |
3361 | return status; | 3520 | return 0; |
3362 | |||
3363 | if (resp->opcnt >= args->opcnt) | ||
3364 | pad = 0; /* this is the last operation */ | ||
3365 | 3521 | ||
3366 | if (xb->page_len == 0) { | 3522 | if (xb->page_len == 0) { |
3367 | length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; | 3523 | length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; |
@@ -3374,10 +3530,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) | |||
3374 | dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, | 3530 | dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, |
3375 | length, xb->page_len, tlen, pad); | 3531 | length, xb->page_len, tlen, pad); |
3376 | 3532 | ||
3377 | if (length <= session->se_fchannel.maxresp_cached) | 3533 | if (length > session->se_fchannel.maxresp_sz) |
3378 | return status; | 3534 | return nfserr_rep_too_big; |
3379 | else | 3535 | |
3536 | if (slot->sl_cachethis == 1 && | ||
3537 | length > session->se_fchannel.maxresp_cached) | ||
3380 | return nfserr_rep_too_big_to_cache; | 3538 | return nfserr_rep_too_big_to_cache; |
3539 | |||
3540 | return 0; | ||
3381 | } | 3541 | } |
3382 | 3542 | ||
3383 | void | 3543 | void |
@@ -3397,8 +3557,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | |||
3397 | !nfsd4_enc_ops[op->opnum]); | 3557 | !nfsd4_enc_ops[op->opnum]); |
3398 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); | 3558 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); |
3399 | /* nfsd4_check_drc_limit guarantees enough room for error status */ | 3559 | /* nfsd4_check_drc_limit guarantees enough room for error status */ |
3400 | if (!op->status && nfsd4_check_drc_limit(resp)) | 3560 | if (!op->status) |
3401 | op->status = nfserr_rep_too_big_to_cache; | 3561 | op->status = nfsd4_check_resp_size(resp, 0); |
3402 | status: | 3562 | status: |
3403 | /* | 3563 | /* |
3404 | * Note: We write the status directly, instead of using WRITE32(), | 3564 | * Note: We write the status directly, instead of using WRITE32(), |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c7716143cbd1..db34a585e112 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/ctype.h> | 9 | #include <linux/ctype.h> |
10 | 10 | ||
11 | #include <linux/sunrpc/svcsock.h> | 11 | #include <linux/sunrpc/svcsock.h> |
12 | #include <linux/nfsd/syscall.h> | ||
13 | #include <linux/lockd/lockd.h> | 12 | #include <linux/lockd/lockd.h> |
14 | #include <linux/sunrpc/clnt.h> | 13 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/gss_api.h> | 14 | #include <linux/sunrpc/gss_api.h> |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7ecfa2420307..58134a23fdfb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -11,13 +11,39 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
13 | 13 | ||
14 | #include <linux/nfs.h> | ||
15 | #include <linux/nfs2.h> | ||
16 | #include <linux/nfs3.h> | ||
17 | #include <linux/nfs4.h> | ||
18 | #include <linux/sunrpc/msg_prot.h> | ||
19 | |||
14 | #include <linux/nfsd/debug.h> | 20 | #include <linux/nfsd/debug.h> |
15 | #include <linux/nfsd/export.h> | 21 | #include <linux/nfsd/export.h> |
16 | #include <linux/nfsd/stats.h> | 22 | #include <linux/nfsd/stats.h> |
23 | |||
17 | /* | 24 | /* |
18 | * nfsd version | 25 | * nfsd version |
19 | */ | 26 | */ |
20 | #define NFSD_SUPPORTED_MINOR_VERSION 1 | 27 | #define NFSD_SUPPORTED_MINOR_VERSION 1 |
28 | /* | ||
29 | * Maximum blocksizes supported by daemon under various circumstances. | ||
30 | */ | ||
31 | #define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD | ||
32 | /* NFSv2 is limited by the protocol specification, see RFC 1094 */ | ||
33 | #define NFSSVC_MAXBLKSIZE_V2 (8*1024) | ||
34 | |||
35 | |||
36 | /* | ||
37 | * Largest number of bytes we need to allocate for an NFS | ||
38 | * call or reply. Used to control buffer sizes. We use | ||
39 | * the length of v3 WRITE, READ and READDIR replies | ||
40 | * which are an RPC header, up to 26 XDR units of reply | ||
41 | * data, and some page data. | ||
42 | * | ||
43 | * Note that accuracy here doesn't matter too much as the | ||
44 | * size is rounded up to a page size when allocating space. | ||
45 | */ | ||
46 | #define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) | ||
21 | 47 | ||
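
NFSD_BUFSIZE is a header allowance plus the maximum payload, and accuracy only matters up to page rounding, as the comment above notes. A rough worked example, with XDR_UNIT taken as 4 and RPC_MAX_HEADER_WITH_AUTH given an assumed placeholder value since the real constant comes from the sunrpc headers:

#include <stdio.h>

int main(void)
{
	/* Assumed stand-ins; the real constants come from the sunrpc
	 * headers and may differ. */
	const unsigned XDR_UNIT = 4;
	const unsigned RPC_MAX_HEADER_WITH_AUTH = 103;   /* assumption */
	const unsigned NFSSVC_MAXBLKSIZE = 1024 * 1024;  /* RPCSVC_MAXPAYLOAD */

	unsigned bufsize = (RPC_MAX_HEADER_WITH_AUTH + 26) * XDR_UNIT
			   + NFSSVC_MAXBLKSIZE;
	/* Rounded up to pages when actually allocated, so the header
	 * term only needs to be an upper bound. */
	printf("NFSD_BUFSIZE ~= %u bytes\n", bufsize);
	return 0;
}
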
22 | struct readdir_cd { | 48 | struct readdir_cd { |
23 | __be32 err; /* 0, nfserr, or nfserr_eof */ | 49 | __be32 err; /* 0, nfserr, or nfserr_eof */ |
@@ -335,6 +361,13 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) | |||
335 | #define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ | 361 | #define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ |
336 | NFSD_WRITEABLE_ATTRS_WORD2 | 362 | NFSD_WRITEABLE_ATTRS_WORD2 |
337 | 363 | ||
364 | extern int nfsd4_is_junction(struct dentry *dentry); | ||
365 | #else | ||
366 | static inline int nfsd4_is_junction(struct dentry *dentry) | ||
367 | { | ||
368 | return 0; | ||
369 | } | ||
370 | |||
338 | #endif /* CONFIG_NFSD_V4 */ | 371 | #endif /* CONFIG_NFSD_V4 */ |
339 | 372 | ||
340 | #endif /* LINUX_NFSD_NFSD_H */ | 373 | #endif /* LINUX_NFSD_NFSD_H */ |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 90c6aa6d5e0f..c763de5c1157 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -59,28 +59,25 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) | |||
59 | * the write call). | 59 | * the write call). |
60 | */ | 60 | */ |
61 | static inline __be32 | 61 | static inline __be32 |
62 | nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) | 62 | nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested) |
63 | { | 63 | { |
64 | /* Type can be negative when creating hardlinks - not to a dir */ | 64 | mode &= S_IFMT; |
65 | if (type > 0 && (mode & S_IFMT) != type) { | 65 | |
66 | if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) | 66 | if (requested == 0) /* the caller doesn't care */ |
67 | return nfserr_symlink; | 67 | return nfs_ok; |
68 | else if (type == S_IFDIR) | 68 | if (mode == requested) |
69 | return nfserr_notdir; | 69 | return nfs_ok; |
70 | else if ((mode & S_IFMT) == S_IFDIR) | 70 | /* |
71 | return nfserr_isdir; | 71 | * v4 has an error more specific than err_notdir which we should |
72 | else | 72 | * return in preference to err_notdir: |
73 | return nfserr_inval; | 73 | */ |
74 | } | 74 | if (rqstp->rq_vers == 4 && mode == S_IFLNK) |
75 | if (type < 0 && (mode & S_IFMT) == -type) { | 75 | return nfserr_symlink; |
76 | if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) | 76 | if (requested == S_IFDIR) |
77 | return nfserr_symlink; | 77 | return nfserr_notdir; |
78 | else if (type == -S_IFDIR) | 78 | if (mode == S_IFDIR) |
79 | return nfserr_isdir; | 79 | return nfserr_isdir; |
80 | else | 80 | return nfserr_inval; |
81 | return nfserr_notdir; | ||
82 | } | ||
83 | return 0; | ||
84 | } | 81 | } |
85 | 82 | ||
86 | static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, | 83 | static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4eefaf1b42e8..a3cf38476a1b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@ | |||
35 | #ifndef _NFSD4_STATE_H | 35 | #ifndef _NFSD4_STATE_H |
36 | #define _NFSD4_STATE_H | 36 | #define _NFSD4_STATE_H |
37 | 37 | ||
38 | #include <linux/idr.h> | ||
38 | #include <linux/sunrpc/svc_xprt.h> | 39 | #include <linux/sunrpc/svc_xprt.h> |
39 | #include <linux/nfsd/nfsfh.h> | 40 | #include <linux/nfsd/nfsfh.h> |
40 | #include "nfsfh.h" | 41 | #include "nfsfh.h" |
@@ -45,24 +46,20 @@ typedef struct { | |||
45 | } clientid_t; | 46 | } clientid_t; |
46 | 47 | ||
47 | typedef struct { | 48 | typedef struct { |
48 | u32 so_boot; | 49 | clientid_t so_clid; |
49 | u32 so_stateownerid; | 50 | u32 so_id; |
50 | u32 so_fileid; | ||
51 | } stateid_opaque_t; | 51 | } stateid_opaque_t; |
52 | 52 | ||
53 | typedef struct { | 53 | typedef struct { |
54 | u32 si_generation; | 54 | u32 si_generation; |
55 | stateid_opaque_t si_opaque; | 55 | stateid_opaque_t si_opaque; |
56 | } stateid_t; | 56 | } stateid_t; |
57 | #define si_boot si_opaque.so_boot | ||
58 | #define si_stateownerid si_opaque.so_stateownerid | ||
59 | #define si_fileid si_opaque.so_fileid | ||
60 | 57 | ||
61 | #define STATEID_FMT "(%08x/%08x/%08x/%08x)" | 58 | #define STATEID_FMT "(%08x/%08x/%08x/%08x)" |
62 | #define STATEID_VAL(s) \ | 59 | #define STATEID_VAL(s) \ |
63 | (s)->si_boot, \ | 60 | (s)->si_opaque.so_clid.cl_boot, \ |
64 | (s)->si_stateownerid, \ | 61 | (s)->si_opaque.so_clid.cl_id, \ |
65 | (s)->si_fileid, \ | 62 | (s)->si_opaque.so_id, \ |
66 | (s)->si_generation | 63 | (s)->si_generation |
67 | 64 | ||
68 | struct nfsd4_callback { | 65 | struct nfsd4_callback { |
@@ -76,17 +73,27 @@ struct nfsd4_callback { | |||
76 | bool cb_done; | 73 | bool cb_done; |
77 | }; | 74 | }; |
78 | 75 | ||
76 | struct nfs4_stid { | ||
77 | #define NFS4_OPEN_STID 1 | ||
78 | #define NFS4_LOCK_STID 2 | ||
79 | #define NFS4_DELEG_STID 4 | ||
80 | /* For an open stateid kept around *only* to process close replays: */ | ||
81 | #define NFS4_CLOSED_STID 8 | ||
82 | unsigned char sc_type; | ||
83 | stateid_t sc_stateid; | ||
84 | struct nfs4_client *sc_client; | ||
85 | }; | ||
86 | |||
79 | struct nfs4_delegation { | 87 | struct nfs4_delegation { |
88 | struct nfs4_stid dl_stid; /* must be first field */ | ||
80 | struct list_head dl_perfile; | 89 | struct list_head dl_perfile; |
81 | struct list_head dl_perclnt; | 90 | struct list_head dl_perclnt; |
82 | struct list_head dl_recall_lru; /* delegation recalled */ | 91 | struct list_head dl_recall_lru; /* delegation recalled */ |
83 | atomic_t dl_count; /* ref count */ | 92 | atomic_t dl_count; /* ref count */ |
84 | struct nfs4_client *dl_client; | ||
85 | struct nfs4_file *dl_file; | 93 | struct nfs4_file *dl_file; |
86 | u32 dl_type; | 94 | u32 dl_type; |
87 | time_t dl_time; | 95 | time_t dl_time; |
88 | /* For recall: */ | 96 | /* For recall: */ |
89 | stateid_t dl_stateid; | ||
90 | struct knfsd_fh dl_fh; | 97 | struct knfsd_fh dl_fh; |
91 | int dl_retries; | 98 | int dl_retries; |
92 | struct nfsd4_callback dl_recall; | 99 | struct nfsd4_callback dl_recall; |
@@ -104,6 +111,11 @@ struct nfs4_cb_conn { | |||
104 | struct svc_xprt *cb_xprt; /* minorversion 1 only */ | 111 | struct svc_xprt *cb_xprt; /* minorversion 1 only */ |
105 | }; | 112 | }; |
106 | 113 | ||
114 | static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) | ||
115 | { | ||
116 | return container_of(s, struct nfs4_delegation, dl_stid); | ||
117 | } | ||
118 | |||
107 | /* Maximum number of slots per session. 160 is useful for long haul TCP */ | 119 | /* Maximum number of slots per session. 160 is useful for long haul TCP */ |
108 | #define NFSD_MAX_SLOTS_PER_SESSION 160 | 120 | #define NFSD_MAX_SLOTS_PER_SESSION 160 |
109 | /* Maximum number of operations per session compound */ | 121 | /* Maximum number of operations per session compound */ |
@@ -220,6 +232,7 @@ struct nfs4_client { | |||
220 | struct list_head cl_idhash; /* hash by cl_clientid.id */ | 232 | struct list_head cl_idhash; /* hash by cl_clientid.id */ |
221 | struct list_head cl_strhash; /* hash by cl_name */ | 233 | struct list_head cl_strhash; /* hash by cl_name */ |
222 | struct list_head cl_openowners; | 234 | struct list_head cl_openowners; |
235 | struct idr cl_stateids; /* stateid lookup */ | ||
223 | struct list_head cl_delegations; | 236 | struct list_head cl_delegations; |
224 | struct list_head cl_lru; /* tail queue */ | 237 | struct list_head cl_lru; /* tail queue */ |
225 | struct xdr_netobj cl_name; /* id generated by client */ | 238 | struct xdr_netobj cl_name; /* id generated by client */ |
@@ -245,6 +258,7 @@ struct nfs4_client { | |||
245 | #define NFSD4_CB_UP 0 | 258 | #define NFSD4_CB_UP 0 |
246 | #define NFSD4_CB_UNKNOWN 1 | 259 | #define NFSD4_CB_UNKNOWN 1 |
247 | #define NFSD4_CB_DOWN 2 | 260 | #define NFSD4_CB_DOWN 2 |
261 | #define NFSD4_CB_FAULT 3 | ||
248 | int cl_cb_state; | 262 | int cl_cb_state; |
249 | struct nfsd4_callback cl_cb_null; | 263 | struct nfsd4_callback cl_cb_null; |
250 | struct nfsd4_session *cl_cb_session; | 264 | struct nfsd4_session *cl_cb_session; |
@@ -293,6 +307,9 @@ static inline void | |||
293 | update_stateid(stateid_t *stateid) | 307 | update_stateid(stateid_t *stateid) |
294 | { | 308 | { |
295 | stateid->si_generation++; | 309 | stateid->si_generation++; |
310 | /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ | ||
311 | if (stateid->si_generation == 0) | ||
312 | stateid->si_generation = 1; | ||
296 | } | 313 | } |
297 | 314 | ||
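
update_stateid() now skips generation zero on wraparound, following the 3530bis recommendation: a si_generation of zero is special on the wire (clients may present it to bypass generation checking), so a wrapped counter must land on one. A trivial sketch of the same increment:

#include <stdio.h>

static unsigned int bump_generation(unsigned int gen)
{
	gen++;
	if (gen == 0)  /* zero is reserved as the "special" seqid */
		gen = 1;
	return gen;
}

int main(void)
{
	/* Wrapping from the maximum u32 lands on 1, never 0. */
	printf("%u -> %u\n", 0xffffffffu, bump_generation(0xffffffffu));
	return 0;
}
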
298 | /* A reasonable value for REPLAY_ISIZE was estimated as follows: | 315 | /* A reasonable value for REPLAY_ISIZE was estimated as follows: |
@@ -312,49 +329,57 @@ struct nfs4_replay { | |||
312 | __be32 rp_status; | 329 | __be32 rp_status; |
313 | unsigned int rp_buflen; | 330 | unsigned int rp_buflen; |
314 | char *rp_buf; | 331 | char *rp_buf; |
315 | unsigned int rp_allocated; | ||
316 | struct knfsd_fh rp_openfh; | 332 | struct knfsd_fh rp_openfh; |
317 | char rp_ibuf[NFSD4_REPLAY_ISIZE]; | 333 | char rp_ibuf[NFSD4_REPLAY_ISIZE]; |
318 | }; | 334 | }; |
319 | 335 | ||
320 | /* | ||
321 | * nfs4_stateowner can either be an open_owner, or a lock_owner | ||
322 | * | ||
323 | * so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[] | ||
324 | * for lock_owner | ||
325 | * so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[] | ||
326 | * for lock_owner | ||
327 | * so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client | ||
328 | * struct is reaped. | ||
329 | * so_perfilestate: heads the list of nfs4_stateid (either open or lock) | ||
330 | * and is used to ensure no dangling nfs4_stateid references when we | ||
331 | * release a stateowner. | ||
332 | * so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when | ||
333 | * close is called to reap associated byte-range locks | ||
334 | * so_close_lru: (open) stateowner is placed on this list instead of being | ||
335 | * reaped (when so_perfilestate is empty) to hold the last close replay. | ||
336 | * reaped by laundramat thread after lease period. | ||
337 | */ | ||
338 | struct nfs4_stateowner { | 336 | struct nfs4_stateowner { |
339 | struct kref so_ref; | ||
340 | struct list_head so_idhash; /* hash by so_id */ | ||
341 | struct list_head so_strhash; /* hash by op_name */ | 337 | struct list_head so_strhash; /* hash by op_name */ |
342 | struct list_head so_perclient; | ||
343 | struct list_head so_stateids; | 338 | struct list_head so_stateids; |
344 | struct list_head so_perstateid; /* for lockowners only */ | ||
345 | struct list_head so_close_lru; /* tail queue */ | ||
346 | time_t so_time; /* time of placement on so_close_lru */ | ||
347 | int so_is_open_owner; /* 1=openowner,0=lockowner */ | ||
348 | u32 so_id; | ||
349 | struct nfs4_client * so_client; | 339 | struct nfs4_client * so_client; |
350 | /* after increment in ENCODE_SEQID_OP_TAIL, represents the next | 340 | /* after increment in ENCODE_SEQID_OP_TAIL, represents the next |
351 | * sequence id expected from the client: */ | 341 | * sequence id expected from the client: */ |
352 | u32 so_seqid; | 342 | u32 so_seqid; |
353 | struct xdr_netobj so_owner; /* open owner name */ | 343 | struct xdr_netobj so_owner; /* open owner name */ |
354 | int so_confirmed; /* successful OPEN_CONFIRM? */ | ||
355 | struct nfs4_replay so_replay; | 344 | struct nfs4_replay so_replay; |
345 | bool so_is_open_owner; | ||
356 | }; | 346 | }; |
357 | 347 | ||
348 | struct nfs4_openowner { | ||
349 | struct nfs4_stateowner oo_owner; /* must be first field */ | ||
350 | struct list_head oo_perclient; | ||
351 | /* | ||
352 | * We keep around openowners a little while after last close, | ||
353 | * which saves clients from having to confirm, and allows us to | ||
354 | * handle close replays if they come soon enough. The close_lru | ||
355 | * is a list of such openowners, to be reaped by the laundromat | ||
356 | * thread eventually if they remain unused: | ||
357 | */ | ||
358 | struct list_head oo_close_lru; | ||
359 | struct nfs4_ol_stateid *oo_last_closed_stid; | ||
360 | time_t oo_time; /* time of placement on oo_close_lru */ | ||
361 | #define NFS4_OO_CONFIRMED 1 | ||
362 | #define NFS4_OO_PURGE_CLOSE 2 | ||
363 | #define NFS4_OO_NEW 4 | ||
364 | unsigned char oo_flags; | ||
365 | }; | ||
366 | |||
367 | struct nfs4_lockowner { | ||
368 | struct nfs4_stateowner lo_owner; /* must be first element */ | ||
369 | struct list_head lo_perstateid; /* for lockowners only */ | ||
370 | struct list_head lo_list; /* for temporary uses */ | ||
371 | }; | ||
372 | |||
373 | static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) | ||
374 | { | ||
375 | return container_of(so, struct nfs4_openowner, oo_owner); | ||
376 | } | ||
377 | |||
378 | static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) | ||
379 | { | ||
380 | return container_of(so, struct nfs4_lockowner, lo_owner); | ||
381 | } | ||
382 | |||
358 | /* | 383 | /* |
359 | * nfs4_file: a file opened by some number of (open) nfs4_stateowners. | 384 | * nfs4_file: a file opened by some number of (open) nfs4_stateowners. |
360 | * o fi_perfile list is used to search for conflicting | 385 | * o fi_perfile list is used to search for conflicting |
@@ -368,17 +393,17 @@ struct nfs4_file { | |||
368 | /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ | 393 | /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ |
369 | struct file * fi_fds[3]; | 394 | struct file * fi_fds[3]; |
370 | /* | 395 | /* |
371 | * Each open or lock stateid contributes 1 to either | 396 | * Each open or lock stateid contributes 0-4 to the counts |
372 | * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending | 397 | * below depending on which bits are set in st_access_bitmap: |
373 | * on open or lock mode: | 398 | * 1 to fi_access[O_RDONLY] if NFS4_SHARE_ACCESS_READ is set |
399 | * + 1 to fi_access[O_WRONLY] if NFS4_SHARE_ACCESS_WRITE is set | ||
400 | * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. | ||
374 | */ | 401 | */ |
375 | atomic_t fi_access[2]; | 402 | atomic_t fi_access[2]; |
376 | struct file *fi_deleg_file; | 403 | struct file *fi_deleg_file; |
377 | struct file_lock *fi_lease; | 404 | struct file_lock *fi_lease; |
378 | atomic_t fi_delegees; | 405 | atomic_t fi_delegees; |
379 | struct inode *fi_inode; | 406 | struct inode *fi_inode; |
380 | u32 fi_id; /* used with stateowner->so_id | ||
381 | * for stateid_hashtbl hash */ | ||
382 | bool fi_had_conflict; | 407 | bool fi_had_conflict; |
383 | }; | 408 | }; |
384 | 409 | ||
@@ -408,50 +433,27 @@ static inline struct file *find_any_file(struct nfs4_file *f) | |||
408 | return f->fi_fds[O_RDONLY]; | 433 | return f->fi_fds[O_RDONLY]; |
409 | } | 434 | } |
410 | 435 | ||
411 | /* | 436 | /* "ol" stands for "Open or Lock". Better suggestions welcome. */ |
412 | * nfs4_stateid can either be an open stateid or (eventually) a lock stateid | 437 | struct nfs4_ol_stateid { |
413 | * | 438 | struct nfs4_stid st_stid; /* must be first field */ |
414 | * (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file | ||
415 | * | ||
416 | * st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry | ||
417 | * st_perfile: file_hashtbl[] entry. | ||
418 | * st_perfile_state: nfs4_stateowner->so_perfilestate | ||
419 | * st_perlockowner: (open stateid) list of lock nfs4_stateowners | ||
420 | * st_access_bmap: used only for open stateid | ||
421 | * st_deny_bmap: used only for open stateid | ||
422 | * st_openstp: open stateid lock stateid was derived from | ||
423 | * | ||
424 | * XXX: open stateids and lock stateids have diverged sufficiently that | ||
425 | * we should consider defining separate structs for the two cases. | ||
426 | */ | ||
427 | |||
428 | struct nfs4_stateid { | ||
429 | struct list_head st_hash; | ||
430 | struct list_head st_perfile; | 439 | struct list_head st_perfile; |
431 | struct list_head st_perstateowner; | 440 | struct list_head st_perstateowner; |
432 | struct list_head st_lockowners; | 441 | struct list_head st_lockowners; |
433 | struct nfs4_stateowner * st_stateowner; | 442 | struct nfs4_stateowner * st_stateowner; |
434 | struct nfs4_file * st_file; | 443 | struct nfs4_file * st_file; |
435 | stateid_t st_stateid; | ||
436 | unsigned long st_access_bmap; | 444 | unsigned long st_access_bmap; |
437 | unsigned long st_deny_bmap; | 445 | unsigned long st_deny_bmap; |
438 | struct nfs4_stateid * st_openstp; | 446 | struct nfs4_ol_stateid * st_openstp; |
439 | }; | 447 | }; |
440 | 448 | ||
449 | static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | ||
450 | { | ||
451 | return container_of(s, struct nfs4_ol_stateid, st_stid); | ||
452 | } | ||
453 | |||
441 | /* flags for preprocess_seqid_op() */ | 454 | /* flags for preprocess_seqid_op() */ |
442 | #define HAS_SESSION 0x00000001 | ||
443 | #define CONFIRM 0x00000002 | ||
444 | #define OPEN_STATE 0x00000004 | ||
445 | #define LOCK_STATE 0x00000008 | ||
446 | #define RD_STATE 0x00000010 | 455 | #define RD_STATE 0x00000010 |
447 | #define WR_STATE 0x00000020 | 456 | #define WR_STATE 0x00000020 |
448 | #define CLOSE_STATE 0x00000040 | ||
449 | |||
450 | #define seqid_mutating_err(err) \ | ||
451 | (((err) != nfserr_stale_clientid) && \ | ||
452 | ((err) != nfserr_bad_seqid) && \ | ||
453 | ((err) != nfserr_stale_stateid) && \ | ||
454 | ((err) != nfserr_bad_stateid)) | ||
455 | 457 | ||
456 | struct nfsd4_compound_state; | 458 | struct nfsd4_compound_state; |
457 | 459 | ||
@@ -461,7 +463,8 @@ extern void nfs4_lock_state(void); | |||
461 | extern void nfs4_unlock_state(void); | 463 | extern void nfs4_unlock_state(void); |
462 | extern int nfs4_in_grace(void); | 464 | extern int nfs4_in_grace(void); |
463 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid); | 465 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid); |
464 | extern void nfs4_free_stateowner(struct kref *kref); | 466 | extern void nfs4_free_openowner(struct nfs4_openowner *); |
467 | extern void nfs4_free_lockowner(struct nfs4_lockowner *); | ||
465 | extern int set_callback_cred(void); | 468 | extern int set_callback_cred(void); |
466 | extern void nfsd4_probe_callback(struct nfs4_client *clp); | 469 | extern void nfsd4_probe_callback(struct nfs4_client *clp); |
467 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); | 470 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); |
@@ -473,7 +476,7 @@ extern void nfsd4_destroy_callback_queue(void); | |||
473 | extern void nfsd4_shutdown_callback(struct nfs4_client *); | 476 | extern void nfsd4_shutdown_callback(struct nfs4_client *); |
474 | extern void nfs4_put_delegation(struct nfs4_delegation *dp); | 477 | extern void nfs4_put_delegation(struct nfs4_delegation *dp); |
475 | extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); | 478 | extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); |
476 | extern void nfsd4_init_recdir(char *recdir_name); | 479 | extern void nfsd4_init_recdir(void); |
477 | extern int nfsd4_recdir_load(void); | 480 | extern int nfsd4_recdir_load(void); |
478 | extern void nfsd4_shutdown_recdir(void); | 481 | extern void nfsd4_shutdown_recdir(void); |
479 | extern int nfs4_client_to_reclaim(const char *name); | 482 | extern int nfs4_client_to_reclaim(const char *name); |
@@ -482,18 +485,7 @@ extern void nfsd4_recdir_purge_old(void); | |||
482 | extern int nfsd4_create_clid_dir(struct nfs4_client *clp); | 485 | extern int nfsd4_create_clid_dir(struct nfs4_client *clp); |
483 | extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); | 486 | extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); |
484 | extern void release_session_client(struct nfsd4_session *); | 487 | extern void release_session_client(struct nfsd4_session *); |
485 | extern __be32 nfs4_validate_stateid(stateid_t *, int); | 488 | extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); |
486 | 489 | extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); | |
487 | static inline void | ||
488 | nfs4_put_stateowner(struct nfs4_stateowner *so) | ||
489 | { | ||
490 | kref_put(&so->so_ref, nfs4_free_stateowner); | ||
491 | } | ||
492 | |||
493 | static inline void | ||
494 | nfs4_get_stateowner(struct nfs4_stateowner *so) | ||
495 | { | ||
496 | kref_get(&so->so_ref); | ||
497 | } | ||
498 | 490 | ||
499 | #endif /* NFSD4_STATE_H */ | 491 | #endif /* NFSD4_STATE_H */ |
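The state.h portion of this change splits the old kref-counted nfs4_stateowner into distinct open-owner and lock-owner types (hence the two dedicated free routines replacing nfs4_free_stateowner, and the removal of the kref get/put helpers), and introduces struct nfs4_stid as a common base embedded as the first field of nfs4_ol_stateid, so openlockstateid() can recover the outer object with container_of(). A minimal sketch of that embedded-base pattern; "base" and "outer" are placeholder names, not nfsd identifiers:

    #include <linux/kernel.h>       /* container_of() */

    struct base {
            int kind;
    };

    struct outer {
            struct base b;          /* must be first field */
            int extra;
    };

    static inline struct outer *to_outer(struct base *b)
    {
            return container_of(b, struct outer, b);
    }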
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fd0acca5370a..7a2e442623c8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -168,6 +168,8 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) | |||
168 | { | 168 | { |
169 | if (d_mountpoint(dentry)) | 169 | if (d_mountpoint(dentry)) |
170 | return 1; | 170 | return 1; |
171 | if (nfsd4_is_junction(dentry)) | ||
172 | return 1; | ||
171 | if (!(exp->ex_flags & NFSEXP_V4ROOT)) | 173 | if (!(exp->ex_flags & NFSEXP_V4ROOT)) |
172 | return 0; | 174 | return 0; |
173 | return dentry->d_inode != NULL; | 175 | return dentry->d_inode != NULL; |
@@ -502,7 +504,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
502 | unsigned int flags = 0; | 504 | unsigned int flags = 0; |
503 | 505 | ||
504 | /* Get inode */ | 506 | /* Get inode */ |
505 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); | 507 | error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); |
506 | if (error) | 508 | if (error) |
507 | return error; | 509 | return error; |
508 | 510 | ||
@@ -592,6 +594,22 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac | |||
592 | return error; | 594 | return error; |
593 | } | 595 | } |
594 | 596 | ||
597 | #define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." | ||
598 | #define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" | ||
599 | int nfsd4_is_junction(struct dentry *dentry) | ||
600 | { | ||
601 | struct inode *inode = dentry->d_inode; | ||
602 | |||
603 | if (inode == NULL) | ||
604 | return 0; | ||
605 | if (inode->i_mode & S_IXUGO) | ||
606 | return 0; | ||
607 | if (!(inode->i_mode & S_ISVTX)) | ||
608 | return 0; | ||
609 | if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) | ||
610 | return 0; | ||
611 | return 1; | ||
612 | } | ||
595 | #endif /* defined(CONFIG_NFSD_V4) */ | 613 | #endif /* defined(CONFIG_NFSD_V4) */ |
596 | 614 | ||
597 | #ifdef CONFIG_NFSD_V3 | 615 | #ifdef CONFIG_NFSD_V3 |
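nfsd4_is_junction() above classifies a dentry as an NFSv4 referral point when its mode has no execute bits but does have the sticky bit, and it carries a trusted.junction.type xattr; nfsd_mountpoint() then reports it like a mount point. A hypothetical userspace sketch of tagging a directory this way (the xattr value is purely illustrative, since only its presence matters to the kernel check):

    #include <sys/stat.h>
    #include <sys/xattr.h>

    /* Make "path" match the nfsd4_is_junction() test above:
     * sticky bit on, no rwx bits, trusted.junction.type present. */
    static int mark_junction(const char *path)
    {
            if (chmod(path, S_ISVTX))
                    return -1;
            return setxattr(path, "trusted.junction.type",
                            "nfs-basic", 9, 0); /* value is illustrative */
    }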
@@ -1352,7 +1370,7 @@ __be32 | |||
1352 | do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | 1370 | do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, |
1353 | char *fname, int flen, struct iattr *iap, | 1371 | char *fname, int flen, struct iattr *iap, |
1354 | struct svc_fh *resfhp, int createmode, u32 *verifier, | 1372 | struct svc_fh *resfhp, int createmode, u32 *verifier, |
1355 | int *truncp, int *created) | 1373 | bool *truncp, bool *created) |
1356 | { | 1374 | { |
1357 | struct dentry *dentry, *dchild = NULL; | 1375 | struct dentry *dentry, *dchild = NULL; |
1358 | struct inode *dirp; | 1376 | struct inode *dirp; |
@@ -1632,10 +1650,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1632 | err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); | 1650 | err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); |
1633 | if (err) | 1651 | if (err) |
1634 | goto out; | 1652 | goto out; |
1635 | err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); | 1653 | err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP); |
1636 | if (err) | 1654 | if (err) |
1637 | goto out; | 1655 | goto out; |
1638 | 1656 | err = nfserr_isdir; | |
1657 | if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode)) | ||
1658 | goto out; | ||
1639 | err = nfserr_perm; | 1659 | err = nfserr_perm; |
1640 | if (!len) | 1660 | if (!len) |
1641 | goto out; | 1661 | goto out; |
@@ -2114,7 +2134,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2114 | 2134 | ||
2115 | /* Allow read access to binaries even when mode 111 */ | 2135 | /* Allow read access to binaries even when mode 111 */ |
2116 | if (err == -EACCES && S_ISREG(inode->i_mode) && | 2136 | if (err == -EACCES && S_ISREG(inode->i_mode) && |
2117 | acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) | 2137 | (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) || |
2138 | acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC))) | ||
2118 | err = inode_permission(inode, MAY_EXEC); | 2139 | err = inode_permission(inode, MAY_EXEC); |
2119 | 2140 | ||
2120 | return err? nfserrno(err) : 0; | 2141 | return err? nfserrno(err) : 0; |
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index e0bbac04d1dd..3f54ad03bb2b 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -10,21 +10,22 @@ | |||
10 | /* | 10 | /* |
11 | * Flags for nfsd_permission | 11 | * Flags for nfsd_permission |
12 | */ | 12 | */ |
13 | #define NFSD_MAY_NOP 0 | 13 | #define NFSD_MAY_NOP 0 |
14 | #define NFSD_MAY_EXEC 1 /* == MAY_EXEC */ | 14 | #define NFSD_MAY_EXEC 0x001 /* == MAY_EXEC */ |
15 | #define NFSD_MAY_WRITE 2 /* == MAY_WRITE */ | 15 | #define NFSD_MAY_WRITE 0x002 /* == MAY_WRITE */ |
16 | #define NFSD_MAY_READ 4 /* == MAY_READ */ | 16 | #define NFSD_MAY_READ 0x004 /* == MAY_READ */ |
17 | #define NFSD_MAY_SATTR 8 | 17 | #define NFSD_MAY_SATTR 0x008 |
18 | #define NFSD_MAY_TRUNC 16 | 18 | #define NFSD_MAY_TRUNC 0x010 |
19 | #define NFSD_MAY_LOCK 32 | 19 | #define NFSD_MAY_LOCK 0x020 |
20 | #define NFSD_MAY_MASK 63 | 20 | #define NFSD_MAY_MASK 0x03f |
21 | 21 | ||
22 | /* extra hints to permission and open routines: */ | 22 | /* extra hints to permission and open routines: */ |
23 | #define NFSD_MAY_OWNER_OVERRIDE 64 | 23 | #define NFSD_MAY_OWNER_OVERRIDE 0x040 |
24 | #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ | 24 | #define NFSD_MAY_LOCAL_ACCESS 0x080 /* for device special files */ |
25 | #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 | 25 | #define NFSD_MAY_BYPASS_GSS_ON_ROOT 0x100 |
26 | #define NFSD_MAY_NOT_BREAK_LEASE 512 | 26 | #define NFSD_MAY_NOT_BREAK_LEASE 0x200 |
27 | #define NFSD_MAY_BYPASS_GSS 1024 | 27 | #define NFSD_MAY_BYPASS_GSS 0x400 |
28 | #define NFSD_MAY_READ_IF_EXEC 0x800 | ||
28 | 29 | ||
29 | #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) | 30 | #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) |
30 | #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) | 31 | #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) |
@@ -61,7 +62,7 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); | |||
61 | __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, | 62 | __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, |
62 | char *name, int len, struct iattr *attrs, | 63 | char *name, int len, struct iattr *attrs, |
63 | struct svc_fh *res, int createmode, | 64 | struct svc_fh *res, int createmode, |
64 | u32 *verifier, int *truncp, int *created); | 65 | u32 *verifier, bool *truncp, bool *created); |
65 | __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, | 66 | __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, |
66 | loff_t, unsigned long); | 67 | loff_t, unsigned long); |
67 | #endif /* CONFIG_NFSD_V3 */ | 68 | #endif /* CONFIG_NFSD_V3 */ |
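Rewriting the NFSD_MAY_* constants in hex makes it clear they are single bits of one mask, and makes room for the new NFSD_MAY_READ_IF_EXEC hint, which pairs with the nfsd_permission() hunk above: a read denied with -EACCES on a mode-111 regular file is retried as an execute-permission check. Illustrative caller shape, not a call site from this patch:

    /* A read-path caller opting in to the exec fallback: */
    err = nfsd_permission(rqstp, exp, dentry,
                          NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC);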
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d2a8d04428c7..2364747ee97d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -81,7 +81,6 @@ struct nfsd4_access { | |||
81 | struct nfsd4_close { | 81 | struct nfsd4_close { |
82 | u32 cl_seqid; /* request */ | 82 | u32 cl_seqid; /* request */ |
83 | stateid_t cl_stateid; /* request+response */ | 83 | stateid_t cl_stateid; /* request+response */ |
84 | struct nfs4_stateowner * cl_stateowner; /* response */ | ||
85 | }; | 84 | }; |
86 | 85 | ||
87 | struct nfsd4_commit { | 86 | struct nfsd4_commit { |
@@ -131,7 +130,7 @@ struct nfsd4_link { | |||
131 | 130 | ||
132 | struct nfsd4_lock_denied { | 131 | struct nfsd4_lock_denied { |
133 | clientid_t ld_clientid; | 132 | clientid_t ld_clientid; |
134 | struct nfs4_stateowner *ld_sop; | 133 | struct xdr_netobj ld_owner; |
135 | u64 ld_start; | 134 | u64 ld_start; |
136 | u64 ld_length; | 135 | u64 ld_length; |
137 | u32 ld_type; | 136 | u32 ld_type; |
@@ -165,9 +164,6 @@ struct nfsd4_lock { | |||
165 | } ok; | 164 | } ok; |
166 | struct nfsd4_lock_denied denied; | 165 | struct nfsd4_lock_denied denied; |
167 | } u; | 166 | } u; |
168 | /* The lk_replay_owner is the open owner in the open_to_lock_owner | ||
169 | * case and the lock owner otherwise: */ | ||
170 | struct nfs4_stateowner *lk_replay_owner; | ||
171 | }; | 167 | }; |
172 | #define lk_new_open_seqid v.new.open_seqid | 168 | #define lk_new_open_seqid v.new.open_seqid |
173 | #define lk_new_open_stateid v.new.open_stateid | 169 | #define lk_new_open_stateid v.new.open_stateid |
@@ -188,7 +184,6 @@ struct nfsd4_lockt { | |||
188 | struct xdr_netobj lt_owner; | 184 | struct xdr_netobj lt_owner; |
189 | u64 lt_offset; | 185 | u64 lt_offset; |
190 | u64 lt_length; | 186 | u64 lt_length; |
191 | struct nfs4_stateowner * lt_stateowner; | ||
192 | struct nfsd4_lock_denied lt_denied; | 187 | struct nfsd4_lock_denied lt_denied; |
193 | }; | 188 | }; |
194 | 189 | ||
@@ -199,7 +194,6 @@ struct nfsd4_locku { | |||
199 | stateid_t lu_stateid; | 194 | stateid_t lu_stateid; |
200 | u64 lu_offset; | 195 | u64 lu_offset; |
201 | u64 lu_length; | 196 | u64 lu_length; |
202 | struct nfs4_stateowner *lu_stateowner; | ||
203 | }; | 197 | }; |
204 | 198 | ||
205 | 199 | ||
@@ -232,8 +226,11 @@ struct nfsd4_open { | |||
232 | u32 op_recall; /* recall */ | 226 | u32 op_recall; /* recall */ |
233 | struct nfsd4_change_info op_cinfo; /* response */ | 227 | struct nfsd4_change_info op_cinfo; /* response */ |
234 | u32 op_rflags; /* response */ | 228 | u32 op_rflags; /* response */ |
235 | int op_truncate; /* used during processing */ | 229 | bool op_truncate; /* used during processing */ |
236 | struct nfs4_stateowner *op_stateowner; /* used during processing */ | 230 | bool op_created; /* used during processing */ |
231 | struct nfs4_openowner *op_openowner; /* used during processing */ | ||
232 | struct nfs4_file *op_file; /* used during processing */ | ||
233 | struct nfs4_ol_stateid *op_stp; /* used during processing */ | ||
237 | struct nfs4_acl *op_acl; | 234 | struct nfs4_acl *op_acl; |
238 | }; | 235 | }; |
239 | #define op_iattr iattr | 236 | #define op_iattr iattr |
@@ -243,7 +240,6 @@ struct nfsd4_open_confirm { | |||
243 | stateid_t oc_req_stateid /* request */; | 240 | stateid_t oc_req_stateid /* request */; |
244 | u32 oc_seqid /* request */; | 241 | u32 oc_seqid /* request */; |
245 | stateid_t oc_resp_stateid /* response */; | 242 | stateid_t oc_resp_stateid /* response */; |
246 | struct nfs4_stateowner * oc_stateowner; /* response */ | ||
247 | }; | 243 | }; |
248 | 244 | ||
249 | struct nfsd4_open_downgrade { | 245 | struct nfsd4_open_downgrade { |
@@ -251,7 +247,6 @@ struct nfsd4_open_downgrade { | |||
251 | u32 od_seqid; | 247 | u32 od_seqid; |
252 | u32 od_share_access; | 248 | u32 od_share_access; |
253 | u32 od_share_deny; | 249 | u32 od_share_deny; |
254 | struct nfs4_stateowner *od_stateowner; | ||
255 | }; | 250 | }; |
256 | 251 | ||
257 | 252 | ||
@@ -325,8 +320,7 @@ struct nfsd4_setattr { | |||
325 | 320 | ||
326 | struct nfsd4_setclientid { | 321 | struct nfsd4_setclientid { |
327 | nfs4_verifier se_verf; /* request */ | 322 | nfs4_verifier se_verf; /* request */ |
328 | u32 se_namelen; /* request */ | 323 | struct xdr_netobj se_name; |
329 | char * se_name; /* request */ | ||
330 | u32 se_callback_prog; /* request */ | 324 | u32 se_callback_prog; /* request */ |
331 | u32 se_callback_netid_len; /* request */ | 325 | u32 se_callback_netid_len; /* request */ |
332 | char * se_callback_netid_val; /* request */ | 326 | char * se_callback_netid_val; /* request */ |
@@ -351,7 +345,6 @@ struct nfsd4_saved_compoundargs { | |||
351 | 345 | ||
352 | struct nfsd4_test_stateid { | 346 | struct nfsd4_test_stateid { |
353 | __be32 ts_num_ids; | 347 | __be32 ts_num_ids; |
354 | __be32 ts_has_session; | ||
355 | struct nfsd4_compoundargs *ts_saved_args; | 348 | struct nfsd4_compoundargs *ts_saved_args; |
356 | struct nfsd4_saved_compoundargs ts_savedp; | 349 | struct nfsd4_saved_compoundargs ts_savedp; |
357 | }; | 350 | }; |
@@ -405,6 +398,10 @@ struct nfsd4_destroy_session { | |||
405 | struct nfs4_sessionid sessionid; | 398 | struct nfs4_sessionid sessionid; |
406 | }; | 399 | }; |
407 | 400 | ||
401 | struct nfsd4_destroy_clientid { | ||
402 | clientid_t clientid; | ||
403 | }; | ||
404 | |||
408 | struct nfsd4_reclaim_complete { | 405 | struct nfsd4_reclaim_complete { |
409 | u32 rca_one_fs; | 406 | u32 rca_one_fs; |
410 | }; | 407 | }; |
@@ -532,6 +529,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, | |||
532 | struct nfsd4_compoundargs *); | 529 | struct nfsd4_compoundargs *); |
533 | int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, | 530 | int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, |
534 | struct nfsd4_compoundres *); | 531 | struct nfsd4_compoundres *); |
532 | int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); | ||
535 | void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); | 533 | void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); |
536 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); | 534 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); |
537 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 535 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
@@ -558,11 +556,13 @@ extern __be32 nfsd4_sequence(struct svc_rqst *, | |||
558 | extern __be32 nfsd4_destroy_session(struct svc_rqst *, | 556 | extern __be32 nfsd4_destroy_session(struct svc_rqst *, |
559 | struct nfsd4_compound_state *, | 557 | struct nfsd4_compound_state *, |
560 | struct nfsd4_destroy_session *); | 558 | struct nfsd4_destroy_session *); |
559 | extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); | ||
561 | __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); | 560 | __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); |
562 | extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, | 561 | extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, |
563 | struct nfsd4_open *open); | 562 | struct nfsd4_open *open); |
564 | extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, | 563 | extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, |
565 | struct svc_fh *current_fh, struct nfsd4_open *open); | 564 | struct svc_fh *current_fh, struct nfsd4_open *open); |
565 | extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); | ||
566 | extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, | 566 | extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, |
567 | struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); | 567 | struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); |
568 | extern __be32 nfsd4_close(struct svc_rqst *rqstp, | 568 | extern __be32 nfsd4_close(struct svc_rqst *rqstp, |
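struct nfsd4_open now carries its processing state directly (op_openowner, op_file, and op_stp are looked up or preallocated up front) instead of a generic stateowner pointer, and nfsd4_cleanup_open_state() releases whatever process_open1/open2 did not consume. A rough sketch of the intended calling pattern, with the control flow simplified rather than copied from nfs4proc.c:

    __be32 status;

    status = nfsd4_process_open1(cstate, open);
    if (status)
            goto out;
    /* ... look up or create the target into cstate->current_fh ... */
    status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
    out:
    nfsd4_cleanup_open_state(open, status);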
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 81ecf9c0bf0a..194fb22ef79d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,20 +7185,9 @@ int ocfs2_init_security_and_acl(struct inode *dir, | |||
7185 | { | 7185 | { |
7186 | int ret = 0; | 7186 | int ret = 0; |
7187 | struct buffer_head *dir_bh = NULL; | 7187 | struct buffer_head *dir_bh = NULL; |
7188 | struct ocfs2_security_xattr_info si = { | ||
7189 | .enable = 1, | ||
7190 | }; | ||
7191 | 7188 | ||
7192 | ret = ocfs2_init_security_get(inode, dir, qstr, &si); | 7189 | ret = ocfs2_init_security_get(inode, dir, qstr, NULL); |
7193 | if (!ret) { | 7190 | if (!ret) { |
7194 | ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, | ||
7195 | si.name, si.value, si.value_len, | ||
7196 | XATTR_CREATE); | ||
7197 | if (ret) { | ||
7198 | mlog_errno(ret); | ||
7199 | goto leave; | ||
7200 | } | ||
7201 | } else if (ret != -EOPNOTSUPP) { | ||
7202 | mlog_errno(ret); | 7191 | mlog_errno(ret); |
7203 | goto leave; | 7192 | goto leave; |
7204 | } | 7193 | } |
@@ -7255,6 +7244,22 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, | |||
7255 | name, value, size, flags); | 7244 | name, value, size, flags); |
7256 | } | 7245 | } |
7257 | 7246 | ||
7247 | int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, | ||
7248 | void *fs_info) | ||
7249 | { | ||
7250 | const struct xattr *xattr; | ||
7251 | int err = 0; | ||
7252 | |||
7253 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | ||
7254 | err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, | ||
7255 | xattr->name, xattr->value, | ||
7256 | xattr->value_len, XATTR_CREATE); | ||
7257 | if (err) | ||
7258 | break; | ||
7259 | } | ||
7260 | return err; | ||
7261 | } | ||
7262 | |||
7258 | int ocfs2_init_security_get(struct inode *inode, | 7263 | int ocfs2_init_security_get(struct inode *inode, |
7259 | struct inode *dir, | 7264 | struct inode *dir, |
7260 | const struct qstr *qstr, | 7265 | const struct qstr *qstr, |
@@ -7263,8 +7268,13 @@ int ocfs2_init_security_get(struct inode *inode, | |||
7263 | /* check whether ocfs2 support feature xattr */ | 7268 | /* check whether ocfs2 support feature xattr */ |
7264 | if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) | 7269 | if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) |
7265 | return -EOPNOTSUPP; | 7270 | return -EOPNOTSUPP; |
7266 | return security_inode_init_security(inode, dir, qstr, &si->name, | 7271 | if (si) |
7267 | &si->value, &si->value_len); | 7272 | return security_old_inode_init_security(inode, dir, qstr, |
7273 | &si->name, &si->value, | ||
7274 | &si->value_len); | ||
7275 | |||
7276 | return security_inode_init_security(inode, dir, qstr, | ||
7277 | &ocfs2_initxattrs, NULL); | ||
7268 | } | 7278 | } |
7269 | 7279 | ||
7270 | int ocfs2_init_security_set(handle_t *handle, | 7280 | int ocfs2_init_security_set(handle_t *handle, |
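ocfs2 is converted here to the callback form of security_inode_init_security(): rather than receiving a single (name, value) pair to write itself, the filesystem supplies an initxattrs callback (ocfs2_initxattrs above) that persists every security xattr the LSMs produce, which also lets a filesystem batch them into one transaction. The single-pair contract survives as security_old_inode_init_security(), still used by reiserfs further down; its shape, for contrast (myfs_xattr_set() is a placeholder):

    char *name = NULL;
    void *value = NULL;
    size_t len = 0;
    int err;

    err = security_old_inode_init_security(inode, dir, qstr,
                                           &name, &value, &len);
    if (!err) {
            err = myfs_xattr_set(inode, name, value, len);
            kfree(name);
            kfree(value);
    }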
diff --git a/fs/open.c b/fs/open.c
--- a/fs/open.c
+++ b/fs/open.c
@@ -685,6 +685,10 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
685 | if (error) | 685 | if (error) |
686 | goto cleanup_all; | 686 | goto cleanup_all; |
687 | 687 | ||
688 | error = break_lease(inode, f->f_flags); | ||
689 | if (error) | ||
690 | goto cleanup_all; | ||
691 | |||
688 | if (!open && f->f_op) | 692 | if (!open && f->f_op) |
689 | open = f->f_op->open; | 693 | open = f->f_op->open; |
690 | if (open) { | 694 | if (open) { |
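Moving break_lease() into __dentry_open() makes every open path honor outstanding leases: a conflicting open now blocks (or fails, for nonblocking opens) until the lease holder has been notified and released its lease, or the lease-break timeout fires. An illustrative userspace view of what this guards:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("data", O_RDONLY);

            if (fd < 0 || fcntl(fd, F_SETLEASE, F_RDLCK) < 0)
                    return 1;
            /* a conflicting open() in another process now raises SIGIO
             * here and waits until we do fcntl(fd, F_SETLEASE, F_UNLCK),
             * or until /proc/sys/fs/lease-break-time expires */
            pause();
            return 0;
    }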
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 10027b42b7e2..cea4623f1ed6 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -218,6 +218,8 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) | |||
218 | const struct posix_acl_entry *pa, *pe, *mask_obj; | 218 | const struct posix_acl_entry *pa, *pe, *mask_obj; |
219 | int found = 0; | 219 | int found = 0; |
220 | 220 | ||
221 | want &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK; | ||
222 | |||
221 | FOREACH_ACL_ENTRY(pa, acl, pe) { | 223 | FOREACH_ACL_ENTRY(pa, acl, pe) { |
222 | switch(pa->e_tag) { | 224 | switch(pa->e_tag) { |
223 | case ACL_USER_OBJ: | 225 | case ACL_USER_OBJ: |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9758b654a1bc..42b274da92c3 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/irqnr.h> | 11 | #include <linux/irqnr.h> |
12 | #include <asm/cputime.h> | 12 | #include <asm/cputime.h> |
13 | #include <linux/tick.h> | ||
13 | 14 | ||
14 | #ifndef arch_irq_stat_cpu | 15 | #ifndef arch_irq_stat_cpu |
15 | #define arch_irq_stat_cpu(cpu) 0 | 16 | #define arch_irq_stat_cpu(cpu) 0 |
@@ -21,6 +22,35 @@ | |||
21 | #define arch_idle_time(cpu) 0 | 22 | #define arch_idle_time(cpu) 0 |
22 | #endif | 23 | #endif |
23 | 24 | ||
25 | static cputime64_t get_idle_time(int cpu) | ||
26 | { | ||
27 | u64 idle_time = get_cpu_idle_time_us(cpu, NULL); | ||
28 | cputime64_t idle; | ||
29 | |||
30 | if (idle_time == -1ULL) { | ||
31 | /* !NO_HZ so we can rely on cpustat.idle */ | ||
32 | idle = kstat_cpu(cpu).cpustat.idle; | ||
33 | idle = cputime64_add(idle, arch_idle_time(cpu)); | ||
34 | } else | ||
35 | idle = usecs_to_cputime(idle_time); | ||
36 | |||
37 | return idle; | ||
38 | } | ||
39 | |||
40 | static cputime64_t get_iowait_time(int cpu) | ||
41 | { | ||
42 | u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); | ||
43 | cputime64_t iowait; | ||
44 | |||
45 | if (iowait_time == -1ULL) | ||
46 | /* !NO_HZ so we can rely on cpustat.iowait */ | ||
47 | iowait = kstat_cpu(cpu).cpustat.iowait; | ||
48 | else | ||
49 | iowait = usecs_to_cputime(iowait_time); | ||
50 | |||
51 | return iowait; | ||
52 | } | ||
53 | |||
24 | static int show_stat(struct seq_file *p, void *v) | 54 | static int show_stat(struct seq_file *p, void *v) |
25 | { | 55 | { |
26 | int i, j; | 56 | int i, j; |
@@ -42,9 +72,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
42 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); | 72 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); |
43 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | 73 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); |
44 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | 74 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); |
45 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); | 75 | idle = cputime64_add(idle, get_idle_time(i)); |
46 | idle = cputime64_add(idle, arch_idle_time(i)); | 76 | iowait = cputime64_add(iowait, get_iowait_time(i)); |
47 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); | ||
48 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); | 77 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); |
49 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | 78 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
50 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); | 79 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); |
@@ -76,14 +105,12 @@ static int show_stat(struct seq_file *p, void *v) | |||
76 | (unsigned long long)cputime64_to_clock_t(guest), | 105 | (unsigned long long)cputime64_to_clock_t(guest), |
77 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | 106 | (unsigned long long)cputime64_to_clock_t(guest_nice)); |
78 | for_each_online_cpu(i) { | 107 | for_each_online_cpu(i) { |
79 | |||
80 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | 108 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ |
81 | user = kstat_cpu(i).cpustat.user; | 109 | user = kstat_cpu(i).cpustat.user; |
82 | nice = kstat_cpu(i).cpustat.nice; | 110 | nice = kstat_cpu(i).cpustat.nice; |
83 | system = kstat_cpu(i).cpustat.system; | 111 | system = kstat_cpu(i).cpustat.system; |
84 | idle = kstat_cpu(i).cpustat.idle; | 112 | idle = get_idle_time(i); |
85 | idle = cputime64_add(idle, arch_idle_time(i)); | 113 | iowait = get_iowait_time(i); |
86 | iowait = kstat_cpu(i).cpustat.iowait; | ||
87 | irq = kstat_cpu(i).cpustat.irq; | 114 | irq = kstat_cpu(i).cpustat.irq; |
88 | softirq = kstat_cpu(i).cpustat.softirq; | 115 | softirq = kstat_cpu(i).cpustat.softirq; |
89 | steal = kstat_cpu(i).cpustat.steal; | 116 | steal = kstat_cpu(i).cpustat.steal; |
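Under NO_HZ the timer tick stops on idle CPUs, so the tick-driven cpustat.idle/iowait counters under-count idle time; get_idle_time()/get_iowait_time() therefore prefer the nohz bookkeeping from get_cpu_idle_time_us()/get_cpu_iowait_time_us() and fall back to the tick counters only when those return -1ULL. The /proc/stat format is unchanged; a minimal userspace sampler, assuming the usual field order (user nice system idle iowait irq softirq steal):

    #include <stdio.h>
    #include <unistd.h>

    static int read_cpu(unsigned long long v[8])
    {
            FILE *f = fopen("/proc/stat", "r");
            int n;

            if (!f)
                    return -1;
            n = fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
                       &v[0], &v[1], &v[2], &v[3],
                       &v[4], &v[5], &v[6], &v[7]);
            fclose(f);
            return n >= 5 ? 0 : -1;
    }

    int main(void)
    {
            unsigned long long a[8] = {0}, b[8] = {0}, total = 0, idle;
            int i;

            if (read_cpu(a))
                    return 1;
            sleep(1);
            if (read_cpu(b))
                    return 1;
            for (i = 0; i < 8; i++)
                    total += b[i] - a[i];
            idle = (b[3] - a[3]) + (b[4] - a[4]);   /* idle + iowait */
            printf("busy: %.1f%%\n",
                   total ? 100.0 * (total - idle) / total : 0.0);
            return 0;
    }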
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 25b6a887adb9..5afaa58a8630 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -877,30 +877,54 @@ struct numa_maps_private { | |||
877 | struct numa_maps md; | 877 | struct numa_maps md; |
878 | }; | 878 | }; |
879 | 879 | ||
880 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) | 880 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, |
881 | unsigned long nr_pages) | ||
881 | { | 882 | { |
882 | int count = page_mapcount(page); | 883 | int count = page_mapcount(page); |
883 | 884 | ||
884 | md->pages++; | 885 | md->pages += nr_pages; |
885 | if (pte_dirty || PageDirty(page)) | 886 | if (pte_dirty || PageDirty(page)) |
886 | md->dirty++; | 887 | md->dirty += nr_pages; |
887 | 888 | ||
888 | if (PageSwapCache(page)) | 889 | if (PageSwapCache(page)) |
889 | md->swapcache++; | 890 | md->swapcache += nr_pages; |
890 | 891 | ||
891 | if (PageActive(page) || PageUnevictable(page)) | 892 | if (PageActive(page) || PageUnevictable(page)) |
892 | md->active++; | 893 | md->active += nr_pages; |
893 | 894 | ||
894 | if (PageWriteback(page)) | 895 | if (PageWriteback(page)) |
895 | md->writeback++; | 896 | md->writeback += nr_pages; |
896 | 897 | ||
897 | if (PageAnon(page)) | 898 | if (PageAnon(page)) |
898 | md->anon++; | 899 | md->anon += nr_pages; |
899 | 900 | ||
900 | if (count > md->mapcount_max) | 901 | if (count > md->mapcount_max) |
901 | md->mapcount_max = count; | 902 | md->mapcount_max = count; |
902 | 903 | ||
903 | md->node[page_to_nid(page)]++; | 904 | md->node[page_to_nid(page)] += nr_pages; |
905 | } | ||
906 | |||
907 | static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, | ||
908 | unsigned long addr) | ||
909 | { | ||
910 | struct page *page; | ||
911 | int nid; | ||
912 | |||
913 | if (!pte_present(pte)) | ||
914 | return NULL; | ||
915 | |||
916 | page = vm_normal_page(vma, addr, pte); | ||
917 | if (!page) | ||
918 | return NULL; | ||
919 | |||
920 | if (PageReserved(page)) | ||
921 | return NULL; | ||
922 | |||
923 | nid = page_to_nid(page); | ||
924 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
925 | return NULL; | ||
926 | |||
927 | return page; | ||
904 | } | 928 | } |
905 | 929 | ||
906 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | 930 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, |
@@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
912 | pte_t *pte; | 936 | pte_t *pte; |
913 | 937 | ||
914 | md = walk->private; | 938 | md = walk->private; |
915 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 939 | spin_lock(&walk->mm->page_table_lock); |
916 | do { | 940 | if (pmd_trans_huge(*pmd)) { |
917 | struct page *page; | 941 | if (pmd_trans_splitting(*pmd)) { |
918 | int nid; | 942 | spin_unlock(&walk->mm->page_table_lock); |
943 | wait_split_huge_page(md->vma->anon_vma, pmd); | ||
944 | } else { | ||
945 | pte_t huge_pte = *(pte_t *)pmd; | ||
946 | struct page *page; | ||
919 | 947 | ||
920 | if (!pte_present(*pte)) | 948 | page = can_gather_numa_stats(huge_pte, md->vma, addr); |
921 | continue; | 949 | if (page) |
950 | gather_stats(page, md, pte_dirty(huge_pte), | ||
951 | HPAGE_PMD_SIZE/PAGE_SIZE); | ||
952 | spin_unlock(&walk->mm->page_table_lock); | ||
953 | return 0; | ||
954 | } | ||
955 | } else { | ||
956 | spin_unlock(&walk->mm->page_table_lock); | ||
957 | } | ||
922 | 958 | ||
923 | page = vm_normal_page(md->vma, addr, *pte); | 959 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
960 | do { | ||
961 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | ||
924 | if (!page) | 962 | if (!page) |
925 | continue; | 963 | continue; |
926 | 964 | gather_stats(page, md, pte_dirty(*pte), 1); | |
927 | if (PageReserved(page)) | ||
928 | continue; | ||
929 | |||
930 | nid = page_to_nid(page); | ||
931 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
932 | continue; | ||
933 | |||
934 | gather_stats(page, md, pte_dirty(*pte)); | ||
935 | 965 | ||
936 | } while (pte++, addr += PAGE_SIZE, addr != end); | 966 | } while (pte++, addr += PAGE_SIZE, addr != end); |
937 | pte_unmap_unlock(orig_pte, ptl); | 967 | pte_unmap_unlock(orig_pte, ptl); |
@@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | |||
952 | return 0; | 982 | return 0; |
953 | 983 | ||
954 | md = walk->private; | 984 | md = walk->private; |
955 | gather_stats(page, md, pte_dirty(*pte)); | 985 | gather_stats(page, md, pte_dirty(*pte), 1); |
956 | return 0; | 986 | return 0; |
957 | } | 987 | } |
958 | 988 | ||
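gather_pte_stats() now accounts transparent huge pages explicitly: a stable huge pmd is checked once through can_gather_numa_stats() and credited with HPAGE_PMD_SIZE/PAGE_SIZE base pages in the numa_maps counters, while a splitting pmd is waited on before falling back to the per-pte loop. On x86-64 that multiplier works out to:

    /* x86-64 defaults: PAGE_SHIFT = 12, HPAGE_PMD_SHIFT = 21 */
    unsigned long nr_pages = 1UL << (21 - 12);  /* 512 base pages per THP */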
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb25490c..10b6be3ca280 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -355,7 +355,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, | |||
355 | * resolution (think about autofs) and thus deadlocks could arise. | 355 | * resolution (think about autofs) and thus deadlocks could arise. |
356 | */ | 356 | */ |
357 | if (cmds == Q_QUOTAON) { | 357 | if (cmds == Q_QUOTAON) { |
358 | ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); | 358 | ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
359 | if (ret) | 359 | if (ret) |
360 | pathp = ERR_PTR(ret); | 360 | pathp = ERR_PTR(ret); |
361 | else | 361 | else |
diff --git a/fs/read_write.c b/fs/read_write.c
index 179f1c33ea57..dfd125798791 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -35,23 +35,45 @@ static inline int unsigned_offsets(struct file *file) | |||
35 | return file->f_mode & FMODE_UNSIGNED_OFFSET; | 35 | return file->f_mode & FMODE_UNSIGNED_OFFSET; |
36 | } | 36 | } |
37 | 37 | ||
38 | static loff_t lseek_execute(struct file *file, struct inode *inode, | ||
39 | loff_t offset, loff_t maxsize) | ||
40 | { | ||
41 | if (offset < 0 && !unsigned_offsets(file)) | ||
42 | return -EINVAL; | ||
43 | if (offset > maxsize) | ||
44 | return -EINVAL; | ||
45 | |||
46 | if (offset != file->f_pos) { | ||
47 | file->f_pos = offset; | ||
48 | file->f_version = 0; | ||
49 | } | ||
50 | return offset; | ||
51 | } | ||
52 | |||
38 | /** | 53 | /** |
39 | * generic_file_llseek_unlocked - lockless generic llseek implementation | 54 | * generic_file_llseek_size - generic llseek implementation for regular files |
40 | * @file: file structure to seek on | 55 | * @file: file structure to seek on |
41 | * @offset: file offset to seek to | 56 | * @offset: file offset to seek to |
42 | * @origin: type of seek | 57 | * @origin: type of seek |
58 | * @size: max size of file system | ||
59 | * | ||
60 | * This is a variant of generic_file_llseek that allows passing in a custom | ||
61 | * file size. | ||
43 | * | 62 | * |
44 | * Updates the file offset to the value specified by @offset and @origin. | 63 | * Synchronization: |
45 | * Locking must be provided by the caller. | 64 | * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) |
65 | * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. | ||
66 | * read/writes behave like SEEK_SET against seeks. | ||
46 | */ | 67 | */ |
47 | loff_t | 68 | loff_t |
48 | generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | 69 | generic_file_llseek_size(struct file *file, loff_t offset, int origin, |
70 | loff_t maxsize) | ||
49 | { | 71 | { |
50 | struct inode *inode = file->f_mapping->host; | 72 | struct inode *inode = file->f_mapping->host; |
51 | 73 | ||
52 | switch (origin) { | 74 | switch (origin) { |
53 | case SEEK_END: | 75 | case SEEK_END: |
54 | offset += inode->i_size; | 76 | offset += i_size_read(inode); |
55 | break; | 77 | break; |
56 | case SEEK_CUR: | 78 | case SEEK_CUR: |
57 | /* | 79 | /* |
@@ -62,14 +84,22 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | |||
62 | */ | 84 | */ |
63 | if (offset == 0) | 85 | if (offset == 0) |
64 | return file->f_pos; | 86 | return file->f_pos; |
65 | offset += file->f_pos; | 87 | /* |
66 | break; | 88 | * f_lock protects against read/modify/write race with other |
89 | * SEEK_CURs. Note that parallel writes and reads behave | ||
90 | * like SEEK_SET. | ||
91 | */ | ||
92 | spin_lock(&file->f_lock); | ||
93 | offset = lseek_execute(file, inode, file->f_pos + offset, | ||
94 | maxsize); | ||
95 | spin_unlock(&file->f_lock); | ||
96 | return offset; | ||
67 | case SEEK_DATA: | 97 | case SEEK_DATA: |
68 | /* | 98 | /* |
69 | * In the generic case the entire file is data, so as long as | 99 | * In the generic case the entire file is data, so as long as |
70 | * offset isn't at the end of the file then the offset is data. | 100 | * offset isn't at the end of the file then the offset is data. |
71 | */ | 101 | */ |
72 | if (offset >= inode->i_size) | 102 | if (offset >= i_size_read(inode)) |
73 | return -ENXIO; | 103 | return -ENXIO; |
74 | break; | 104 | break; |
75 | case SEEK_HOLE: | 105 | case SEEK_HOLE: |
@@ -77,26 +107,15 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | |||
77 | * There is a virtual hole at the end of the file, so as long as | 107 | * There is a virtual hole at the end of the file, so as long as |
78 | * offset isn't i_size or larger, return i_size. | 108 | * offset isn't i_size or larger, return i_size. |
79 | */ | 109 | */ |
80 | if (offset >= inode->i_size) | 110 | if (offset >= i_size_read(inode)) |
81 | return -ENXIO; | 111 | return -ENXIO; |
82 | offset = inode->i_size; | 112 | offset = i_size_read(inode); |
83 | break; | 113 | break; |
84 | } | 114 | } |
85 | 115 | ||
86 | if (offset < 0 && !unsigned_offsets(file)) | 116 | return lseek_execute(file, inode, offset, maxsize); |
87 | return -EINVAL; | ||
88 | if (offset > inode->i_sb->s_maxbytes) | ||
89 | return -EINVAL; | ||
90 | |||
91 | /* Special lock needed here? */ | ||
92 | if (offset != file->f_pos) { | ||
93 | file->f_pos = offset; | ||
94 | file->f_version = 0; | ||
95 | } | ||
96 | |||
97 | return offset; | ||
98 | } | 117 | } |
99 | EXPORT_SYMBOL(generic_file_llseek_unlocked); | 118 | EXPORT_SYMBOL(generic_file_llseek_size); |
100 | 119 | ||
101 | /** | 120 | /** |
102 | * generic_file_llseek - generic llseek implementation for regular files | 121 | * generic_file_llseek - generic llseek implementation for regular files |
@@ -110,13 +129,10 @@ EXPORT_SYMBOL(generic_file_llseek_unlocked); | |||
110 | */ | 129 | */ |
111 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | 130 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) |
112 | { | 131 | { |
113 | loff_t rval; | 132 | struct inode *inode = file->f_mapping->host; |
114 | |||
115 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | ||
116 | rval = generic_file_llseek_unlocked(file, offset, origin); | ||
117 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); | ||
118 | 133 | ||
119 | return rval; | 134 | return generic_file_llseek_size(file, offset, origin, |
135 | inode->i_sb->s_maxbytes); | ||
120 | } | 136 | } |
121 | EXPORT_SYMBOL(generic_file_llseek); | 137 | EXPORT_SYMBOL(generic_file_llseek); |
122 | 138 | ||
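generic_file_llseek_size() turns the old i_mutex-protected llseek into a mostly lockless one: SEEK_SET/SEEK_END read the size with i_size_read() and update f_pos directly (atomic on 64-bit), and SEEK_CUR takes f_lock only to keep its read-modify-write of f_pos atomic against other SEEK_CURs, per the synchronization comment above. Filesystems whose maximum offset differs from sb->s_maxbytes can now call the helper directly; a hypothetical adopter, where myfs_max_bytes() is a placeholder for a per-superblock limit:

    static loff_t myfs_llseek(struct file *file, loff_t offset, int origin)
    {
            struct inode *inode = file->f_mapping->host;

            return generic_file_llseek_size(file, offset, origin,
                                            myfs_max_bytes(inode->i_sb));
    }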
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a159ba5a35e7..eb711060a6f2 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -291,14 +291,13 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb, | |||
291 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { | 291 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { |
292 | jb = jb_array + i; | 292 | jb = jb_array + i; |
293 | jb->journal_list = NULL; | 293 | jb->journal_list = NULL; |
294 | jb->bitmaps = vmalloc(mem); | 294 | jb->bitmaps = vzalloc(mem); |
295 | if (!jb->bitmaps) { | 295 | if (!jb->bitmaps) { |
296 | reiserfs_warning(sb, "clm-2000", "unable to " | 296 | reiserfs_warning(sb, "clm-2000", "unable to " |
297 | "allocate bitmaps for journal lists"); | 297 | "allocate bitmaps for journal lists"); |
298 | failed = 1; | 298 | failed = 1; |
299 | break; | 299 | break; |
300 | } | 300 | } |
301 | memset(jb->bitmaps, 0, mem); | ||
302 | } | 301 | } |
303 | if (failed) { | 302 | if (failed) { |
304 | free_list_bitmaps(sb, jb_array); | 303 | free_list_bitmaps(sb, jb_array); |
@@ -353,11 +352,10 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | |||
353 | if (num_cnodes <= 0) { | 352 | if (num_cnodes <= 0) { |
354 | return NULL; | 353 | return NULL; |
355 | } | 354 | } |
356 | head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); | 355 | head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); |
357 | if (!head) { | 356 | if (!head) { |
358 | return NULL; | 357 | return NULL; |
359 | } | 358 | } |
360 | memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); | ||
361 | head[0].prev = NULL; | 359 | head[0].prev = NULL; |
362 | head[0].next = head + 1; | 360 | head[0].next = head + 1; |
363 | for (i = 1; i < num_cnodes; i++) { | 361 | for (i = 1; i < num_cnodes; i++) { |
@@ -2685,14 +2683,13 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2685 | * dependency inversion warnings. | 2683 | * dependency inversion warnings. |
2686 | */ | 2684 | */ |
2687 | reiserfs_write_unlock(sb); | 2685 | reiserfs_write_unlock(sb); |
2688 | journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); | 2686 | journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); |
2689 | if (!journal) { | 2687 | if (!journal) { |
2690 | reiserfs_warning(sb, "journal-1256", | 2688 | reiserfs_warning(sb, "journal-1256", |
2691 | "unable to get memory for journal structure"); | 2689 | "unable to get memory for journal structure"); |
2692 | reiserfs_write_lock(sb); | 2690 | reiserfs_write_lock(sb); |
2693 | return 1; | 2691 | return 1; |
2694 | } | 2692 | } |
2695 | memset(journal, 0, sizeof(struct reiserfs_journal)); | ||
2696 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); | 2693 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); |
2697 | INIT_LIST_HEAD(&journal->j_prealloc_list); | 2694 | INIT_LIST_HEAD(&journal->j_prealloc_list); |
2698 | INIT_LIST_HEAD(&journal->j_working_list); | 2695 | INIT_LIST_HEAD(&journal->j_working_list); |
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index b6b9b1fe33b0..7483279b482d 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -111,15 +111,13 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
111 | /* allocate additional bitmap blocks, reallocate array of bitmap | 111 | /* allocate additional bitmap blocks, reallocate array of bitmap |
112 | * block pointers */ | 112 | * block pointers */ |
113 | bitmap = | 113 | bitmap = |
114 | vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); | 114 | vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); |
115 | if (!bitmap) { | 115 | if (!bitmap) { |
116 | /* Journal bitmaps are still supersized, but the memory isn't | 116 | /* Journal bitmaps are still supersized, but the memory isn't |
117 | * leaked, so I guess it's ok */ | 117 | * leaked, so I guess it's ok */ |
118 | printk("reiserfs_resize: unable to allocate memory.\n"); | 118 | printk("reiserfs_resize: unable to allocate memory.\n"); |
119 | return -ENOMEM; | 119 | return -ENOMEM; |
120 | } | 120 | } |
121 | memset(bitmap, 0, | ||
122 | sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); | ||
123 | for (i = 0; i < bmap_nr; i++) | 121 | for (i = 0; i < bmap_nr; i++) |
124 | bitmap[i] = old_bitmap[i]; | 122 | bitmap[i] = old_bitmap[i]; |
125 | 123 | ||
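The reiserfs hunks here and in journal.c are a mechanical conversion: vzalloc(n) replaces the vmalloc-then-memset pair with identical semantics:

    /* before */
    p = vmalloc(size);
    if (p)
            memset(p, 0, size);

    /* after: one call, same result */
    p = vzalloc(size);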
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ef66c18a9332..534668fa41be 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -66,8 +66,8 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, | |||
66 | if (IS_PRIVATE(dir)) | 66 | if (IS_PRIVATE(dir)) |
67 | return 0; | 67 | return 0; |
68 | 68 | ||
69 | error = security_inode_init_security(inode, dir, qstr, &sec->name, | 69 | error = security_old_inode_init_security(inode, dir, qstr, &sec->name, |
70 | &sec->value, &sec->length); | 70 | &sec->value, &sec->length); |
71 | if (error) { | 71 | if (error) { |
72 | if (error == -EOPNOTSUPP) | 72 | if (error == -EOPNOTSUPP) |
73 | error = 0; | 73 | error = 0; |
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 1360d4f88f41..048b59d5b2f0 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -19,9 +19,9 @@ config SQUASHFS | |||
19 | 19 | ||
20 | If you want to compile this as a module ( = code which can be | 20 | If you want to compile this as a module ( = code which can be |
21 | inserted in and removed from the running kernel whenever you want), | 21 | inserted in and removed from the running kernel whenever you want), |
22 | say M here and read <file:Documentation/modules.txt>. The module | 22 | say M here. The module will be called squashfs. Note that the root |
23 | will be called squashfs. Note that the root file system (the one | 23 | file system (the one containing the directory /) cannot be compiled |
24 | containing the directory /) cannot be compiled as a module. | 24 | as a module. |
25 | 25 | ||
26 | If unsure, say N. | 26 | If unsure, say N. |
27 | 27 | ||
diff --git a/fs/stat.c b/fs/stat.c
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
81 | 81 | ||
82 | if (!(flag & AT_SYMLINK_NOFOLLOW)) | 82 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
83 | lookup_flags |= LOOKUP_FOLLOW; | 83 | lookup_flags |= LOOKUP_FOLLOW; |
84 | if (flag & AT_NO_AUTOMOUNT) | ||
85 | lookup_flags |= LOOKUP_NO_AUTOMOUNT; | ||
86 | if (flag & AT_EMPTY_PATH) | 84 | if (flag & AT_EMPTY_PATH) |
87 | lookup_flags |= LOOKUP_EMPTY; | 85 | lookup_flags |= LOOKUP_EMPTY; |
88 | 86 | ||
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ea9120a830d8..48ffbdf0d017 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,20 +43,48 @@ static DEFINE_IDA(sysfs_ino_ida); | |||
43 | static void sysfs_link_sibling(struct sysfs_dirent *sd) | 43 | static void sysfs_link_sibling(struct sysfs_dirent *sd) |
44 | { | 44 | { |
45 | struct sysfs_dirent *parent_sd = sd->s_parent; | 45 | struct sysfs_dirent *parent_sd = sd->s_parent; |
46 | struct sysfs_dirent **pos; | ||
47 | 46 | ||
48 | BUG_ON(sd->s_sibling); | 47 | struct rb_node **p; |
49 | 48 | struct rb_node *parent; | |
50 | /* Store directory entries in order by ino. This allows | 49 | |
51 | * readdir to properly restart without having to add a | 50 | if (sysfs_type(sd) == SYSFS_DIR) |
52 | * cursor into the s_dir.children list. | 51 | parent_sd->s_dir.subdirs++; |
53 | */ | 52 | |
54 | for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) { | 53 | p = &parent_sd->s_dir.inode_tree.rb_node; |
55 | if (sd->s_ino < (*pos)->s_ino) | 54 | parent = NULL; |
56 | break; | 55 | while (*p) { |
56 | parent = *p; | ||
57 | #define node rb_entry(parent, struct sysfs_dirent, inode_node) | ||
58 | if (sd->s_ino < node->s_ino) { | ||
59 | p = &node->inode_node.rb_left; | ||
60 | } else if (sd->s_ino > node->s_ino) { | ||
61 | p = &node->inode_node.rb_right; | ||
62 | } else { | ||
63 | printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", | ||
64 | (unsigned long) sd->s_ino); | ||
65 | BUG(); | ||
66 | } | ||
67 | #undef node | ||
57 | } | 68 | } |
58 | sd->s_sibling = *pos; | 69 | rb_link_node(&sd->inode_node, parent, p); |
59 | *pos = sd; | 70 | rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree); |
71 | |||
72 | p = &parent_sd->s_dir.name_tree.rb_node; | ||
73 | parent = NULL; | ||
74 | while (*p) { | ||
75 | int c; | ||
76 | parent = *p; | ||
77 | #define node rb_entry(parent, struct sysfs_dirent, name_node) | ||
78 | c = strcmp(sd->s_name, node->s_name); | ||
79 | if (c < 0) { | ||
80 | p = &node->name_node.rb_left; | ||
81 | } else { | ||
82 | p = &node->name_node.rb_right; | ||
83 | } | ||
84 | #undef node | ||
85 | } | ||
86 | rb_link_node(&sd->name_node, parent, p); | ||
87 | rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); | ||
60 | } | 88 | } |
61 | 89 | ||
62 | /** | 90 | /** |
@@ -71,16 +99,11 @@ static void sysfs_link_sibling(struct sysfs_dirent *sd) | |||
71 | */ | 99 | */ |
72 | static void sysfs_unlink_sibling(struct sysfs_dirent *sd) | 100 | static void sysfs_unlink_sibling(struct sysfs_dirent *sd) |
73 | { | 101 | { |
74 | struct sysfs_dirent **pos; | 102 | if (sysfs_type(sd) == SYSFS_DIR) |
103 | sd->s_parent->s_dir.subdirs--; | ||
75 | 104 | ||
76 | for (pos = &sd->s_parent->s_dir.children; *pos; | 105 | rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); |
77 | pos = &(*pos)->s_sibling) { | 106 | rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree); |
78 | if (*pos == sd) { | ||
79 | *pos = sd->s_sibling; | ||
80 | sd->s_sibling = NULL; | ||
81 | break; | ||
82 | } | ||
83 | } | ||
84 | } | 107 | } |
85 | 108 | ||
86 | /** | 109 | /** |
@@ -126,7 +149,6 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) | |||
126 | */ | 149 | */ |
127 | void sysfs_put_active(struct sysfs_dirent *sd) | 150 | void sysfs_put_active(struct sysfs_dirent *sd) |
128 | { | 151 | { |
129 | struct completion *cmpl; | ||
130 | int v; | 152 | int v; |
131 | 153 | ||
132 | if (unlikely(!sd)) | 154 | if (unlikely(!sd)) |
@@ -138,10 +160,9 @@ void sysfs_put_active(struct sysfs_dirent *sd) | |||
138 | return; | 160 | return; |
139 | 161 | ||
140 | /* atomic_dec_return() is a mb(), we'll always see the updated | 162 | /* atomic_dec_return() is a mb(), we'll always see the updated |
141 | * sd->s_sibling. | 163 | * sd->u.completion. |
142 | */ | 164 | */ |
143 | cmpl = (void *)sd->s_sibling; | 165 | complete(sd->u.completion); |
144 | complete(cmpl); | ||
145 | } | 166 | } |
146 | 167 | ||
147 | /** | 168 | /** |
@@ -155,16 +176,16 @@ static void sysfs_deactivate(struct sysfs_dirent *sd) | |||
155 | DECLARE_COMPLETION_ONSTACK(wait); | 176 | DECLARE_COMPLETION_ONSTACK(wait); |
156 | int v; | 177 | int v; |
157 | 178 | ||
158 | BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); | 179 | BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); |
159 | 180 | ||
160 | if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) | 181 | if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) |
161 | return; | 182 | return; |
162 | 183 | ||
163 | sd->s_sibling = (void *)&wait; | 184 | sd->u.completion = (void *)&wait; |
164 | 185 | ||
165 | rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); | 186 | rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); |
166 | /* atomic_add_return() is a mb(), put_active() will always see | 187 | /* atomic_add_return() is a mb(), put_active() will always see |
167 | * the updated sd->s_sibling. | 188 | * the updated sd->u.completion. |
168 | */ | 189 | */ |
169 | v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); | 190 | v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); |
170 | 191 | ||
@@ -173,8 +194,6 @@ static void sysfs_deactivate(struct sysfs_dirent *sd) | |||
173 | wait_for_completion(&wait); | 194 | wait_for_completion(&wait); |
174 | } | 195 | } |
175 | 196 | ||
176 | sd->s_sibling = NULL; | ||
177 | |||
178 | lock_acquired(&sd->dep_map, _RET_IP_); | 197 | lock_acquired(&sd->dep_map, _RET_IP_); |
179 | rwsem_release(&sd->dep_map, 1, _RET_IP_); | 198 | rwsem_release(&sd->dep_map, 1, _RET_IP_); |
180 | } | 199 | } |
@@ -384,6 +403,13 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) | |||
384 | { | 403 | { |
385 | struct sysfs_inode_attrs *ps_iattr; | 404 | struct sysfs_inode_attrs *ps_iattr; |
386 | 405 | ||
406 | if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { | ||
407 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", | ||
408 | sysfs_ns_type(acxt->parent_sd)? "required": "invalid", | ||
409 | acxt->parent_sd->s_name, sd->s_name); | ||
410 | return -EINVAL; | ||
411 | } | ||
412 | |||
387 | if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) | 413 | if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) |
388 | return -EEXIST; | 414 | return -EEXIST; |
389 | 415 | ||
@@ -490,7 +516,7 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) | |||
490 | } | 516 | } |
491 | 517 | ||
492 | sd->s_flags |= SYSFS_FLAG_REMOVED; | 518 | sd->s_flags |= SYSFS_FLAG_REMOVED; |
493 | sd->s_sibling = acxt->removed; | 519 | sd->u.removed_list = acxt->removed; |
494 | acxt->removed = sd; | 520 | acxt->removed = sd; |
495 | } | 521 | } |
496 | 522 | ||
@@ -514,8 +540,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) | |||
514 | while (acxt->removed) { | 540 | while (acxt->removed) { |
515 | struct sysfs_dirent *sd = acxt->removed; | 541 | struct sysfs_dirent *sd = acxt->removed; |
516 | 542 | ||
517 | acxt->removed = sd->s_sibling; | 543 | acxt->removed = sd->u.removed_list; |
518 | sd->s_sibling = NULL; | ||
519 | 544 | ||
520 | sysfs_deactivate(sd); | 545 | sysfs_deactivate(sd); |
521 | unmap_bin_file(sd); | 546 | unmap_bin_file(sd); |
@@ -540,15 +565,43 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | |||
540 | const void *ns, | 565 | const void *ns, |
541 | const unsigned char *name) | 566 | const unsigned char *name) |
542 | { | 567 | { |
543 | struct sysfs_dirent *sd; | 568 | struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; |
569 | struct sysfs_dirent *found = NULL; | ||
544 | 570 | ||
545 | for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { | 571 | if (!!sysfs_ns_type(parent_sd) != !!ns) { |
546 | if (ns && sd->s_ns && (sd->s_ns != ns)) | 572 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", |
547 | continue; | 573 | sysfs_ns_type(parent_sd)? "required": "invalid", |
548 | if (!strcmp(sd->s_name, name)) | 574 | parent_sd->s_name, name); |
549 | return sd; | 575 | return NULL; |
550 | } | 576 | } |
551 | return NULL; | 577 | |
578 | while (p) { | ||
579 | int c; | ||
580 | #define node rb_entry(p, struct sysfs_dirent, name_node) | ||
581 | c = strcmp(name, node->s_name); | ||
582 | if (c < 0) { | ||
583 | p = node->name_node.rb_left; | ||
584 | } else if (c > 0) { | ||
585 | p = node->name_node.rb_right; | ||
586 | } else { | ||
587 | found = node; | ||
588 | p = node->name_node.rb_left; | ||
589 | } | ||
590 | #undef node | ||
591 | } | ||
592 | |||
593 | if (found) { | ||
594 | while (found->s_ns != ns) { | ||
595 | p = rb_next(&found->name_node); | ||
596 | if (!p) | ||
597 | return NULL; | ||
598 | found = rb_entry(p, struct sysfs_dirent, name_node); | ||
599 | if (strcmp(name, found->s_name)) | ||
600 | return NULL; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | return found; | ||
552 | } | 605 | } |
553 | 606 | ||
554 | /** | 607 | /** |
@@ -744,21 +797,19 @@ void sysfs_remove_subdir(struct sysfs_dirent *sd) | |||
744 | static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) | 797 | static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) |
745 | { | 798 | { |
746 | struct sysfs_addrm_cxt acxt; | 799 | struct sysfs_addrm_cxt acxt; |
747 | struct sysfs_dirent **pos; | 800 | struct rb_node *pos; |
748 | 801 | ||
749 | if (!dir_sd) | 802 | if (!dir_sd) |
750 | return; | 803 | return; |
751 | 804 | ||
752 | pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); | 805 | pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); |
753 | sysfs_addrm_start(&acxt, dir_sd); | 806 | sysfs_addrm_start(&acxt, dir_sd); |
754 | pos = &dir_sd->s_dir.children; | 807 | pos = rb_first(&dir_sd->s_dir.inode_tree); |
755 | while (*pos) { | 808 | while (pos) { |
756 | struct sysfs_dirent *sd = *pos; | 809 | struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); |
757 | 810 | pos = rb_next(pos); | |
758 | if (sysfs_type(sd) != SYSFS_DIR) | 811 | if (sysfs_type(sd) != SYSFS_DIR) |
759 | sysfs_remove_one(&acxt, sd); | 812 | sysfs_remove_one(&acxt, sd); |
760 | else | ||
761 | pos = &(*pos)->s_sibling; | ||
762 | } | 813 | } |
763 | sysfs_addrm_finish(&acxt); | 814 | sysfs_addrm_finish(&acxt); |
764 | 815 | ||
@@ -881,12 +932,28 @@ static struct sysfs_dirent *sysfs_dir_pos(const void *ns, | |||
881 | pos = NULL; | 932 | pos = NULL; |
882 | } | 933 | } |
883 | if (!pos && (ino > 1) && (ino < INT_MAX)) { | 934 | if (!pos && (ino > 1) && (ino < INT_MAX)) { |
884 | pos = parent_sd->s_dir.children; | 935 | struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; |
885 | while (pos && (ino > pos->s_ino)) | 936 | while (p) { |
886 | pos = pos->s_sibling; | 937 | #define node rb_entry(p, struct sysfs_dirent, inode_node) |
938 | if (ino < node->s_ino) { | ||
939 | pos = node; | ||
940 | p = node->inode_node.rb_left; | ||
941 | } else if (ino > node->s_ino) { | ||
942 | p = node->inode_node.rb_right; | ||
943 | } else { | ||
944 | pos = node; | ||
945 | break; | ||
946 | } | ||
947 | #undef node | ||
948 | } | ||
949 | } | ||
950 | while (pos && pos->s_ns != ns) { | ||
951 | struct rb_node *p = rb_next(&pos->inode_node); | ||
952 | if (!p) | ||
953 | pos = NULL; | ||
954 | else | ||
955 | pos = rb_entry(p, struct sysfs_dirent, inode_node); | ||
887 | } | 956 | } |
888 | while (pos && pos->s_ns && pos->s_ns != ns) | ||
889 | pos = pos->s_sibling; | ||
890 | return pos; | 957 | return pos; |
891 | } | 958 | } |
892 | 959 | ||
@@ -894,10 +961,13 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, | |||
894 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) | 961 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) |
895 | { | 962 | { |
896 | pos = sysfs_dir_pos(ns, parent_sd, ino, pos); | 963 | pos = sysfs_dir_pos(ns, parent_sd, ino, pos); |
897 | if (pos) | 964 | if (pos) do { |
898 | pos = pos->s_sibling; | 965 | struct rb_node *p = rb_next(&pos->inode_node); |
899 | while (pos && pos->s_ns && pos->s_ns != ns) | 966 | if (!p) |
900 | pos = pos->s_sibling; | 967 | pos = NULL; |
968 | else | ||
969 | pos = rb_entry(p, struct sysfs_dirent, inode_node); | ||
970 | } while (pos && pos->s_ns != ns); | ||
901 | return pos; | 971 | return pos; |
902 | } | 972 | } |
903 | 973 | ||
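dir.c replaces the single s_sibling list with two per-directory rbtrees, keyed by inode number (so readdir can restart at a stable position) and by name (so sysfs_find_dirent() is O(log n) instead of a linear scan); with s_sibling gone, its overloaded uses move into a union (u.completion during deactivation, u.removed_list on the addrm removal list). The insertions above follow the standard <linux/rbtree.h> pattern; its generic shape, with placeholder types:

    #include <linux/rbtree.h>

    struct item {
            struct rb_node node;
            unsigned long key;
    };

    static void item_insert(struct rb_root *root, struct item *new)
    {
            struct rb_node **p = &root->rb_node, *parent = NULL;

            while (*p) {
                    struct item *it = rb_entry(*p, struct item, node);

                    parent = *p;
                    if (new->key < it->key)
                            p = &(*p)->rb_left;
                    else
                            p = &(*p)->rb_right;
            }
            rb_link_node(&new->node, parent, p);
            rb_insert_color(&new->node, root);
    }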
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1ad8c93c1b85..d4e6080b4b20 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -466,9 +466,6 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) | |||
466 | mutex_lock(&sysfs_mutex); | 466 | mutex_lock(&sysfs_mutex); |
467 | 467 | ||
468 | if (sd && dir) | 468 | if (sd && dir) |
469 | /* Only directories are tagged, so no need to pass | ||
470 | * a tag explicitly. | ||
471 | */ | ||
472 | sd = sysfs_find_dirent(sd, NULL, dir); | 469 | sd = sysfs_find_dirent(sd, NULL, dir); |
473 | if (sd && attr) | 470 | if (sd && attr) |
474 | sd = sysfs_find_dirent(sd, NULL, attr); | 471 | sd = sysfs_find_dirent(sd, NULL, attr); |
@@ -488,17 +485,56 @@ const struct file_operations sysfs_file_operations = { | |||
488 | .poll = sysfs_poll, | 485 | .poll = sysfs_poll, |
489 | }; | 486 | }; |
490 | 487 | ||
488 | int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, | ||
489 | const void **pns) | ||
490 | { | ||
491 | struct sysfs_dirent *dir_sd = kobj->sd; | ||
492 | const struct sysfs_ops *ops; | ||
493 | const void *ns = NULL; | ||
494 | int err; | ||
495 | |||
496 | err = 0; | ||
497 | if (!sysfs_ns_type(dir_sd)) | ||
498 | goto out; | ||
499 | |||
500 | err = -EINVAL; | ||
501 | if (!kobj->ktype) | ||
502 | goto out; | ||
503 | ops = kobj->ktype->sysfs_ops; | ||
504 | if (!ops) | ||
505 | goto out; | ||
506 | if (!ops->namespace) | ||
507 | goto out; | ||
508 | |||
509 | err = 0; | ||
510 | ns = ops->namespace(kobj, attr); | ||
511 | out: | ||
512 | if (err) { | ||
513 | WARN(1, KERN_ERR "missing sysfs namespace attribute operation for " | ||
514 | "kobject: %s\n", kobject_name(kobj)); | ||
515 | } | ||
516 | *pns = ns; | ||
517 | return err; | ||
518 | } | ||
519 | |||
491 | int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, | 520 | int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, |
492 | const struct attribute *attr, int type, mode_t amode) | 521 | const struct attribute *attr, int type, mode_t amode) |
493 | { | 522 | { |
494 | umode_t mode = (amode & S_IALLUGO) | S_IFREG; | 523 | umode_t mode = (amode & S_IALLUGO) | S_IFREG; |
495 | struct sysfs_addrm_cxt acxt; | 524 | struct sysfs_addrm_cxt acxt; |
496 | struct sysfs_dirent *sd; | 525 | struct sysfs_dirent *sd; |
526 | const void *ns; | ||
497 | int rc; | 527 | int rc; |
498 | 528 | ||
529 | rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns); | ||
530 | if (rc) | ||
531 | return rc; | ||
532 | |||
499 | sd = sysfs_new_dirent(attr->name, mode, type); | 533 | sd = sysfs_new_dirent(attr->name, mode, type); |
500 | if (!sd) | 534 | if (!sd) |
501 | return -ENOMEM; | 535 | return -ENOMEM; |
536 | |||
537 | sd->s_ns = ns; | ||
502 | sd->s_attr.attr = (void *)attr; | 538 | sd->s_attr.attr = (void *)attr; |
503 | sysfs_dirent_init_lockdep(sd); | 539 | sysfs_dirent_init_lockdep(sd); |
504 | 540 | ||
@@ -586,12 +622,17 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, | |||
586 | { | 622 | { |
587 | struct sysfs_dirent *sd; | 623 | struct sysfs_dirent *sd; |
588 | struct iattr newattrs; | 624 | struct iattr newattrs; |
625 | const void *ns; | ||
589 | int rc; | 626 | int rc; |
590 | 627 | ||
628 | rc = sysfs_attr_ns(kobj, attr, &ns); | ||
629 | if (rc) | ||
630 | return rc; | ||
631 | |||
591 | mutex_lock(&sysfs_mutex); | 632 | mutex_lock(&sysfs_mutex); |
592 | 633 | ||
593 | rc = -ENOENT; | 634 | rc = -ENOENT; |
594 | sd = sysfs_find_dirent(kobj->sd, NULL, attr->name); | 635 | sd = sysfs_find_dirent(kobj->sd, ns, attr->name); |
595 | if (!sd) | 636 | if (!sd) |
596 | goto out; | 637 | goto out; |
597 | 638 | ||
@@ -616,7 +657,12 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); | |||
616 | 657 | ||
617 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) | 658 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) |
618 | { | 659 | { |
619 | sysfs_hash_and_remove(kobj->sd, NULL, attr->name); | 660 | const void *ns; |
661 | |||
662 | if (sysfs_attr_ns(kobj, attr, &ns)) | ||
663 | return; | ||
664 | |||
665 | sysfs_hash_and_remove(kobj->sd, ns, attr->name); | ||
620 | } | 666 | } |
621 | 667 | ||
622 | void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) | 668 | void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) |
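sysfs_attr_ns() above resolves an attribute's tag by calling ops->namespace(kobj, attr), so any ktype that lives under a namespace-tagged directory must supply that callback. A hedged sketch of what such a ktype's sysfs_ops could look like — the demo_obj type and its ns_tag field are hypothetical; only the .namespace signature is taken from the calls above:

	#include <linux/kobject.h>
	#include <linux/sysfs.h>

	struct demo_obj {
		struct kobject	kobj;
		const void	*ns_tag;	/* hypothetical per-object tag */
	};

	static ssize_t demo_show(struct kobject *kobj, struct attribute *attr,
				 char *buf)
	{
		return 0;	/* stub */
	}

	static const void *demo_namespace(struct kobject *kobj,
					  const struct attribute *attr)
	{
		return container_of(kobj, struct demo_obj, kobj)->ns_tag;
	}

	static const struct sysfs_ops demo_sysfs_ops = {
		.show		= demo_show,
		.namespace	= demo_namespace,
	};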
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e3f091a81c72..e23f28894a3a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -202,18 +202,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) | |||
202 | inode->i_ctime = iattr->ia_ctime; | 202 | inode->i_ctime = iattr->ia_ctime; |
203 | } | 203 | } |
204 | 204 | ||
205 | static int sysfs_count_nlink(struct sysfs_dirent *sd) | ||
206 | { | ||
207 | struct sysfs_dirent *child; | ||
208 | int nr = 0; | ||
209 | |||
210 | for (child = sd->s_dir.children; child; child = child->s_sibling) | ||
211 | if (sysfs_type(child) == SYSFS_DIR) | ||
212 | nr++; | ||
213 | |||
214 | return nr + 2; | ||
215 | } | ||
216 | |||
217 | static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) | 205 | static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) |
218 | { | 206 | { |
219 | struct sysfs_inode_attrs *iattrs = sd->s_iattr; | 207 | struct sysfs_inode_attrs *iattrs = sd->s_iattr; |
@@ -230,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) | |||
230 | } | 218 | } |
231 | 219 | ||
232 | if (sysfs_type(sd) == SYSFS_DIR) | 220 | if (sysfs_type(sd) == SYSFS_DIR) |
233 | inode->i_nlink = sysfs_count_nlink(sd); | 221 | inode->i_nlink = sd->s_dir.subdirs + 2; |
234 | } | 222 | } |
235 | 223 | ||
236 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 224 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
@@ -336,8 +324,6 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha | |||
336 | sysfs_addrm_start(&acxt, dir_sd); | 324 | sysfs_addrm_start(&acxt, dir_sd); |
337 | 325 | ||
338 | sd = sysfs_find_dirent(dir_sd, ns, name); | 326 | sd = sysfs_find_dirent(dir_sd, ns, name); |
339 | if (sd && (sd->s_ns != ns)) | ||
340 | sd = NULL; | ||
341 | if (sd) | 327 | if (sd) |
342 | sysfs_remove_one(&acxt, sd); | 328 | sysfs_remove_one(&acxt, sd); |
343 | 329 | ||
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 845ab3ad229d..ce29e28b766d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -11,14 +11,18 @@ | |||
11 | #include <linux/lockdep.h> | 11 | #include <linux/lockdep.h> |
12 | #include <linux/kobject_ns.h> | 12 | #include <linux/kobject_ns.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/rbtree.h> | ||
14 | 15 | ||
15 | struct sysfs_open_dirent; | 16 | struct sysfs_open_dirent; |
16 | 17 | ||
17 | /* type-specific structures for sysfs_dirent->s_* union members */ | 18 | /* type-specific structures for sysfs_dirent->s_* union members */ |
18 | struct sysfs_elem_dir { | 19 | struct sysfs_elem_dir { |
19 | struct kobject *kobj; | 20 | struct kobject *kobj; |
20 | /* children list starts here and goes through sd->s_sibling */ | 21 | |
21 | struct sysfs_dirent *children; | 22 | unsigned long subdirs; |
23 | |||
24 | struct rb_root inode_tree; | ||
25 | struct rb_root name_tree; | ||
22 | }; | 26 | }; |
23 | 27 | ||
24 | struct sysfs_elem_symlink { | 28 | struct sysfs_elem_symlink { |
@@ -56,9 +60,16 @@ struct sysfs_dirent { | |||
56 | struct lockdep_map dep_map; | 60 | struct lockdep_map dep_map; |
57 | #endif | 61 | #endif |
58 | struct sysfs_dirent *s_parent; | 62 | struct sysfs_dirent *s_parent; |
59 | struct sysfs_dirent *s_sibling; | ||
60 | const char *s_name; | 63 | const char *s_name; |
61 | 64 | ||
65 | struct rb_node inode_node; | ||
66 | struct rb_node name_node; | ||
67 | |||
68 | union { | ||
69 | struct completion *completion; | ||
70 | struct sysfs_dirent *removed_list; | ||
71 | } u; | ||
72 | |||
62 | const void *s_ns; /* namespace tag */ | 73 | const void *s_ns; /* namespace tag */ |
63 | union { | 74 | union { |
64 | struct sysfs_elem_dir s_dir; | 75 | struct sysfs_elem_dir s_dir; |
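Each sysfs_dirent now carries two rb_node members, so the same dirent sits in both of the parent's trees at once: inode_tree ordered by s_ino and name_tree ordered by s_name. Insertion into either tree follows the usual rb_link_node()/rb_insert_color() pattern; a sketch reusing the hypothetical 'item' type from the earlier example:

	static void item_insert(struct rb_root *root, struct item *new)
	{
		struct rb_node **p = &root->rb_node, *parent = NULL;

		while (*p) {
			struct item *it = rb_entry(*p, struct item, node);

			parent = *p;
			if (new->key < it->key)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
		rb_link_node(&new->node, parent, p);
		rb_insert_color(&new->node, root);
	}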
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 45174b534377..feb361e252ac 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
335 | #define DBGKEY(key) ((char *)(key)) | 335 | #define DBGKEY(key) ((char *)(key)) |
336 | #define DBGKEY1(key) ((char *)(key)) | 336 | #define DBGKEY1(key) ((char *)(key)) |
337 | 337 | ||
338 | #define ubifs_dbg_msg(fmt, ...) do { \ | 338 | #define ubifs_dbg_msg(fmt, ...) do { \ |
339 | if (0) \ | 339 | if (0) \ |
340 | pr_debug(fmt "\n", ##__VA_ARGS__); \ | 340 | printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ |
341 | } while (0) | 341 | } while (0) |
342 | 342 | ||
343 | #define dbg_dump_stack() | 343 | #define dbg_dump_stack() |
diff --git a/fs/xattr.c b/fs/xattr.c
index f060663ab70c..67583de8218c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
15 | #include <linux/namei.h> | 15 | #include <linux/namei.h> |
16 | #include <linux/security.h> | 16 | #include <linux/security.h> |
17 | #include <linux/evm.h> | ||
17 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/fsnotify.h> | 20 | #include <linux/fsnotify.h> |
@@ -166,6 +167,64 @@ out_noalloc: | |||
166 | } | 167 | } |
167 | EXPORT_SYMBOL_GPL(xattr_getsecurity); | 168 | EXPORT_SYMBOL_GPL(xattr_getsecurity); |
168 | 169 | ||
170 | /* | ||
171 | * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr | ||
172 | * | ||
173 | * Allocate memory, if not already allocated, or re-allocate correct size, | ||
174 | * before retrieving the extended attribute. | ||
175 | * | ||
176 | * Returns the result of alloc, if failed, or the getxattr operation. | ||
177 | */ | ||
178 | ssize_t | ||
179 | vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, | ||
180 | size_t xattr_size, gfp_t flags) | ||
181 | { | ||
182 | struct inode *inode = dentry->d_inode; | ||
183 | char *value = *xattr_value; | ||
184 | int error; | ||
185 | |||
186 | error = xattr_permission(inode, name, MAY_READ); | ||
187 | if (error) | ||
188 | return error; | ||
189 | |||
190 | if (!inode->i_op->getxattr) | ||
191 | return -EOPNOTSUPP; | ||
192 | |||
193 | error = inode->i_op->getxattr(dentry, name, NULL, 0); | ||
194 | if (error < 0) | ||
195 | return error; | ||
196 | |||
197 | if (!value || (error > xattr_size)) { | ||
198 | value = krealloc(*xattr_value, error + 1, flags); | ||
199 | if (!value) | ||
200 | return -ENOMEM; | ||
201 | memset(value, 0, error + 1); | ||
202 | } | ||
203 | |||
204 | error = inode->i_op->getxattr(dentry, name, value, error); | ||
205 | *xattr_value = value; | ||
206 | return error; | ||
207 | } | ||
208 | |||
209 | /* Compare an extended attribute value with the given value */ | ||
210 | int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name, | ||
211 | const char *value, size_t size, gfp_t flags) | ||
212 | { | ||
213 | char *xattr_value = NULL; | ||
214 | int rc; | ||
215 | |||
216 | rc = vfs_getxattr_alloc(dentry, xattr_name, &xattr_value, 0, flags); | ||
217 | if (rc < 0) | ||
218 | return rc; | ||
219 | |||
220 | if ((rc != size) || (memcmp(xattr_value, value, rc) != 0)) | ||
221 | rc = -EINVAL; | ||
222 | else | ||
223 | rc = 0; | ||
224 | kfree(xattr_value); | ||
225 | return rc; | ||
226 | } | ||
227 | |||
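A short usage sketch of the two helpers added above (the caller and the "security.example" name are hypothetical; GFP_NOFS is just one plausible allocation mode): vfs_getxattr_alloc() sizes the buffer itself and hands ownership to the caller, which must kfree() it, while vfs_xattr_cmp() hides the allocation entirely.

	#include <linux/xattr.h>
	#include <linux/printk.h>
	#include <linux/slab.h>

	static int check_label(struct dentry *dentry)
	{
		char *value = NULL;
		ssize_t len;

		len = vfs_getxattr_alloc(dentry, "security.example",
					 &value, 0, GFP_NOFS);
		if (len < 0)
			return len;		/* missing xattr or I/O error */

		pr_info("xattr is %zd bytes\n", len);
		kfree(value);

		/* Or compare against an expected value in one call: */
		return vfs_xattr_cmp(dentry, "security.example",
				     "expected", 8, GFP_NOFS);
	}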
169 | ssize_t | 228 | ssize_t |
170 | vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) | 229 | vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) |
171 | { | 230 | { |
@@ -243,8 +302,10 @@ vfs_removexattr(struct dentry *dentry, const char *name) | |||
243 | error = inode->i_op->removexattr(dentry, name); | 302 | error = inode->i_op->removexattr(dentry, name); |
244 | mutex_unlock(&inode->i_mutex); | 303 | mutex_unlock(&inode->i_mutex); |
245 | 304 | ||
246 | if (!error) | 305 | if (!error) { |
247 | fsnotify_xattr(dentry); | 306 | fsnotify_xattr(dentry); |
307 | evm_inode_post_removexattr(dentry, name); | ||
308 | } | ||
248 | return error; | 309 | return error; |
249 | } | 310 | } |
250 | EXPORT_SYMBOL_GPL(vfs_removexattr); | 311 | EXPORT_SYMBOL_GPL(vfs_removexattr); |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 75bb316529dd..427a4e82a588 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,44 +16,53 @@ | |||
16 | # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | # | 17 | # |
18 | 18 | ||
19 | ccflags-y := -I$(src) -I$(src)/linux-2.6 | 19 | ccflags-y += -I$(src) # needed for trace events |
20 | ccflags-$(CONFIG_XFS_DEBUG) += -g | ||
21 | 20 | ||
22 | XFS_LINUX := linux-2.6 | 21 | ccflags-$(CONFIG_XFS_DEBUG) += -g |
23 | 22 | ||
24 | obj-$(CONFIG_XFS_FS) += xfs.o | 23 | obj-$(CONFIG_XFS_FS) += xfs.o |
25 | 24 | ||
26 | xfs-y += linux-2.6/xfs_trace.o | 25 | # this one should be compiled first, as the tracing macros can easily blow up |
27 | 26 | xfs-y += xfs_trace.o | |
28 | xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ | ||
29 | xfs_dquot.o \ | ||
30 | xfs_dquot_item.o \ | ||
31 | xfs_trans_dquot.o \ | ||
32 | xfs_qm_syscalls.o \ | ||
33 | xfs_qm_bhv.o \ | ||
34 | xfs_qm.o) | ||
35 | xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o | ||
36 | |||
37 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
38 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o | ||
39 | endif | ||
40 | |||
41 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
42 | xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o | ||
43 | xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o | ||
44 | xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o | ||
45 | xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o | ||
46 | 27 | ||
28 | # highlevel code | ||
29 | xfs-y += xfs_aops.o \ | ||
30 | xfs_bit.o \ | ||
31 | xfs_buf.o \ | ||
32 | xfs_dfrag.o \ | ||
33 | xfs_discard.o \ | ||
34 | xfs_error.o \ | ||
35 | xfs_export.o \ | ||
36 | xfs_file.o \ | ||
37 | xfs_filestream.o \ | ||
38 | xfs_fsops.o \ | ||
39 | xfs_fs_subr.o \ | ||
40 | xfs_globals.o \ | ||
41 | xfs_iget.o \ | ||
42 | xfs_ioctl.o \ | ||
43 | xfs_iomap.o \ | ||
44 | xfs_iops.o \ | ||
45 | xfs_itable.o \ | ||
46 | xfs_message.o \ | ||
47 | xfs_mru_cache.o \ | ||
48 | xfs_super.o \ | ||
49 | xfs_sync.o \ | ||
50 | xfs_xattr.o \ | ||
51 | xfs_rename.o \ | ||
52 | xfs_rw.o \ | ||
53 | xfs_utils.o \ | ||
54 | xfs_vnodeops.o \ | ||
55 | kmem.o \ | ||
56 | uuid.o | ||
47 | 57 | ||
58 | # code shared with libxfs | ||
48 | xfs-y += xfs_alloc.o \ | 59 | xfs-y += xfs_alloc.o \ |
49 | xfs_alloc_btree.o \ | 60 | xfs_alloc_btree.o \ |
50 | xfs_attr.o \ | 61 | xfs_attr.o \ |
51 | xfs_attr_leaf.o \ | 62 | xfs_attr_leaf.o \ |
52 | xfs_bit.o \ | ||
53 | xfs_bmap.o \ | 63 | xfs_bmap.o \ |
54 | xfs_bmap_btree.o \ | 64 | xfs_bmap_btree.o \ |
55 | xfs_btree.o \ | 65 | xfs_btree.o \ |
56 | xfs_buf_item.o \ | ||
57 | xfs_da_btree.o \ | 66 | xfs_da_btree.o \ |
58 | xfs_dir2.o \ | 67 | xfs_dir2.o \ |
59 | xfs_dir2_block.o \ | 68 | xfs_dir2_block.o \ |
@@ -61,49 +70,37 @@ xfs-y += xfs_alloc.o \ | |||
61 | xfs_dir2_leaf.o \ | 70 | xfs_dir2_leaf.o \ |
62 | xfs_dir2_node.o \ | 71 | xfs_dir2_node.o \ |
63 | xfs_dir2_sf.o \ | 72 | xfs_dir2_sf.o \ |
64 | xfs_error.o \ | ||
65 | xfs_extfree_item.o \ | ||
66 | xfs_filestream.o \ | ||
67 | xfs_fsops.o \ | ||
68 | xfs_ialloc.o \ | 73 | xfs_ialloc.o \ |
69 | xfs_ialloc_btree.o \ | 74 | xfs_ialloc_btree.o \ |
70 | xfs_iget.o \ | ||
71 | xfs_inode.o \ | 75 | xfs_inode.o \ |
72 | xfs_inode_item.o \ | ||
73 | xfs_iomap.o \ | ||
74 | xfs_itable.o \ | ||
75 | xfs_dfrag.o \ | ||
76 | xfs_log.o \ | ||
77 | xfs_log_cil.o \ | ||
78 | xfs_log_recover.o \ | 76 | xfs_log_recover.o \ |
79 | xfs_mount.o \ | 77 | xfs_mount.o \ |
80 | xfs_mru_cache.o \ | 78 | xfs_trans.o |
81 | xfs_rename.o \ | 79 | |
82 | xfs_trans.o \ | 80 | # low-level transaction/log code |
81 | xfs-y += xfs_log.o \ | ||
82 | xfs_log_cil.o \ | ||
83 | xfs_buf_item.o \ | ||
84 | xfs_extfree_item.o \ | ||
85 | xfs_inode_item.o \ | ||
83 | xfs_trans_ail.o \ | 86 | xfs_trans_ail.o \ |
84 | xfs_trans_buf.o \ | 87 | xfs_trans_buf.o \ |
85 | xfs_trans_extfree.o \ | 88 | xfs_trans_extfree.o \ |
86 | xfs_trans_inode.o \ | 89 | xfs_trans_inode.o \ |
87 | xfs_utils.o \ | ||
88 | xfs_vnodeops.o \ | ||
89 | xfs_rw.o | ||
90 | |||
91 | # Objects in linux/ | ||
92 | xfs-y += $(addprefix $(XFS_LINUX)/, \ | ||
93 | kmem.o \ | ||
94 | xfs_aops.o \ | ||
95 | xfs_buf.o \ | ||
96 | xfs_discard.o \ | ||
97 | xfs_export.o \ | ||
98 | xfs_file.o \ | ||
99 | xfs_fs_subr.o \ | ||
100 | xfs_globals.o \ | ||
101 | xfs_ioctl.o \ | ||
102 | xfs_iops.o \ | ||
103 | xfs_message.o \ | ||
104 | xfs_super.o \ | ||
105 | xfs_sync.o \ | ||
106 | xfs_xattr.o) | ||
107 | 90 | ||
108 | # Objects in support/ | 91 | # optional features |
109 | xfs-y += support/uuid.o | 92 | xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ |
93 | xfs_dquot_item.o \ | ||
94 | xfs_trans_dquot.o \ | ||
95 | xfs_qm_syscalls.o \ | ||
96 | xfs_qm_bhv.o \ | ||
97 | xfs_qm.o \ | ||
98 | xfs_quotaops.o | ||
99 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
100 | xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o | ||
101 | endif | ||
102 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
103 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | ||
104 | xfs-$(CONFIG_PROC_FS) += xfs_stats.o | ||
105 | xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o | ||
106 | xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o | ||
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/kmem.c
index a907de565db3..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/kmem.c
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/kmem.h
index f7c8f7a9ea6d..292eff198030 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/kmem.h
@@ -61,12 +61,7 @@ extern void kmem_free(const void *); | |||
61 | 61 | ||
62 | static inline void *kmem_zalloc_large(size_t size) | 62 | static inline void *kmem_zalloc_large(size_t size) |
63 | { | 63 | { |
64 | void *ptr; | 64 | return vzalloc(size); |
65 | |||
66 | ptr = vmalloc(size); | ||
67 | if (ptr) | ||
68 | memset(ptr, 0, size); | ||
69 | return ptr; | ||
70 | } | 65 | } |
71 | static inline void kmem_free_large(void *ptr) | 66 | static inline void kmem_free_large(void *ptr) |
72 | { | 67 | { |
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/mrlock.h
index ff6a19873e5c..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/mrlock.h
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/time.h
index 387e695a184c..387e695a184c 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/time.h
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/uuid.c
index b83f76b6d410..b83f76b6d410 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/uuid.c
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/uuid.h
index 4732d71262cc..4732d71262cc 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/uuid.h
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 53ec3ea9a625..d8b11b7f94aa 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,5 +24,6 @@ | |||
24 | #define XFS_BUF_LOCK_TRACKING 1 | 24 | #define XFS_BUF_LOCK_TRACKING 1 |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #include <linux-2.6/xfs_linux.h> | 27 | #include "xfs_linux.h" |
28 | |||
28 | #endif /* __XFS_H__ */ | 29 | #endif /* __XFS_H__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/xfs_acl.c
index b6c4b3795c4a..b6c4b3795c4a 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6530769a999b..4805f009f923 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -103,7 +103,7 @@ typedef struct xfs_agf { | |||
103 | /* disk block (xfs_daddr_t) in the AG */ | 103 | /* disk block (xfs_daddr_t) in the AG */ |
104 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | 104 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) |
105 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) | 105 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) |
106 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) | 106 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) |
107 | 107 | ||
108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | 108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, |
109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | 109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); |
@@ -156,7 +156,7 @@ typedef struct xfs_agi { | |||
156 | /* disk block (xfs_daddr_t) in the AG */ | 156 | /* disk block (xfs_daddr_t) in the AG */ |
157 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | 157 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) |
158 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) | 158 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) |
159 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) | 159 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) |
160 | 160 | ||
161 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | 161 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, |
162 | xfs_agnumber_t agno, struct xfs_buf **bpp); | 162 | xfs_agnumber_t agno, struct xfs_buf **bpp); |
@@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | |||
168 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) | 168 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) |
169 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) | 169 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) |
170 | #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) | 170 | #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) |
171 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) | 171 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) |
172 | 172 | ||
173 | typedef struct xfs_agfl { | 173 | typedef struct xfs_agfl { |
174 | __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ | 174 | __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1e00b3ef6274..ce84ffd0264c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -451,9 +451,8 @@ xfs_alloc_read_agfl( | |||
451 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 451 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
452 | if (error) | 452 | if (error) |
453 | return error; | 453 | return error; |
454 | ASSERT(bp); | 454 | ASSERT(!xfs_buf_geterror(bp)); |
455 | ASSERT(!XFS_BUF_GETERROR(bp)); | 455 | xfs_buf_set_ref(bp, XFS_AGFL_REF); |
456 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); | ||
457 | *bpp = bp; | 456 | *bpp = bp; |
458 | return 0; | 457 | return 0; |
459 | } | 458 | } |
@@ -2116,7 +2115,7 @@ xfs_read_agf( | |||
2116 | if (!*bpp) | 2115 | if (!*bpp) |
2117 | return 0; | 2116 | return 0; |
2118 | 2117 | ||
2119 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | 2118 | ASSERT(!(*bpp)->b_error); |
2120 | agf = XFS_BUF_TO_AGF(*bpp); | 2119 | agf = XFS_BUF_TO_AGF(*bpp); |
2121 | 2120 | ||
2122 | /* | 2121 | /* |
@@ -2140,7 +2139,7 @@ xfs_read_agf( | |||
2140 | xfs_trans_brelse(tp, *bpp); | 2139 | xfs_trans_brelse(tp, *bpp); |
2141 | return XFS_ERROR(EFSCORRUPTED); | 2140 | return XFS_ERROR(EFSCORRUPTED); |
2142 | } | 2141 | } |
2143 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); | 2142 | xfs_buf_set_ref(*bpp, XFS_AGF_REF); |
2144 | return 0; | 2143 | return 0; |
2145 | } | 2144 | } |
2146 | 2145 | ||
@@ -2168,7 +2167,7 @@ xfs_alloc_read_agf( | |||
2168 | return error; | 2167 | return error; |
2169 | if (!*bpp) | 2168 | if (!*bpp) |
2170 | return 0; | 2169 | return 0; |
2171 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | 2170 | ASSERT(!(*bpp)->b_error); |
2172 | 2171 | ||
2173 | agf = XFS_BUF_TO_AGF(*bpp); | 2172 | agf = XFS_BUF_TO_AGF(*bpp); |
2174 | pag = xfs_perag_get(mp, agno); | 2173 | pag = xfs_perag_get(mp, agno); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/xfs_aops.c
index 63e971e2b837..11b2aad982d4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -38,40 +38,6 @@ | |||
38 | #include <linux/pagevec.h> | 38 | #include <linux/pagevec.h> |
39 | #include <linux/writeback.h> | 39 | #include <linux/writeback.h> |
40 | 40 | ||
41 | |||
42 | /* | ||
43 | * Prime number of hash buckets since address is used as the key. | ||
44 | */ | ||
45 | #define NVSYNC 37 | ||
46 | #define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) | ||
47 | static wait_queue_head_t xfs_ioend_wq[NVSYNC]; | ||
48 | |||
49 | void __init | ||
50 | xfs_ioend_init(void) | ||
51 | { | ||
52 | int i; | ||
53 | |||
54 | for (i = 0; i < NVSYNC; i++) | ||
55 | init_waitqueue_head(&xfs_ioend_wq[i]); | ||
56 | } | ||
57 | |||
58 | void | ||
59 | xfs_ioend_wait( | ||
60 | xfs_inode_t *ip) | ||
61 | { | ||
62 | wait_queue_head_t *wq = to_ioend_wq(ip); | ||
63 | |||
64 | wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); | ||
65 | } | ||
66 | |||
67 | STATIC void | ||
68 | xfs_ioend_wake( | ||
69 | xfs_inode_t *ip) | ||
70 | { | ||
71 | if (atomic_dec_and_test(&ip->i_iocount)) | ||
72 | wake_up(to_ioend_wq(ip)); | ||
73 | } | ||
74 | |||
75 | void | 41 | void |
76 | xfs_count_page_state( | 42 | xfs_count_page_state( |
77 | struct page *page, | 43 | struct page *page, |
@@ -115,25 +81,20 @@ xfs_destroy_ioend( | |||
115 | xfs_ioend_t *ioend) | 81 | xfs_ioend_t *ioend) |
116 | { | 82 | { |
117 | struct buffer_head *bh, *next; | 83 | struct buffer_head *bh, *next; |
118 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
119 | 84 | ||
120 | for (bh = ioend->io_buffer_head; bh; bh = next) { | 85 | for (bh = ioend->io_buffer_head; bh; bh = next) { |
121 | next = bh->b_private; | 86 | next = bh->b_private; |
122 | bh->b_end_io(bh, !ioend->io_error); | 87 | bh->b_end_io(bh, !ioend->io_error); |
123 | } | 88 | } |
124 | 89 | ||
125 | /* | 90 | if (ioend->io_iocb) { |
126 | * Volume managers supporting multiple paths can send back ENODEV | 91 | if (ioend->io_isasync) { |
127 | * when the final path disappears. In this case continuing to fill | 92 | aio_complete(ioend->io_iocb, ioend->io_error ? |
128 | * the page cache with dirty data which cannot be written out is | 93 | ioend->io_error : ioend->io_result, 0); |
129 | * evil, so prevent that. | 94 | } |
130 | */ | 95 | inode_dio_done(ioend->io_inode); |
131 | if (unlikely(ioend->io_error == -ENODEV)) { | ||
132 | xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, | ||
133 | __FILE__, __LINE__); | ||
134 | } | 96 | } |
135 | 97 | ||
136 | xfs_ioend_wake(ip); | ||
137 | mempool_free(ioend, xfs_ioend_pool); | 98 | mempool_free(ioend, xfs_ioend_pool); |
138 | } | 99 | } |
139 | 100 | ||
@@ -156,6 +117,15 @@ xfs_ioend_new_eof( | |||
156 | } | 117 | } |
157 | 118 | ||
158 | /* | 119 | /* |
120 | * Fast and loose check if this write could update the on-disk inode size. | ||
121 | */ | ||
122 | static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) | ||
123 | { | ||
124 | return ioend->io_offset + ioend->io_size > | ||
125 | XFS_I(ioend->io_inode)->i_d.di_size; | ||
126 | } | ||
127 | |||
128 | /* | ||
159 | * Update on-disk file size now that data has been written to disk. The | 129 | * Update on-disk file size now that data has been written to disk. The |
160 | * current in-memory file size is i_size. If a write is beyond eof i_new_size | 130 | * current in-memory file size is i_size. If a write is beyond eof i_new_size |
161 | * will be the intended file size until i_size is updated. If this write does | 131 | * will be the intended file size until i_size is updated. If this write does |
@@ -173,9 +143,6 @@ xfs_setfilesize( | |||
173 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | 143 | xfs_inode_t *ip = XFS_I(ioend->io_inode); |
174 | xfs_fsize_t isize; | 144 | xfs_fsize_t isize; |
175 | 145 | ||
176 | if (unlikely(ioend->io_error)) | ||
177 | return 0; | ||
178 | |||
179 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) | 146 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) |
180 | return EAGAIN; | 147 | return EAGAIN; |
181 | 148 | ||
@@ -192,6 +159,9 @@ xfs_setfilesize( | |||
192 | 159 | ||
193 | /* | 160 | /* |
194 | * Schedule IO completion handling on the final put of an ioend. | 161 | * Schedule IO completion handling on the final put of an ioend. |
162 | * | ||
163 | * If there is no work to do we might as well call it a day and free the | ||
164 | * ioend right now. | ||
195 | */ | 165 | */ |
196 | STATIC void | 166 | STATIC void |
197 | xfs_finish_ioend( | 167 | xfs_finish_ioend( |
@@ -200,8 +170,10 @@ xfs_finish_ioend( | |||
200 | if (atomic_dec_and_test(&ioend->io_remaining)) { | 170 | if (atomic_dec_and_test(&ioend->io_remaining)) { |
201 | if (ioend->io_type == IO_UNWRITTEN) | 171 | if (ioend->io_type == IO_UNWRITTEN) |
202 | queue_work(xfsconvertd_workqueue, &ioend->io_work); | 172 | queue_work(xfsconvertd_workqueue, &ioend->io_work); |
203 | else | 173 | else if (xfs_ioend_is_append(ioend)) |
204 | queue_work(xfsdatad_workqueue, &ioend->io_work); | 174 | queue_work(xfsdatad_workqueue, &ioend->io_work); |
175 | else | ||
176 | xfs_destroy_ioend(ioend); | ||
205 | } | 177 | } |
206 | } | 178 | } |
207 | 179 | ||
@@ -216,17 +188,24 @@ xfs_end_io( | |||
216 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 188 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
217 | int error = 0; | 189 | int error = 0; |
218 | 190 | ||
191 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
192 | error = -EIO; | ||
193 | goto done; | ||
194 | } | ||
195 | if (ioend->io_error) | ||
196 | goto done; | ||
197 | |||
219 | /* | 198 | /* |
220 | * For unwritten extents we need to issue transactions to convert a | 199 | * For unwritten extents we need to issue transactions to convert a |
221 | * range to normal written extents after the data I/O has finished. | 200 | * range to normal written extents after the data I/O has finished. |
222 | */ | 201 | */ |
223 | if (ioend->io_type == IO_UNWRITTEN && | 202 | if (ioend->io_type == IO_UNWRITTEN) { |
224 | likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { | ||
225 | |||
226 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 203 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
227 | ioend->io_size); | 204 | ioend->io_size); |
228 | if (error) | 205 | if (error) { |
229 | ioend->io_error = error; | 206 | ioend->io_error = -error; |
207 | goto done; | ||
208 | } | ||
230 | } | 209 | } |
231 | 210 | ||
232 | /* | 211 | /* |
@@ -236,6 +215,7 @@ xfs_end_io( | |||
236 | error = xfs_setfilesize(ioend); | 215 | error = xfs_setfilesize(ioend); |
237 | ASSERT(!error || error == EAGAIN); | 216 | ASSERT(!error || error == EAGAIN); |
238 | 217 | ||
218 | done: | ||
239 | /* | 219 | /* |
240 | * If we didn't complete processing of the ioend, requeue it to the | 220 | * If we didn't complete processing of the ioend, requeue it to the |
241 | * tail of the workqueue for another attempt later. Otherwise destroy | 221 | * tail of the workqueue for another attempt later. Otherwise destroy |
@@ -247,8 +227,6 @@ xfs_end_io( | |||
247 | /* ensure we don't spin on blocked ioends */ | 227 | /* ensure we don't spin on blocked ioends */ |
248 | delay(1); | 228 | delay(1); |
249 | } else { | 229 | } else { |
250 | if (ioend->io_iocb) | ||
251 | aio_complete(ioend->io_iocb, ioend->io_result, 0); | ||
252 | xfs_destroy_ioend(ioend); | 230 | xfs_destroy_ioend(ioend); |
253 | } | 231 | } |
254 | } | 232 | } |
@@ -285,13 +263,13 @@ xfs_alloc_ioend( | |||
285 | * all the I/O from calling the completion routine too early. | 263 | * all the I/O from calling the completion routine too early. |
286 | */ | 264 | */ |
287 | atomic_set(&ioend->io_remaining, 1); | 265 | atomic_set(&ioend->io_remaining, 1); |
266 | ioend->io_isasync = 0; | ||
288 | ioend->io_error = 0; | 267 | ioend->io_error = 0; |
289 | ioend->io_list = NULL; | 268 | ioend->io_list = NULL; |
290 | ioend->io_type = type; | 269 | ioend->io_type = type; |
291 | ioend->io_inode = inode; | 270 | ioend->io_inode = inode; |
292 | ioend->io_buffer_head = NULL; | 271 | ioend->io_buffer_head = NULL; |
293 | ioend->io_buffer_tail = NULL; | 272 | ioend->io_buffer_tail = NULL; |
294 | atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); | ||
295 | ioend->io_offset = 0; | 273 | ioend->io_offset = 0; |
296 | ioend->io_size = 0; | 274 | ioend->io_size = 0; |
297 | ioend->io_iocb = NULL; | 275 | ioend->io_iocb = NULL; |
@@ -337,8 +315,8 @@ xfs_map_blocks( | |||
337 | count = mp->m_maxioffset - offset; | 315 | count = mp->m_maxioffset - offset; |
338 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | 316 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); |
339 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 317 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
340 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | 318 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, |
341 | bmapi_flags, NULL, 0, imap, &nimaps, NULL); | 319 | imap, &nimaps, bmapi_flags); |
342 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 320 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
343 | 321 | ||
344 | if (error) | 322 | if (error) |
@@ -551,7 +529,6 @@ xfs_cancel_ioend( | |||
551 | unlock_buffer(bh); | 529 | unlock_buffer(bh); |
552 | } while ((bh = next_bh) != NULL); | 530 | } while ((bh = next_bh) != NULL); |
553 | 531 | ||
554 | xfs_ioend_wake(XFS_I(ioend->io_inode)); | ||
555 | mempool_free(ioend, xfs_ioend_pool); | 532 | mempool_free(ioend, xfs_ioend_pool); |
556 | } while ((ioend = next) != NULL); | 533 | } while ((ioend = next) != NULL); |
557 | } | 534 | } |
@@ -1161,8 +1138,8 @@ __xfs_get_blocks( | |||
1161 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); | 1138 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); |
1162 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 1139 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
1163 | 1140 | ||
1164 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | 1141 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, |
1165 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); | 1142 | &imap, &nimaps, XFS_BMAPI_ENTIRE); |
1166 | if (error) | 1143 | if (error) |
1167 | goto out_unlock; | 1144 | goto out_unlock; |
1168 | 1145 | ||
@@ -1310,28 +1287,17 @@ xfs_end_io_direct_write( | |||
1310 | 1287 | ||
1311 | ioend->io_offset = offset; | 1288 | ioend->io_offset = offset; |
1312 | ioend->io_size = size; | 1289 | ioend->io_size = size; |
1290 | ioend->io_iocb = iocb; | ||
1291 | ioend->io_result = ret; | ||
1313 | if (private && size > 0) | 1292 | if (private && size > 0) |
1314 | ioend->io_type = IO_UNWRITTEN; | 1293 | ioend->io_type = IO_UNWRITTEN; |
1315 | 1294 | ||
1316 | if (is_async) { | 1295 | if (is_async) { |
1317 | /* | 1296 | ioend->io_isasync = 1; |
1318 | * If we are converting an unwritten extent we need to delay | ||
1319 | * the AIO completion until after the unwritten extent | ||
1320 | * conversion has completed, otherwise do it ASAP. | ||
1321 | */ | ||
1322 | if (ioend->io_type == IO_UNWRITTEN) { | ||
1323 | ioend->io_iocb = iocb; | ||
1324 | ioend->io_result = ret; | ||
1325 | } else { | ||
1326 | aio_complete(iocb, ret, 0); | ||
1327 | } | ||
1328 | xfs_finish_ioend(ioend); | 1297 | xfs_finish_ioend(ioend); |
1329 | } else { | 1298 | } else { |
1330 | xfs_finish_ioend_sync(ioend); | 1299 | xfs_finish_ioend_sync(ioend); |
1331 | } | 1300 | } |
1332 | |||
1333 | /* XXX: probably should move into the real I/O completion handler */ | ||
1334 | inode_dio_done(ioend->io_inode); | ||
1335 | } | 1301 | } |
1336 | 1302 | ||
1337 | STATIC ssize_t | 1303 | STATIC ssize_t |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/xfs_aops.h
index 71f721e1a71f..116dd5c37034 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -47,6 +47,7 @@ typedef struct xfs_ioend { | |||
47 | unsigned int io_type; /* delalloc / unwritten */ | 47 | unsigned int io_type; /* delalloc / unwritten */ |
48 | int io_error; /* I/O error code */ | 48 | int io_error; /* I/O error code */ |
49 | atomic_t io_remaining; /* hold count */ | 49 | atomic_t io_remaining; /* hold count */ |
50 | unsigned int io_isasync : 1; /* needs aio_complete */ | ||
50 | struct inode *io_inode; /* file being written to */ | 51 | struct inode *io_inode; /* file being written to */ |
51 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 52 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
52 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | 53 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ |
@@ -60,9 +61,6 @@ typedef struct xfs_ioend { | |||
60 | extern const struct address_space_operations xfs_address_space_operations; | 61 | extern const struct address_space_operations xfs_address_space_operations; |
61 | extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); | 62 | extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); |
62 | 63 | ||
63 | extern void xfs_ioend_init(void); | ||
64 | extern void xfs_ioend_wait(struct xfs_inode *); | ||
65 | |||
66 | extern void xfs_count_page_state(struct page *, int *, int *); | 64 | extern void xfs_count_page_state(struct page *, int *, int *); |
67 | 65 | ||
68 | #endif /* __XFS_AOPS_H__ */ | 66 | #endif /* __XFS_AOPS_H__ */ |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index cbae424fe1ba..1e5d97f86ea8 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -319,7 +319,7 @@ xfs_attr_set_int( | |||
319 | return (error); | 319 | return (error); |
320 | } | 320 | } |
321 | 321 | ||
322 | xfs_trans_ijoin(args.trans, dp); | 322 | xfs_trans_ijoin(args.trans, dp, 0); |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * If the attribute list is non-existent or a shortform list, | 325 | * If the attribute list is non-existent or a shortform list, |
@@ -389,7 +389,7 @@ xfs_attr_set_int( | |||
389 | * a new one. We need the inode to be in all transactions. | 389 | * a new one. We need the inode to be in all transactions. |
390 | */ | 390 | */ |
391 | if (committed) | 391 | if (committed) |
392 | xfs_trans_ijoin(args.trans, dp); | 392 | xfs_trans_ijoin(args.trans, dp, 0); |
393 | 393 | ||
394 | /* | 394 | /* |
395 | * Commit the leaf transformation. We'll need another (linked) | 395 | * Commit the leaf transformation. We'll need another (linked) |
@@ -537,7 +537,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
537 | * No need to make quota reservations here. We expect to release some | 537 | * No need to make quota reservations here. We expect to release some |
538 | * blocks not allocate in the common case. | 538 | * blocks not allocate in the common case. |
539 | */ | 539 | */ |
540 | xfs_trans_ijoin(args.trans, dp); | 540 | xfs_trans_ijoin(args.trans, dp, 0); |
541 | 541 | ||
542 | /* | 542 | /* |
543 | * Decide on what work routines to call based on the inode size. | 543 | * Decide on what work routines to call based on the inode size. |
@@ -809,7 +809,7 @@ xfs_attr_inactive(xfs_inode_t *dp) | |||
809 | * No need to make quota reservations here. We expect to release some | 809 | * No need to make quota reservations here. We expect to release some |
810 | * blocks, not allocate, in the common case. | 810 | * blocks, not allocate, in the common case. |
811 | */ | 811 | */ |
812 | xfs_trans_ijoin(trans, dp); | 812 | xfs_trans_ijoin(trans, dp, 0); |
813 | 813 | ||
814 | /* | 814 | /* |
815 | * Decide on what work routines to call based on the inode size. | 815 | * Decide on what work routines to call based on the inode size. |
@@ -823,18 +823,6 @@ xfs_attr_inactive(xfs_inode_t *dp) | |||
823 | if (error) | 823 | if (error) |
824 | goto out; | 824 | goto out; |
825 | 825 | ||
826 | /* | ||
827 | * Signal synchronous inactive transactions unless this is a | ||
828 | * synchronous mount filesystem in which case we know that we're here | ||
829 | * because we've been called out of xfs_inactive which means that the | ||
830 | * last reference is gone and the unlink transaction has already hit | ||
831 | * the disk so async inactive transactions are safe. | ||
832 | */ | ||
833 | if (!(mp->m_flags & XFS_MOUNT_WSYNC)) { | ||
834 | if (dp->i_d.di_anextents > 0) | ||
835 | xfs_trans_set_sync(trans); | ||
836 | } | ||
837 | |||
838 | error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); | 826 | error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); |
839 | if (error) | 827 | if (error) |
840 | goto out; | 828 | goto out; |
@@ -973,7 +961,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
973 | * a new one. We need the inode to be in all transactions. | 961 | * a new one. We need the inode to be in all transactions. |
974 | */ | 962 | */ |
975 | if (committed) | 963 | if (committed) |
976 | xfs_trans_ijoin(args->trans, dp); | 964 | xfs_trans_ijoin(args->trans, dp, 0); |
977 | 965 | ||
978 | /* | 966 | /* |
979 | * Commit the current trans (including the inode) and start | 967 | * Commit the current trans (including the inode) and start |
@@ -1075,7 +1063,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
1075 | * in all transactions. | 1063 | * in all transactions. |
1076 | */ | 1064 | */ |
1077 | if (committed) | 1065 | if (committed) |
1078 | xfs_trans_ijoin(args->trans, dp); | 1066 | xfs_trans_ijoin(args->trans, dp, 0); |
1079 | } else | 1067 | } else |
1080 | xfs_da_buf_done(bp); | 1068 | xfs_da_buf_done(bp); |
1081 | 1069 | ||
@@ -1149,7 +1137,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) | |||
1149 | * a new one. We need the inode to be in all transactions. | 1137 | * a new one. We need the inode to be in all transactions. |
1150 | */ | 1138 | */ |
1151 | if (committed) | 1139 | if (committed) |
1152 | xfs_trans_ijoin(args->trans, dp); | 1140 | xfs_trans_ijoin(args->trans, dp, 0); |
1153 | } else | 1141 | } else |
1154 | xfs_da_buf_done(bp); | 1142 | xfs_da_buf_done(bp); |
1155 | return(0); | 1143 | return(0); |
@@ -1303,7 +1291,7 @@ restart: | |||
1303 | * in all transactions. | 1291 | * in all transactions. |
1304 | */ | 1292 | */ |
1305 | if (committed) | 1293 | if (committed) |
1306 | xfs_trans_ijoin(args->trans, dp); | 1294 | xfs_trans_ijoin(args->trans, dp, 0); |
1307 | 1295 | ||
1308 | /* | 1296 | /* |
1309 | * Commit the node conversion and start the next | 1297 | * Commit the node conversion and start the next |
@@ -1340,7 +1328,7 @@ restart: | |||
1340 | * a new one. We need the inode to be in all transactions. | 1328 | * a new one. We need the inode to be in all transactions. |
1341 | */ | 1329 | */ |
1342 | if (committed) | 1330 | if (committed) |
1343 | xfs_trans_ijoin(args->trans, dp); | 1331 | xfs_trans_ijoin(args->trans, dp, 0); |
1344 | } else { | 1332 | } else { |
1345 | /* | 1333 | /* |
1346 | * Addition succeeded, update Btree hashvals. | 1334 | * Addition succeeded, update Btree hashvals. |
@@ -1452,7 +1440,7 @@ restart: | |||
1452 | * in all transactions. | 1440 | * in all transactions. |
1453 | */ | 1441 | */ |
1454 | if (committed) | 1442 | if (committed) |
1455 | xfs_trans_ijoin(args->trans, dp); | 1443 | xfs_trans_ijoin(args->trans, dp, 0); |
1456 | } | 1444 | } |
1457 | 1445 | ||
1458 | /* | 1446 | /* |
@@ -1584,7 +1572,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) | |||
1584 | * a new one. We need the inode to be in all transactions. | 1572 | * a new one. We need the inode to be in all transactions. |
1585 | */ | 1573 | */ |
1586 | if (committed) | 1574 | if (committed) |
1587 | xfs_trans_ijoin(args->trans, dp); | 1575 | xfs_trans_ijoin(args->trans, dp, 0); |
1588 | 1576 | ||
1589 | /* | 1577 | /* |
1590 | * Commit the Btree join operation and start a new trans. | 1578 | * Commit the Btree join operation and start a new trans. |
@@ -1635,7 +1623,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) | |||
1635 | * in all transactions. | 1623 | * in all transactions. |
1636 | */ | 1624 | */ |
1637 | if (committed) | 1625 | if (committed) |
1638 | xfs_trans_ijoin(args->trans, dp); | 1626 | xfs_trans_ijoin(args->trans, dp, 0); |
1639 | } else | 1627 | } else |
1640 | xfs_da_brelse(args->trans, bp); | 1628 | xfs_da_brelse(args->trans, bp); |
1641 | } | 1629 | } |
@@ -1975,10 +1963,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
1975 | lblkno = args->rmtblkno; | 1963 | lblkno = args->rmtblkno; |
1976 | while (valuelen > 0) { | 1964 | while (valuelen > 0) { |
1977 | nmap = ATTR_RMTVALUE_MAPSIZE; | 1965 | nmap = ATTR_RMTVALUE_MAPSIZE; |
1978 | error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno, | 1966 | error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, |
1979 | args->rmtblkcnt, | 1967 | args->rmtblkcnt, map, &nmap, |
1980 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | 1968 | XFS_BMAPI_ATTRFORK); |
1981 | NULL, 0, map, &nmap, NULL); | ||
1982 | if (error) | 1969 | if (error) |
1983 | return(error); | 1970 | return(error); |
1984 | ASSERT(nmap >= 1); | 1971 | ASSERT(nmap >= 1); |
@@ -2052,10 +2039,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2052 | */ | 2039 | */ |
2053 | xfs_bmap_init(args->flist, args->firstblock); | 2040 | xfs_bmap_init(args->flist, args->firstblock); |
2054 | nmap = 1; | 2041 | nmap = 1; |
2055 | error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno, | 2042 | error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, |
2056 | blkcnt, | 2043 | blkcnt, |
2057 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA | | 2044 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, |
2058 | XFS_BMAPI_WRITE, | ||
2059 | args->firstblock, args->total, &map, &nmap, | 2045 | args->firstblock, args->total, &map, &nmap, |
2060 | args->flist); | 2046 | args->flist); |
2061 | if (!error) { | 2047 | if (!error) { |
@@ -2074,7 +2060,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2074 | * a new one. We need the inode to be in all transactions. | 2060 | * a new one. We need the inode to be in all transactions. |
2075 | */ | 2061 | */ |
2076 | if (committed) | 2062 | if (committed) |
2077 | xfs_trans_ijoin(args->trans, dp); | 2063 | xfs_trans_ijoin(args->trans, dp, 0); |
2078 | 2064 | ||
2079 | ASSERT(nmap == 1); | 2065 | ASSERT(nmap == 1); |
2080 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | 2066 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
@@ -2104,14 +2090,11 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2104 | */ | 2090 | */ |
2105 | xfs_bmap_init(args->flist, args->firstblock); | 2091 | xfs_bmap_init(args->flist, args->firstblock); |
2106 | nmap = 1; | 2092 | nmap = 1; |
2107 | error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno, | 2093 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, |
2108 | args->rmtblkcnt, | 2094 | args->rmtblkcnt, &map, &nmap, |
2109 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | 2095 | XFS_BMAPI_ATTRFORK); |
2110 | args->firstblock, 0, &map, &nmap, | 2096 | if (error) |
2111 | NULL); | ||
2112 | if (error) { | ||
2113 | return(error); | 2097 | return(error); |
2114 | } | ||
2115 | ASSERT(nmap == 1); | 2098 | ASSERT(nmap == 1); |
2116 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | 2099 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
2117 | (map.br_startblock != HOLESTARTBLOCK)); | 2100 | (map.br_startblock != HOLESTARTBLOCK)); |
@@ -2121,17 +2104,17 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2121 | 2104 | ||
2122 | bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, | 2105 | bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, |
2123 | XBF_LOCK | XBF_DONT_BLOCK); | 2106 | XBF_LOCK | XBF_DONT_BLOCK); |
2124 | ASSERT(bp); | 2107 | if (!bp) |
2125 | ASSERT(!XFS_BUF_GETERROR(bp)); | 2108 | return ENOMEM; |
2126 | |||
2127 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : | 2109 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : |
2128 | XFS_BUF_SIZE(bp); | 2110 | XFS_BUF_SIZE(bp); |
2129 | xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); | 2111 | xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); |
2130 | if (tmp < XFS_BUF_SIZE(bp)) | 2112 | if (tmp < XFS_BUF_SIZE(bp)) |
2131 | xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); | 2113 | xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); |
2132 | if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ | 2114 | error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ |
2133 | return (error); | 2115 | xfs_buf_relse(bp); |
2134 | } | 2116 | if (error) |
2117 | return error; | ||
2135 | src += tmp; | 2118 | src += tmp; |
2136 | valuelen -= tmp; | 2119 | valuelen -= tmp; |
2137 | 2120 | ||
@@ -2167,16 +2150,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) | |||
2167 | /* | 2150 | /* |
2168 | * Try to remember where we decided to put the value. | 2151 | * Try to remember where we decided to put the value. |
2169 | */ | 2152 | */ |
2170 | xfs_bmap_init(args->flist, args->firstblock); | ||
2171 | nmap = 1; | 2153 | nmap = 1; |
2172 | error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno, | 2154 | error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, |
2173 | args->rmtblkcnt, | 2155 | args->rmtblkcnt, &map, &nmap, |
2174 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | 2156 | XFS_BMAPI_ATTRFORK); |
2175 | args->firstblock, 0, &map, &nmap, | 2157 | if (error) |
2176 | args->flist); | ||
2177 | if (error) { | ||
2178 | return(error); | 2158 | return(error); |
2179 | } | ||
2180 | ASSERT(nmap == 1); | 2159 | ASSERT(nmap == 1); |
2181 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | 2160 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
2182 | (map.br_startblock != HOLESTARTBLOCK)); | 2161 | (map.br_startblock != HOLESTARTBLOCK)); |
@@ -2189,8 +2168,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) | |||
2189 | */ | 2168 | */ |
2190 | bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); | 2169 | bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); |
2191 | if (bp) { | 2170 | if (bp) { |
2192 | XFS_BUF_STALE(bp); | 2171 | xfs_buf_stale(bp); |
2193 | XFS_BUF_UNDELAYWRITE(bp); | ||
2194 | xfs_buf_relse(bp); | 2172 | xfs_buf_relse(bp); |
2195 | bp = NULL; | 2173 | bp = NULL; |
2196 | } | 2174 | } |
@@ -2228,7 +2206,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) | |||
2228 | * a new one. We need the inode to be in all transactions. | 2206 | * a new one. We need the inode to be in all transactions. |
2229 | */ | 2207 | */ |
2230 | if (committed) | 2208 | if (committed) |
2231 | xfs_trans_ijoin(args->trans, args->dp); | 2209 | xfs_trans_ijoin(args->trans, args->dp, 0); |
2232 | 2210 | ||
2233 | /* | 2211 | /* |
2234 | * Close out trans and start the next one in the chain. | 2212 | * Close out trans and start the next one in the chain. |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 8fad9602542b..d4906e7c9787 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -2926,9 +2926,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, | |||
2926 | * Try to remember where we decided to put the value. | 2926 | * Try to remember where we decided to put the value. |
2927 | */ | 2927 | */ |
2928 | nmap = 1; | 2928 | nmap = 1; |
2929 | error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt, | 2929 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, |
2930 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | 2930 | &map, &nmap, XFS_BMAPI_ATTRFORK); |
2931 | NULL, 0, &map, &nmap, NULL); | ||
2932 | if (error) { | 2931 | if (error) { |
2933 | return(error); | 2932 | return(error); |
2934 | } | 2933 | } |
@@ -2948,6 +2947,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, | |||
2948 | bp = xfs_trans_get_buf(*trans, | 2947 | bp = xfs_trans_get_buf(*trans, |
2949 | dp->i_mount->m_ddev_targp, | 2948 | dp->i_mount->m_ddev_targp, |
2950 | dblkno, dblkcnt, XBF_LOCK); | 2949 | dblkno, dblkcnt, XBF_LOCK); |
2950 | if (!bp) | ||
2951 | return ENOMEM; | ||
2951 | xfs_trans_binval(*trans, bp); | 2952 | xfs_trans_binval(*trans, bp); |
2952 | /* | 2953 | /* |
2953 | * Roll to next transaction. | 2954 | * Roll to next transaction. |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ab3e5c6c4642..c68baeb0974a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -50,17 +50,22 @@ | |||
50 | #include "xfs_trace.h" | 50 | #include "xfs_trace.h" |
51 | 51 | ||
52 | 52 | ||
53 | #ifdef DEBUG | ||
54 | STATIC void | ||
55 | xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork); | ||
56 | #endif | ||
57 | |||
58 | kmem_zone_t *xfs_bmap_free_item_zone; | 53 | kmem_zone_t *xfs_bmap_free_item_zone; |
59 | 54 | ||
60 | /* | 55 | /* |
61 | * Prototypes for internal bmap routines. | 56 | * Prototypes for internal bmap routines. |
62 | */ | 57 | */ |
63 | 58 | ||
59 | #ifdef DEBUG | ||
60 | STATIC void | ||
61 | xfs_bmap_check_leaf_extents( | ||
62 | struct xfs_btree_cur *cur, | ||
63 | struct xfs_inode *ip, | ||
64 | int whichfork); | ||
65 | #else | ||
66 | #define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) | ||
67 | #endif | ||
68 | |||
64 | 69 | ||
65 | /* | 70 | /* |
66 | * Called from xfs_bmap_add_attrfork to handle extents format files. | 71 | * Called from xfs_bmap_add_attrfork to handle extents format files. |
@@ -85,58 +90,6 @@ xfs_bmap_add_attrfork_local( | |||
85 | int *flags); /* inode logging flags */ | 90 | int *flags); /* inode logging flags */ |
86 | 91 | ||
87 | /* | 92 | /* |
88 | * Called by xfs_bmap_add_extent to handle cases converting a delayed | ||
89 | * allocation to a real allocation. | ||
90 | */ | ||
91 | STATIC int /* error */ | ||
92 | xfs_bmap_add_extent_delay_real( | ||
93 | struct xfs_trans *tp, /* transaction pointer */ | ||
94 | xfs_inode_t *ip, /* incore inode pointer */ | ||
95 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
96 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
97 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
98 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | ||
99 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
100 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
101 | int *logflagsp); /* inode logging flags */ | ||
102 | |||
103 | /* | ||
104 | * Called by xfs_bmap_add_extent to handle cases converting a hole | ||
105 | * to a delayed allocation. | ||
106 | */ | ||
107 | STATIC int /* error */ | ||
108 | xfs_bmap_add_extent_hole_delay( | ||
109 | xfs_inode_t *ip, /* incore inode pointer */ | ||
110 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
111 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
112 | int *logflagsp); /* inode logging flags */ | ||
113 | |||
114 | /* | ||
115 | * Called by xfs_bmap_add_extent to handle cases converting a hole | ||
116 | * to a real allocation. | ||
117 | */ | ||
118 | STATIC int /* error */ | ||
119 | xfs_bmap_add_extent_hole_real( | ||
120 | xfs_inode_t *ip, /* incore inode pointer */ | ||
121 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
122 | xfs_btree_cur_t *cur, /* if null, not a btree */ | ||
123 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
124 | int *logflagsp, /* inode logging flags */ | ||
125 | int whichfork); /* data or attr fork */ | ||
126 | |||
127 | /* | ||
128 | * Called by xfs_bmap_add_extent to handle cases converting an unwritten | ||
129 | * allocation to a real allocation or vice versa. | ||
130 | */ | ||
131 | STATIC int /* error */ | ||
132 | xfs_bmap_add_extent_unwritten_real( | ||
133 | xfs_inode_t *ip, /* incore inode pointer */ | ||
134 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
135 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
136 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
137 | int *logflagsp); /* inode logging flags */ | ||
138 | |||
139 | /* | ||
140 | * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. | 93 | * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. |
141 | * It figures out where to ask the underlying allocator to put the new extent. | 94 | * It figures out where to ask the underlying allocator to put the new extent. |
142 | */ | 95 | */ |
@@ -215,19 +168,6 @@ xfs_bmap_search_extents( | |||
215 | xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */ | 168 | xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */ |
216 | 169 | ||
217 | /* | 170 | /* |
218 | * Check the last inode extent to determine whether this allocation will result | ||
219 | * in blocks being allocated at the end of the file. When we allocate new data | ||
220 | * blocks at the end of the file which do not start at the previous data block, | ||
221 | * we will try to align the new blocks at stripe unit boundaries. | ||
222 | */ | ||
223 | STATIC int /* error */ | ||
224 | xfs_bmap_isaeof( | ||
225 | xfs_inode_t *ip, /* incore inode pointer */ | ||
226 | xfs_fileoff_t off, /* file offset in fsblocks */ | ||
227 | int whichfork, /* data or attribute fork */ | ||
228 | char *aeof); /* return value */ | ||
229 | |||
230 | /* | ||
231 | * Compute the worst-case number of indirect blocks that will be used | 171 | * Compute the worst-case number of indirect blocks that will be used |
232 | * for ip's delayed extent of length "len". | 172 | * for ip's delayed extent of length "len". |
233 | */ | 173 | */ |
@@ -431,188 +371,13 @@ xfs_bmap_add_attrfork_local( | |||
431 | } | 371 | } |
432 | 372 | ||
433 | /* | 373 | /* |
434 | * Called by xfs_bmapi to update file extent records and the btree | 374 | * Convert a delayed allocation to a real allocation. |
435 | * after allocating space (or doing a delayed allocation). | ||
436 | */ | ||
437 | STATIC int /* error */ | ||
438 | xfs_bmap_add_extent( | ||
439 | struct xfs_trans *tp, /* transaction pointer */ | ||
440 | xfs_inode_t *ip, /* incore inode pointer */ | ||
441 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
442 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
443 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
444 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
445 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
446 | int *logflagsp, /* inode logging flags */ | ||
447 | int whichfork) /* data or attr fork */ | ||
448 | { | ||
449 | xfs_btree_cur_t *cur; /* btree cursor or null */ | ||
450 | xfs_filblks_t da_new; /* new count del alloc blocks used */ | ||
451 | xfs_filblks_t da_old; /* old count del alloc blocks used */ | ||
452 | int error; /* error return value */ | ||
453 | xfs_ifork_t *ifp; /* inode fork ptr */ | ||
454 | int logflags; /* returned value */ | ||
455 | xfs_extnum_t nextents; /* number of extents in file now */ | ||
456 | |||
457 | XFS_STATS_INC(xs_add_exlist); | ||
458 | |||
459 | cur = *curp; | ||
460 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
461 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
462 | da_old = da_new = 0; | ||
463 | error = 0; | ||
464 | |||
465 | ASSERT(*idx >= 0); | ||
466 | ASSERT(*idx <= nextents); | ||
467 | |||
468 | /* | ||
469 | * This is the first extent added to a new/empty file. | ||
470 | * Special case this one, so other routines get to assume there are | ||
471 | * already extents in the list. | ||
472 | */ | ||
473 | if (nextents == 0) { | ||
474 | xfs_iext_insert(ip, *idx, 1, new, | ||
475 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | ||
476 | |||
477 | ASSERT(cur == NULL); | ||
478 | |||
479 | if (!isnullstartblock(new->br_startblock)) { | ||
480 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | ||
481 | logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | ||
482 | } else | ||
483 | logflags = 0; | ||
484 | } | ||
485 | /* | ||
486 | * Any kind of new delayed allocation goes here. | ||
487 | */ | ||
488 | else if (isnullstartblock(new->br_startblock)) { | ||
489 | if (cur) | ||
490 | ASSERT((cur->bc_private.b.flags & | ||
491 | XFS_BTCUR_BPRV_WASDEL) == 0); | ||
492 | error = xfs_bmap_add_extent_hole_delay(ip, idx, new, | ||
493 | &logflags); | ||
494 | } | ||
495 | /* | ||
496 | * Real allocation off the end of the file. | ||
497 | */ | ||
498 | else if (*idx == nextents) { | ||
499 | if (cur) | ||
500 | ASSERT((cur->bc_private.b.flags & | ||
501 | XFS_BTCUR_BPRV_WASDEL) == 0); | ||
502 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, | ||
503 | &logflags, whichfork); | ||
504 | } else { | ||
505 | xfs_bmbt_irec_t prev; /* old extent at offset idx */ | ||
506 | |||
507 | /* | ||
508 | * Get the record referred to by idx. | ||
509 | */ | ||
510 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev); | ||
511 | /* | ||
512 | * If it's a real allocation record, and the new allocation ends | ||
513 | * after the start of the referred to record, then we're filling | ||
514 | * in a delayed or unwritten allocation with a real one, or | ||
515 | * converting real back to unwritten. | ||
516 | */ | ||
517 | if (!isnullstartblock(new->br_startblock) && | ||
518 | new->br_startoff + new->br_blockcount > prev.br_startoff) { | ||
519 | if (prev.br_state != XFS_EXT_UNWRITTEN && | ||
520 | isnullstartblock(prev.br_startblock)) { | ||
521 | da_old = startblockval(prev.br_startblock); | ||
522 | if (cur) | ||
523 | ASSERT(cur->bc_private.b.flags & | ||
524 | XFS_BTCUR_BPRV_WASDEL); | ||
525 | error = xfs_bmap_add_extent_delay_real(tp, ip, | ||
526 | idx, &cur, new, &da_new, | ||
527 | first, flist, &logflags); | ||
528 | } else { | ||
529 | ASSERT(new->br_state == XFS_EXT_NORM || | ||
530 | new->br_state == XFS_EXT_UNWRITTEN); | ||
531 | |||
532 | error = xfs_bmap_add_extent_unwritten_real(ip, | ||
533 | idx, &cur, new, &logflags); | ||
534 | if (error) | ||
535 | goto done; | ||
536 | } | ||
537 | } | ||
538 | /* | ||
539 | * Otherwise we're filling in a hole with an allocation. | ||
540 | */ | ||
541 | else { | ||
542 | if (cur) | ||
543 | ASSERT((cur->bc_private.b.flags & | ||
544 | XFS_BTCUR_BPRV_WASDEL) == 0); | ||
545 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, | ||
546 | new, &logflags, whichfork); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | if (error) | ||
551 | goto done; | ||
552 | ASSERT(*curp == cur || *curp == NULL); | ||
553 | |||
554 | /* | ||
555 | * Convert to a btree if necessary. | ||
556 | */ | ||
557 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | ||
558 | XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) { | ||
559 | int tmp_logflags; /* partial log flag return val */ | ||
560 | |||
561 | ASSERT(cur == NULL); | ||
562 | error = xfs_bmap_extents_to_btree(tp, ip, first, | ||
563 | flist, &cur, da_old > 0, &tmp_logflags, whichfork); | ||
564 | logflags |= tmp_logflags; | ||
565 | if (error) | ||
566 | goto done; | ||
567 | } | ||
568 | /* | ||
569 | * Adjust for changes in reserved delayed indirect blocks. | ||
570 | * Nothing to do for disk quotas here. | ||
571 | */ | ||
572 | if (da_old || da_new) { | ||
573 | xfs_filblks_t nblks; | ||
574 | |||
575 | nblks = da_new; | ||
576 | if (cur) | ||
577 | nblks += cur->bc_private.b.allocated; | ||
578 | ASSERT(nblks <= da_old); | ||
579 | if (nblks < da_old) | ||
580 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | ||
581 | (int64_t)(da_old - nblks), 0); | ||
582 | } | ||
583 | /* | ||
584 | * Clear out the allocated field, done with it now in any case. | ||
585 | */ | ||
586 | if (cur) { | ||
587 | cur->bc_private.b.allocated = 0; | ||
588 | *curp = cur; | ||
589 | } | ||
590 | done: | ||
591 | #ifdef DEBUG | ||
592 | if (!error) | ||
593 | xfs_bmap_check_leaf_extents(*curp, ip, whichfork); | ||
594 | #endif | ||
595 | *logflagsp = logflags; | ||
596 | return error; | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Called by xfs_bmap_add_extent to handle cases converting a delayed | ||
601 | * allocation to a real allocation. | ||
602 | */ | 375 | */ |
603 | STATIC int /* error */ | 376 | STATIC int /* error */ |
604 | xfs_bmap_add_extent_delay_real( | 377 | xfs_bmap_add_extent_delay_real( |
605 | struct xfs_trans *tp, /* transaction pointer */ | 378 | struct xfs_bmalloca *bma) |
606 | xfs_inode_t *ip, /* incore inode pointer */ | ||
607 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
608 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
609 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
610 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | ||
611 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
612 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
613 | int *logflagsp) /* inode logging flags */ | ||
614 | { | 379 | { |
615 | xfs_btree_cur_t *cur; /* btree cursor */ | 380 | struct xfs_bmbt_irec *new = &bma->got; |
616 | int diff; /* temp value */ | 381 | int diff; /* temp value */ |
617 | xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ | 382 | xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ |
618 | int error; /* error return value */ | 383 | int error; /* error return value */ |
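The eight parameters the deleted dispatcher threaded into every helper now travel as one struct xfs_bmalloca argument. A sketch limited to the fields these hunks actually touch, with comments taken from the old parameter list (the full definition in fs/xfs/xfs_bmap.h also carries the allocation request itself):

	struct xfs_bmalloca {
		xfs_fsblock_t		*firstblock;	/* pointer to firstblock variable */
		struct xfs_bmap_free	*flist;		/* list of extents to be freed */
		struct xfs_trans	*tp;		/* transaction pointer */
		struct xfs_inode	*ip;		/* incore inode pointer */
		struct xfs_bmbt_irec	got;		/* the new extent (new = &bma->got) */
		xfs_extnum_t		idx;		/* extent number to update/insert */
		struct xfs_btree_cur	*cur;		/* if NULL, not a btree */
		int			logflags;	/* inode logging flags */
		/* ... allocation request fields elided ... */
	};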
@@ -623,10 +388,22 @@ xfs_bmap_add_extent_delay_real( | |||
623 | /* left is 0, right is 1, prev is 2 */ | 388 | /* left is 0, right is 1, prev is 2 */ |
624 | int rval=0; /* return value (logging flags) */ | 389 | int rval=0; /* return value (logging flags) */ |
625 | int state = 0;/* state bits, accessed thru macros */ | 390 | int state = 0;/* state bits, accessed thru macros */ |
626 | xfs_filblks_t temp=0; /* value for dnew calculations */ | 391 | xfs_filblks_t da_new; /* new count del alloc blocks used */ |
627 | xfs_filblks_t temp2=0;/* value for dnew calculations */ | 392 | xfs_filblks_t da_old; /* old count del alloc blocks used */ |
393 | xfs_filblks_t temp=0; /* value for da_new calculations */ | ||
394 | xfs_filblks_t temp2=0;/* value for da_new calculations */ | ||
628 | int tmp_rval; /* partial logging flags */ | 395 | int tmp_rval; /* partial logging flags */ |
629 | 396 | ||
397 | ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); | ||
398 | |||
399 | ASSERT(bma->idx >= 0); | ||
400 | ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
401 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
402 | ASSERT(!bma->cur || | ||
403 | (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); | ||
404 | |||
405 | XFS_STATS_INC(xs_add_exlist); | ||
406 | |||
630 | #define LEFT r[0] | 407 | #define LEFT r[0] |
631 | #define RIGHT r[1] | 408 | #define RIGHT r[1] |
632 | #define PREV r[2] | 409 | #define PREV r[2] |
@@ -634,14 +411,15 @@ xfs_bmap_add_extent_delay_real( | |||
634 | /* | 411 | /* |
635 | * Set up a bunch of variables to make the tests simpler. | 412 | * Set up a bunch of variables to make the tests simpler. |
636 | */ | 413 | */ |
637 | cur = *curp; | 414 | ep = xfs_iext_get_ext(ifp, bma->idx); |
638 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
639 | ep = xfs_iext_get_ext(ifp, *idx); | ||
640 | xfs_bmbt_get_all(ep, &PREV); | 415 | xfs_bmbt_get_all(ep, &PREV); |
641 | new_endoff = new->br_startoff + new->br_blockcount; | 416 | new_endoff = new->br_startoff + new->br_blockcount; |
642 | ASSERT(PREV.br_startoff <= new->br_startoff); | 417 | ASSERT(PREV.br_startoff <= new->br_startoff); |
643 | ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); | 418 | ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); |
644 | 419 | ||
420 | da_old = startblockval(PREV.br_startblock); | ||
421 | da_new = 0; | ||
422 | |||
645 | /* | 423 | /* |
646 | * Set flags determining what part of the previous delayed allocation | 424 | * Set flags determining what part of the previous delayed allocation |
647 | * extent is being replaced by a real allocation. | 425 | * extent is being replaced by a real allocation. |
@@ -655,9 +433,9 @@ xfs_bmap_add_extent_delay_real( | |||
655 | * Check and set flags if this segment has a left neighbor. | 433 | * Check and set flags if this segment has a left neighbor. |
656 | * Don't set contiguous if the combined extent would be too large. | 434 | * Don't set contiguous if the combined extent would be too large. |
657 | */ | 435 | */ |
658 | if (*idx > 0) { | 436 | if (bma->idx > 0) { |
659 | state |= BMAP_LEFT_VALID; | 437 | state |= BMAP_LEFT_VALID; |
660 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); | 438 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); |
661 | 439 | ||
662 | if (isnullstartblock(LEFT.br_startblock)) | 440 | if (isnullstartblock(LEFT.br_startblock)) |
663 | state |= BMAP_LEFT_DELAY; | 441 | state |= BMAP_LEFT_DELAY; |
@@ -675,9 +453,9 @@ xfs_bmap_add_extent_delay_real( | |||
675 | * Don't set contiguous if the combined extent would be too large. | 453 | * Don't set contiguous if the combined extent would be too large. |
676 | * Also check for all-three-contiguous being too large. | 454 | * Also check for all-three-contiguous being too large. |
677 | */ | 455 | */ |
678 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | 456 | if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { |
679 | state |= BMAP_RIGHT_VALID; | 457 | state |= BMAP_RIGHT_VALID; |
680 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); | 458 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); |
681 | 459 | ||
682 | if (isnullstartblock(RIGHT.br_startblock)) | 460 | if (isnullstartblock(RIGHT.br_startblock)) |
683 | state |= BMAP_RIGHT_DELAY; | 461 | state |= BMAP_RIGHT_DELAY; |
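Four independent bits drive the switch below, so sixteen states are possible on paper; the combinations that cannot arise fall through to the ASSERT(0) at the bottom. The derivation of the two FILLING bits sits in the elided context between these hunks; a sketch consistent with the comment above:

	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;	/* new starts where PREV starts */
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;	/* new ends where PREV ends */

The CONTIG bits set above then say whether the filled-in piece can merge with the real neighbors LEFT and RIGHT.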
@@ -708,38 +486,41 @@ xfs_bmap_add_extent_delay_real( | |||
708 | * Filling in all of a previously delayed allocation extent. | 486 | * Filling in all of a previously delayed allocation extent. |
709 | * The left and right neighbors are both contiguous with new. | 487 | * The left and right neighbors are both contiguous with new. |
710 | */ | 488 | */ |
711 | --*idx; | 489 | bma->idx--; |
712 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 490 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
713 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | 491 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), |
714 | LEFT.br_blockcount + PREV.br_blockcount + | 492 | LEFT.br_blockcount + PREV.br_blockcount + |
715 | RIGHT.br_blockcount); | 493 | RIGHT.br_blockcount); |
716 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 494 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
717 | 495 | ||
718 | xfs_iext_remove(ip, *idx + 1, 2, state); | 496 | xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); |
719 | ip->i_d.di_nextents--; | 497 | bma->ip->i_d.di_nextents--; |
720 | if (cur == NULL) | 498 | if (bma->cur == NULL) |
721 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 499 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
722 | else { | 500 | else { |
723 | rval = XFS_ILOG_CORE; | 501 | rval = XFS_ILOG_CORE; |
724 | if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, | 502 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, |
725 | RIGHT.br_startblock, | 503 | RIGHT.br_startblock, |
726 | RIGHT.br_blockcount, &i))) | 504 | RIGHT.br_blockcount, &i); |
505 | if (error) | ||
727 | goto done; | 506 | goto done; |
728 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 507 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
729 | if ((error = xfs_btree_delete(cur, &i))) | 508 | error = xfs_btree_delete(bma->cur, &i); |
509 | if (error) | ||
730 | goto done; | 510 | goto done; |
731 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 511 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
732 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 512 | error = xfs_btree_decrement(bma->cur, 0, &i); |
513 | if (error) | ||
733 | goto done; | 514 | goto done; |
734 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 515 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
735 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | 516 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
736 | LEFT.br_startblock, | 517 | LEFT.br_startblock, |
737 | LEFT.br_blockcount + | 518 | LEFT.br_blockcount + |
738 | PREV.br_blockcount + | 519 | PREV.br_blockcount + |
739 | RIGHT.br_blockcount, LEFT.br_state))) | 520 | RIGHT.br_blockcount, LEFT.br_state); |
521 | if (error) | ||
740 | goto done; | 522 | goto done; |
741 | } | 523 | } |
742 | *dnew = 0; | ||
743 | break; | 524 | break; |
744 | 525 | ||
745 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: | 526 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: |
@@ -747,30 +528,31 @@ xfs_bmap_add_extent_delay_real( | |||
747 | * Filling in all of a previously delayed allocation extent. | 528 | * Filling in all of a previously delayed allocation extent. |
748 | * The left neighbor is contiguous, the right is not. | 529 | * The left neighbor is contiguous, the right is not. |
749 | */ | 530 | */ |
750 | --*idx; | 531 | bma->idx--; |
751 | 532 | ||
752 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 533 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
753 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | 534 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), |
754 | LEFT.br_blockcount + PREV.br_blockcount); | 535 | LEFT.br_blockcount + PREV.br_blockcount); |
755 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 536 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
756 | 537 | ||
757 | xfs_iext_remove(ip, *idx + 1, 1, state); | 538 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); |
758 | if (cur == NULL) | 539 | if (bma->cur == NULL) |
759 | rval = XFS_ILOG_DEXT; | 540 | rval = XFS_ILOG_DEXT; |
760 | else { | 541 | else { |
761 | rval = 0; | 542 | rval = 0; |
762 | if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, | 543 | error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, |
763 | LEFT.br_startblock, LEFT.br_blockcount, | 544 | LEFT.br_startblock, LEFT.br_blockcount, |
764 | &i))) | 545 | &i); |
546 | if (error) | ||
765 | goto done; | 547 | goto done; |
766 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 548 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
767 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | 549 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
768 | LEFT.br_startblock, | 550 | LEFT.br_startblock, |
769 | LEFT.br_blockcount + | 551 | LEFT.br_blockcount + |
770 | PREV.br_blockcount, LEFT.br_state))) | 552 | PREV.br_blockcount, LEFT.br_state); |
553 | if (error) | ||
771 | goto done; | 554 | goto done; |
772 | } | 555 | } |
773 | *dnew = 0; | ||
774 | break; | 556 | break; |
775 | 557 | ||
776 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | 558 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: |
@@ -778,30 +560,30 @@ xfs_bmap_add_extent_delay_real( | |||
778 | * Filling in all of a previously delayed allocation extent. | 560 | * Filling in all of a previously delayed allocation extent. |
779 | * The right neighbor is contiguous, the left is not. | 561 | * The right neighbor is contiguous, the left is not. |
780 | */ | 562 | */ |
781 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 563 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
782 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 564 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
783 | xfs_bmbt_set_blockcount(ep, | 565 | xfs_bmbt_set_blockcount(ep, |
784 | PREV.br_blockcount + RIGHT.br_blockcount); | 566 | PREV.br_blockcount + RIGHT.br_blockcount); |
785 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 567 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
786 | 568 | ||
787 | xfs_iext_remove(ip, *idx + 1, 1, state); | 569 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); |
788 | if (cur == NULL) | 570 | if (bma->cur == NULL) |
789 | rval = XFS_ILOG_DEXT; | 571 | rval = XFS_ILOG_DEXT; |
790 | else { | 572 | else { |
791 | rval = 0; | 573 | rval = 0; |
792 | if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, | 574 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, |
793 | RIGHT.br_startblock, | 575 | RIGHT.br_startblock, |
794 | RIGHT.br_blockcount, &i))) | 576 | RIGHT.br_blockcount, &i); |
577 | if (error) | ||
795 | goto done; | 578 | goto done; |
796 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 579 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
797 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, | 580 | error = xfs_bmbt_update(bma->cur, PREV.br_startoff, |
798 | new->br_startblock, | 581 | new->br_startblock, |
799 | PREV.br_blockcount + | 582 | PREV.br_blockcount + |
800 | RIGHT.br_blockcount, PREV.br_state))) | 583 | RIGHT.br_blockcount, PREV.br_state); |
584 | if (error) | ||
801 | goto done; | 585 | goto done; |
802 | } | 586 | } |
803 | |||
804 | *dnew = 0; | ||
805 | break; | 587 | break; |
806 | 588 | ||
807 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: | 589 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: |
@@ -810,27 +592,27 @@ xfs_bmap_add_extent_delay_real( | |||
810 | * Neither the left nor right neighbors are contiguous with | 592 | * Neither the left nor right neighbors are contiguous with |
811 | * the new one. | 593 | * the new one. |
812 | */ | 594 | */ |
813 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 595 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
814 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 596 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
815 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 597 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
816 | 598 | ||
817 | ip->i_d.di_nextents++; | 599 | bma->ip->i_d.di_nextents++; |
818 | if (cur == NULL) | 600 | if (bma->cur == NULL) |
819 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 601 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
820 | else { | 602 | else { |
821 | rval = XFS_ILOG_CORE; | 603 | rval = XFS_ILOG_CORE; |
822 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | 604 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, |
823 | new->br_startblock, new->br_blockcount, | 605 | new->br_startblock, new->br_blockcount, |
824 | &i))) | 606 | &i); |
607 | if (error) | ||
825 | goto done; | 608 | goto done; |
826 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 609 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); |
827 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | 610 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
828 | if ((error = xfs_btree_insert(cur, &i))) | 611 | error = xfs_btree_insert(bma->cur, &i); |
612 | if (error) | ||
829 | goto done; | 613 | goto done; |
830 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 614 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
831 | } | 615 | } |
832 | |||
833 | *dnew = 0; | ||
834 | break; | 616 | break; |
835 | 617 | ||
836 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: | 618 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: |
@@ -838,39 +620,40 @@ xfs_bmap_add_extent_delay_real( | |||
838 | * Filling in the first part of a previous delayed allocation. | 620 | * Filling in the first part of a previous delayed allocation. |
839 | * The left neighbor is contiguous. | 621 | * The left neighbor is contiguous. |
840 | */ | 622 | */ |
841 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); | 623 | trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); |
842 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), | 624 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), |
843 | LEFT.br_blockcount + new->br_blockcount); | 625 | LEFT.br_blockcount + new->br_blockcount); |
844 | xfs_bmbt_set_startoff(ep, | 626 | xfs_bmbt_set_startoff(ep, |
845 | PREV.br_startoff + new->br_blockcount); | 627 | PREV.br_startoff + new->br_blockcount); |
846 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); | 628 | trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); |
847 | 629 | ||
848 | temp = PREV.br_blockcount - new->br_blockcount; | 630 | temp = PREV.br_blockcount - new->br_blockcount; |
849 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 631 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
850 | xfs_bmbt_set_blockcount(ep, temp); | 632 | xfs_bmbt_set_blockcount(ep, temp); |
851 | if (cur == NULL) | 633 | if (bma->cur == NULL) |
852 | rval = XFS_ILOG_DEXT; | 634 | rval = XFS_ILOG_DEXT; |
853 | else { | 635 | else { |
854 | rval = 0; | 636 | rval = 0; |
855 | if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, | 637 | error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, |
856 | LEFT.br_startblock, LEFT.br_blockcount, | 638 | LEFT.br_startblock, LEFT.br_blockcount, |
857 | &i))) | 639 | &i); |
640 | if (error) | ||
858 | goto done; | 641 | goto done; |
859 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 642 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
860 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | 643 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
861 | LEFT.br_startblock, | 644 | LEFT.br_startblock, |
862 | LEFT.br_blockcount + | 645 | LEFT.br_blockcount + |
863 | new->br_blockcount, | 646 | new->br_blockcount, |
864 | LEFT.br_state))) | 647 | LEFT.br_state); |
648 | if (error) | ||
865 | goto done; | 649 | goto done; |
866 | } | 650 | } |
867 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 651 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), |
868 | startblockval(PREV.br_startblock)); | 652 | startblockval(PREV.br_startblock)); |
869 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 653 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); |
870 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 654 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
871 | 655 | ||
872 | --*idx; | 656 | bma->idx--; |
873 | *dnew = temp; | ||
874 | break; | 657 | break; |
875 | 658 | ||
876 | case BMAP_LEFT_FILLING: | 659 | case BMAP_LEFT_FILLING: |
@@ -878,43 +661,43 @@ xfs_bmap_add_extent_delay_real( | |||
878 | * Filling in the first part of a previous delayed allocation. | 661 | * Filling in the first part of a previous delayed allocation. |
879 | * The left neighbor is not contiguous. | 662 | * The left neighbor is not contiguous. |
880 | */ | 663 | */ |
881 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 664 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
882 | xfs_bmbt_set_startoff(ep, new_endoff); | 665 | xfs_bmbt_set_startoff(ep, new_endoff); |
883 | temp = PREV.br_blockcount - new->br_blockcount; | 666 | temp = PREV.br_blockcount - new->br_blockcount; |
884 | xfs_bmbt_set_blockcount(ep, temp); | 667 | xfs_bmbt_set_blockcount(ep, temp); |
885 | xfs_iext_insert(ip, *idx, 1, new, state); | 668 | xfs_iext_insert(bma->ip, bma->idx, 1, new, state); |
886 | ip->i_d.di_nextents++; | 669 | bma->ip->i_d.di_nextents++; |
887 | if (cur == NULL) | 670 | if (bma->cur == NULL) |
888 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 671 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
889 | else { | 672 | else { |
890 | rval = XFS_ILOG_CORE; | 673 | rval = XFS_ILOG_CORE; |
891 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | 674 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, |
892 | new->br_startblock, new->br_blockcount, | 675 | new->br_startblock, new->br_blockcount, |
893 | &i))) | 676 | &i); |
677 | if (error) | ||
894 | goto done; | 678 | goto done; |
895 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 679 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); |
896 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | 680 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
897 | if ((error = xfs_btree_insert(cur, &i))) | 681 | error = xfs_btree_insert(bma->cur, &i); |
682 | if (error) | ||
898 | goto done; | 683 | goto done; |
899 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 684 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
900 | } | 685 | } |
901 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 686 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && |
902 | ip->i_d.di_nextents > ip->i_df.if_ext_max) { | 687 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { |
903 | error = xfs_bmap_extents_to_btree(tp, ip, | 688 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
904 | first, flist, &cur, 1, &tmp_rval, | 689 | bma->firstblock, bma->flist, |
905 | XFS_DATA_FORK); | 690 | &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); |
906 | rval |= tmp_rval; | 691 | rval |= tmp_rval; |
907 | if (error) | 692 | if (error) |
908 | goto done; | 693 | goto done; |
909 | } | 694 | } |
910 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 695 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), |
911 | startblockval(PREV.br_startblock) - | 696 | startblockval(PREV.br_startblock) - |
912 | (cur ? cur->bc_private.b.allocated : 0)); | 697 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); |
913 | ep = xfs_iext_get_ext(ifp, *idx + 1); | 698 | ep = xfs_iext_get_ext(ifp, bma->idx + 1); |
914 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 699 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); |
915 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); | 700 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); |
916 | |||
917 | *dnew = temp; | ||
918 | break; | 701 | break; |
919 | 702 | ||
920 | case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | 703 | case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: |
@@ -923,38 +706,39 @@ xfs_bmap_add_extent_delay_real( | |||
923 | * The right neighbor is contiguous with the new allocation. | 706 | * The right neighbor is contiguous with the new allocation. |
924 | */ | 707 | */ |
925 | temp = PREV.br_blockcount - new->br_blockcount; | 708 | temp = PREV.br_blockcount - new->br_blockcount; |
926 | trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_); | 709 | trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); |
927 | xfs_bmbt_set_blockcount(ep, temp); | 710 | xfs_bmbt_set_blockcount(ep, temp); |
928 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1), | 711 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), |
929 | new->br_startoff, new->br_startblock, | 712 | new->br_startoff, new->br_startblock, |
930 | new->br_blockcount + RIGHT.br_blockcount, | 713 | new->br_blockcount + RIGHT.br_blockcount, |
931 | RIGHT.br_state); | 714 | RIGHT.br_state); |
932 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); | 715 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); |
933 | if (cur == NULL) | 716 | if (bma->cur == NULL) |
934 | rval = XFS_ILOG_DEXT; | 717 | rval = XFS_ILOG_DEXT; |
935 | else { | 718 | else { |
936 | rval = 0; | 719 | rval = 0; |
937 | if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, | 720 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, |
938 | RIGHT.br_startblock, | 721 | RIGHT.br_startblock, |
939 | RIGHT.br_blockcount, &i))) | 722 | RIGHT.br_blockcount, &i); |
723 | if (error) | ||
940 | goto done; | 724 | goto done; |
941 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 725 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
942 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | 726 | error = xfs_bmbt_update(bma->cur, new->br_startoff, |
943 | new->br_startblock, | 727 | new->br_startblock, |
944 | new->br_blockcount + | 728 | new->br_blockcount + |
945 | RIGHT.br_blockcount, | 729 | RIGHT.br_blockcount, |
946 | RIGHT.br_state))) | 730 | RIGHT.br_state); |
731 | if (error) | ||
947 | goto done; | 732 | goto done; |
948 | } | 733 | } |
949 | 734 | ||
950 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 735 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), |
951 | startblockval(PREV.br_startblock)); | 736 | startblockval(PREV.br_startblock)); |
952 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 737 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
953 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 738 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); |
954 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 739 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
955 | 740 | ||
956 | ++*idx; | 741 | bma->idx++; |
957 | *dnew = temp; | ||
958 | break; | 742 | break; |
959 | 743 | ||
960 | case BMAP_RIGHT_FILLING: | 744 | case BMAP_RIGHT_FILLING: |
@@ -963,42 +747,43 @@ xfs_bmap_add_extent_delay_real( | |||
963 | * The right neighbor is not contiguous. | 747 | * The right neighbor is not contiguous. |
964 | */ | 748 | */ |
965 | temp = PREV.br_blockcount - new->br_blockcount; | 749 | temp = PREV.br_blockcount - new->br_blockcount; |
966 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 750 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
967 | xfs_bmbt_set_blockcount(ep, temp); | 751 | xfs_bmbt_set_blockcount(ep, temp); |
968 | xfs_iext_insert(ip, *idx + 1, 1, new, state); | 752 | xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); |
969 | ip->i_d.di_nextents++; | 753 | bma->ip->i_d.di_nextents++; |
970 | if (cur == NULL) | 754 | if (bma->cur == NULL) |
971 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 755 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
972 | else { | 756 | else { |
973 | rval = XFS_ILOG_CORE; | 757 | rval = XFS_ILOG_CORE; |
974 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | 758 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, |
975 | new->br_startblock, new->br_blockcount, | 759 | new->br_startblock, new->br_blockcount, |
976 | &i))) | 760 | &i); |
761 | if (error) | ||
977 | goto done; | 762 | goto done; |
978 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 763 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); |
979 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | 764 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
980 | if ((error = xfs_btree_insert(cur, &i))) | 765 | error = xfs_btree_insert(bma->cur, &i); |
766 | if (error) | ||
981 | goto done; | 767 | goto done; |
982 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 768 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
983 | } | 769 | } |
984 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 770 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && |
985 | ip->i_d.di_nextents > ip->i_df.if_ext_max) { | 771 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { |
986 | error = xfs_bmap_extents_to_btree(tp, ip, | 772 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
987 | first, flist, &cur, 1, &tmp_rval, | 773 | bma->firstblock, bma->flist, &bma->cur, 1, |
988 | XFS_DATA_FORK); | 774 | &tmp_rval, XFS_DATA_FORK); |
989 | rval |= tmp_rval; | 775 | rval |= tmp_rval; |
990 | if (error) | 776 | if (error) |
991 | goto done; | 777 | goto done; |
992 | } | 778 | } |
993 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 779 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), |
994 | startblockval(PREV.br_startblock) - | 780 | startblockval(PREV.br_startblock) - |
995 | (cur ? cur->bc_private.b.allocated : 0)); | 781 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); |
996 | ep = xfs_iext_get_ext(ifp, *idx); | 782 | ep = xfs_iext_get_ext(ifp, bma->idx); |
997 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 783 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); |
998 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 784 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
999 | 785 | ||
1000 | ++*idx; | 786 | bma->idx++; |
1001 | *dnew = temp; | ||
1002 | break; | 787 | break; |
1003 | 788 | ||
1004 | case 0: | 789 | case 0: |
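The "case 0" geometry handled in the next hunk, with hypothetical offsets:

	/*
	 * None of the four state bits is set: the new real extent lands
	 * strictly inside the delayed extent, touching neither end, so
	 * PREV splits into three pieces.
	 *
	 *	PREV (delayed):	[0 ........................... 100)
	 *	new  (real):	            [40 ..... 60)
	 *	result:		[0..40) delayed, [40..60) real, [60..100) delayed
	 *
	 * temp and temp2 below are the two remainder lengths (40 each
	 * here); both remainders need fresh worst-case reservations, and
	 * the diff > 0 path charges the free-space counter when together
	 * they exceed what PREV had reserved.
	 */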
@@ -1024,82 +809,65 @@ xfs_bmap_add_extent_delay_real( | |||
1024 | */ | 809 | */ |
1025 | temp = new->br_startoff - PREV.br_startoff; | 810 | temp = new->br_startoff - PREV.br_startoff; |
1026 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | 811 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1027 | trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_); | 812 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); |
1028 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ | 813 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ |
1029 | LEFT = *new; | 814 | LEFT = *new; |
1030 | RIGHT.br_state = PREV.br_state; | 815 | RIGHT.br_state = PREV.br_state; |
1031 | RIGHT.br_startblock = nullstartblock( | 816 | RIGHT.br_startblock = nullstartblock( |
1032 | (int)xfs_bmap_worst_indlen(ip, temp2)); | 817 | (int)xfs_bmap_worst_indlen(bma->ip, temp2)); |
1033 | RIGHT.br_startoff = new_endoff; | 818 | RIGHT.br_startoff = new_endoff; |
1034 | RIGHT.br_blockcount = temp2; | 819 | RIGHT.br_blockcount = temp2; |
1035 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | 820 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ |
1036 | xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state); | 821 | xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); |
1037 | ip->i_d.di_nextents++; | 822 | bma->ip->i_d.di_nextents++; |
1038 | if (cur == NULL) | 823 | if (bma->cur == NULL) |
1039 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 824 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
1040 | else { | 825 | else { |
1041 | rval = XFS_ILOG_CORE; | 826 | rval = XFS_ILOG_CORE; |
1042 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | 827 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, |
1043 | new->br_startblock, new->br_blockcount, | 828 | new->br_startblock, new->br_blockcount, |
1044 | &i))) | 829 | &i); |
830 | if (error) | ||
1045 | goto done; | 831 | goto done; |
1046 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 832 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); |
1047 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | 833 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
1048 | if ((error = xfs_btree_insert(cur, &i))) | 834 | error = xfs_btree_insert(bma->cur, &i); |
835 | if (error) | ||
1049 | goto done; | 836 | goto done; |
1050 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 837 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1051 | } | 838 | } |
1052 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 839 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && |
1053 | ip->i_d.di_nextents > ip->i_df.if_ext_max) { | 840 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { |
1054 | error = xfs_bmap_extents_to_btree(tp, ip, | 841 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
1055 | first, flist, &cur, 1, &tmp_rval, | 842 | bma->firstblock, bma->flist, &bma->cur, |
1056 | XFS_DATA_FORK); | 843 | 1, &tmp_rval, XFS_DATA_FORK); |
1057 | rval |= tmp_rval; | 844 | rval |= tmp_rval; |
1058 | if (error) | 845 | if (error) |
1059 | goto done; | 846 | goto done; |
1060 | } | 847 | } |
1061 | temp = xfs_bmap_worst_indlen(ip, temp); | 848 | temp = xfs_bmap_worst_indlen(bma->ip, temp); |
1062 | temp2 = xfs_bmap_worst_indlen(ip, temp2); | 849 | temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); |
1063 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - | 850 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - |
1064 | (cur ? cur->bc_private.b.allocated : 0)); | 851 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); |
1065 | if (diff > 0 && | 852 | if (diff > 0) { |
1066 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | 853 | error = xfs_icsb_modify_counters(bma->ip->i_mount, |
1067 | -((int64_t)diff), 0)) { | 854 | XFS_SBS_FDBLOCKS, |
1068 | /* | 855 | -((int64_t)diff), 0); |
1069 | * Ick gross gag me with a spoon. | 856 | ASSERT(!error); |
1070 | */ | 857 | if (error) |
1071 | ASSERT(0); /* want to see if this ever happens! */ | 858 | goto done; |
1072 | while (diff > 0) { | ||
1073 | if (temp) { | ||
1074 | temp--; | ||
1075 | diff--; | ||
1076 | if (!diff || | ||
1077 | !xfs_icsb_modify_counters(ip->i_mount, | ||
1078 | XFS_SBS_FDBLOCKS, | ||
1079 | -((int64_t)diff), 0)) | ||
1080 | break; | ||
1081 | } | ||
1082 | if (temp2) { | ||
1083 | temp2--; | ||
1084 | diff--; | ||
1085 | if (!diff || | ||
1086 | !xfs_icsb_modify_counters(ip->i_mount, | ||
1087 | XFS_SBS_FDBLOCKS, | ||
1088 | -((int64_t)diff), 0)) | ||
1089 | break; | ||
1090 | } | ||
1091 | } | ||
1092 | } | 859 | } |
1093 | ep = xfs_iext_get_ext(ifp, *idx); | 860 | |
861 | ep = xfs_iext_get_ext(ifp, bma->idx); | ||
1094 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 862 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
1095 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 863 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
1096 | trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_); | 864 | trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); |
1097 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2), | 865 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), |
1098 | nullstartblock((int)temp2)); | 866 | nullstartblock((int)temp2)); |
1099 | trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_); | 867 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); |
1100 | 868 | ||
1101 | ++*idx; | 869 | bma->idx++; |
1102 | *dnew = temp + temp2; | 870 | da_new = temp + temp2; |
1103 | break; | 871 | break; |
1104 | 872 | ||
1105 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | 873 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: |
@@ -1114,9 +882,40 @@ xfs_bmap_add_extent_delay_real( | |||
1114 | */ | 882 | */ |
1115 | ASSERT(0); | 883 | ASSERT(0); |
1116 | } | 884 | } |
1117 | *curp = cur; | 885 | |
886 | /* convert to a btree if necessary */ | ||
887 | if (XFS_IFORK_FORMAT(bma->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && | ||
888 | XFS_IFORK_NEXTENTS(bma->ip, XFS_DATA_FORK) > ifp->if_ext_max) { | ||
889 | int tmp_logflags; /* partial log flag return val */ | ||
890 | |||
891 | ASSERT(bma->cur == NULL); | ||
892 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
893 | bma->firstblock, bma->flist, &bma->cur, | ||
894 | da_old > 0, &tmp_logflags, XFS_DATA_FORK); | ||
895 | bma->logflags |= tmp_logflags; | ||
896 | if (error) | ||
897 | goto done; | ||
898 | } | ||
899 | |||
900 | /* adjust for changes in reserved delayed indirect blocks */ | ||
901 | if (da_old || da_new) { | ||
902 | temp = da_new; | ||
903 | if (bma->cur) | ||
904 | temp += bma->cur->bc_private.b.allocated; | ||
905 | ASSERT(temp <= da_old); | ||
906 | if (temp < da_old) | ||
907 | xfs_icsb_modify_counters(bma->ip->i_mount, | ||
908 | XFS_SBS_FDBLOCKS, | ||
909 | (int64_t)(da_old - temp), 0); | ||
910 | } | ||
911 | |||
912 | /* clear out the allocated field, done with it now in any case. */ | ||
913 | if (bma->cur) | ||
914 | bma->cur->bc_private.b.allocated = 0; | ||
915 | |||
916 | xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); | ||
1118 | done: | 917 | done: |
1119 | *logflagsp = rval; | 918 | bma->logflags |= rval; |
1120 | return error; | 919 | return error; |
1121 | #undef LEFT | 920 | #undef LEFT |
1122 | #undef RIGHT | 921 | #undef RIGHT |
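The reservation settlement that used to run once in the dispatcher now closes out this helper. With hypothetical numbers: PREV had reserved da_old = 4 indirect blocks, the surviving delayed pieces need da_new = 2, and the btree conversion consumed 1 (bc_private.b.allocated), so

	temp = 2 + 1;				/* blocks still spoken for */
	ASSERT(temp <= da_old);			/* never hand back more than reserved */
	xfs_icsb_modify_counters(bma->ip->i_mount, XFS_SBS_FDBLOCKS,
				 (int64_t)(4 - 3), 0);	/* return the spare block */

returns exactly one block to the global free-space counter. Quota counters need no adjustment here, as the old dispatcher's comment also noted.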
@@ -1124,15 +923,17 @@ done: | |||
1124 | } | 923 | } |
1125 | 924 | ||
1126 | /* | 925 | /* |
1127 | * Called by xfs_bmap_add_extent to handle cases converting an unwritten | 926 | * Convert an unwritten allocation to a real allocation or vice versa. |
1128 | * allocation to a real allocation or vice versa. | ||
1129 | */ | 927 | */ |
1130 | STATIC int /* error */ | 928 | STATIC int /* error */ |
1131 | xfs_bmap_add_extent_unwritten_real( | 929 | xfs_bmap_add_extent_unwritten_real( |
930 | struct xfs_trans *tp, | ||
1132 | xfs_inode_t *ip, /* incore inode pointer */ | 931 | xfs_inode_t *ip, /* incore inode pointer */ |
1133 | xfs_extnum_t *idx, /* extent number to update/insert */ | 932 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1134 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 933 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
1135 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 934 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
935 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
936 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
1136 | int *logflagsp) /* inode logging flags */ | 937 | int *logflagsp) /* inode logging flags */ |
1137 | { | 938 | { |
1138 | xfs_btree_cur_t *cur; /* btree cursor */ | 939 | xfs_btree_cur_t *cur; /* btree cursor */ |
@@ -1148,15 +949,25 @@ xfs_bmap_add_extent_unwritten_real( | |||
1148 | int rval=0; /* return value (logging flags) */ | 949 | int rval=0; /* return value (logging flags) */ |
1149 | int state = 0;/* state bits, accessed thru macros */ | 950 | int state = 0;/* state bits, accessed thru macros */ |
1150 | 951 | ||
952 | *logflagsp = 0; | ||
953 | |||
954 | cur = *curp; | ||
955 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
956 | |||
957 | ASSERT(*idx >= 0); | ||
958 | ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
959 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
960 | |||
961 | XFS_STATS_INC(xs_add_exlist); | ||
962 | |||
1151 | #define LEFT r[0] | 963 | #define LEFT r[0] |
1152 | #define RIGHT r[1] | 964 | #define RIGHT r[1] |
1153 | #define PREV r[2] | 965 | #define PREV r[2] |
966 | |||
1154 | /* | 967 | /* |
1155 | * Set up a bunch of variables to make the tests simpler. | 968 | * Set up a bunch of variables to make the tests simpler. |
1156 | */ | 969 | */ |
1157 | error = 0; | 970 | error = 0; |
1158 | cur = *curp; | ||
1159 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
1160 | ep = xfs_iext_get_ext(ifp, *idx); | 971 | ep = xfs_iext_get_ext(ifp, *idx); |
1161 | xfs_bmbt_get_all(ep, &PREV); | 972 | xfs_bmbt_get_all(ep, &PREV); |
1162 | newext = new->br_state; | 973 | newext = new->br_state; |
@@ -1406,10 +1217,11 @@ xfs_bmap_add_extent_unwritten_real( | |||
1406 | goto done; | 1217 | goto done; |
1407 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 1218 | if ((error = xfs_btree_decrement(cur, 0, &i))) |
1408 | goto done; | 1219 | goto done; |
1409 | if (xfs_bmbt_update(cur, LEFT.br_startoff, | 1220 | error = xfs_bmbt_update(cur, LEFT.br_startoff, |
1410 | LEFT.br_startblock, | 1221 | LEFT.br_startblock, |
1411 | LEFT.br_blockcount + new->br_blockcount, | 1222 | LEFT.br_blockcount + new->br_blockcount, |
1412 | LEFT.br_state)) | 1223 | LEFT.br_state); |
1224 | if (error) | ||
1413 | goto done; | 1225 | goto done; |
1414 | } | 1226 | } |
1415 | break; | 1227 | break; |
@@ -1607,9 +1419,29 @@ xfs_bmap_add_extent_unwritten_real( | |||
1607 | */ | 1419 | */ |
1608 | ASSERT(0); | 1420 | ASSERT(0); |
1609 | } | 1421 | } |
1610 | *curp = cur; | 1422 | |
1423 | /* convert to a btree if necessary */ | ||
1424 | if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && | ||
1425 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > ifp->if_ext_max) { | ||
1426 | int tmp_logflags; /* partial log flag return val */ | ||
1427 | |||
1428 | ASSERT(cur == NULL); | ||
1429 | error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, | ||
1430 | 0, &tmp_logflags, XFS_DATA_FORK); | ||
1431 | *logflagsp |= tmp_logflags; | ||
1432 | if (error) | ||
1433 | goto done; | ||
1434 | } | ||
1435 | |||
1436 | /* clear out the allocated field, done with it now in any case. */ | ||
1437 | if (cur) { | ||
1438 | cur->bc_private.b.allocated = 0; | ||
1439 | *curp = cur; | ||
1440 | } | ||
1441 | |||
1442 | xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); | ||
1611 | done: | 1443 | done: |
1612 | *logflagsp = rval; | 1444 | *logflagsp |= rval; |
1613 | return error; | 1445 | return error; |
1614 | #undef LEFT | 1446 | #undef LEFT |
1615 | #undef RIGHT | 1447 | #undef RIGHT |
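One subtlety of the reworked tail: *logflagsp is now cleared on entry and only ever OR-ed afterwards, because two producers can contribute bits before the function returns. In outline:

	*logflagsp = 0;			/* on entry */
	...
	*logflagsp |= tmp_logflags;	/* from xfs_bmap_extents_to_btree() */
	...
	done:
	*logflagsp |= rval;		/* case-specific XFS_ILOG_* bits */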
@@ -1617,16 +1449,13 @@ done: | |||
1617 | } | 1449 | } |
1618 | 1450 | ||
1619 | /* | 1451 | /* |
1620 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 1452 | * Convert a hole to a delayed allocation. |
1621 | * to a delayed allocation. | ||
1622 | */ | 1453 | */ |
1623 | /*ARGSUSED*/ | 1454 | STATIC void |
1624 | STATIC int /* error */ | ||
1625 | xfs_bmap_add_extent_hole_delay( | 1455 | xfs_bmap_add_extent_hole_delay( |
1626 | xfs_inode_t *ip, /* incore inode pointer */ | 1456 | xfs_inode_t *ip, /* incore inode pointer */ |
1627 | xfs_extnum_t *idx, /* extent number to update/insert */ | 1457 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1628 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1458 | xfs_bmbt_irec_t *new) /* new data to add to file extents */ |
1629 | int *logflagsp) /* inode logging flags */ | ||
1630 | { | 1459 | { |
1631 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1460 | xfs_ifork_t *ifp; /* inode fork pointer */ |
1632 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ | 1461 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ |
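Dropping the return value and logflagsp is safe because converting a hole to a delayed allocation only edits the in-core extent list and in-memory counters: no transaction, no btree, nothing to log, and nothing that can fail. A hypothetical caller before and after:

	/* before: an error and logging flags were collected, both always zero */
	error = xfs_bmap_add_extent_hole_delay(ip, &idx, new, &logflags);

	/* after */
	xfs_bmap_add_extent_hole_delay(ip, &idx, new);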
@@ -1761,23 +1590,17 @@ xfs_bmap_add_extent_hole_delay( | |||
1761 | * Nothing to do for disk quota accounting here. | 1590 | * Nothing to do for disk quota accounting here. |
1762 | */ | 1591 | */ |
1763 | } | 1592 | } |
1764 | *logflagsp = 0; | ||
1765 | return 0; | ||
1766 | } | 1593 | } |
1767 | 1594 | ||
1768 | /* | 1595 | /* |
1769 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 1596 | * Convert a hole to a real allocation. |
1770 | * to a real allocation. | ||
1771 | */ | 1597 | */ |
1772 | STATIC int /* error */ | 1598 | STATIC int /* error */ |
1773 | xfs_bmap_add_extent_hole_real( | 1599 | xfs_bmap_add_extent_hole_real( |
1774 | xfs_inode_t *ip, /* incore inode pointer */ | 1600 | struct xfs_bmalloca *bma, |
1775 | xfs_extnum_t *idx, /* extent number to update/insert */ | 1601 | int whichfork) |
1776 | xfs_btree_cur_t *cur, /* if null, not a btree */ | ||
1777 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
1778 | int *logflagsp, /* inode logging flags */ | ||
1779 | int whichfork) /* data or attr fork */ | ||
1780 | { | 1602 | { |
1603 | struct xfs_bmbt_irec *new = &bma->got; | ||
1781 | int error; /* error return value */ | 1604 | int error; /* error return value */ |
1782 | int i; /* temp state */ | 1605 | int i; /* temp state */ |
1783 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1606 | xfs_ifork_t *ifp; /* inode fork pointer */ |
@@ -1786,19 +1609,26 @@ xfs_bmap_add_extent_hole_real( | |||
1786 | int rval=0; /* return value (logging flags) */ | 1609 | int rval=0; /* return value (logging flags) */ |
1787 | int state; /* state bits, accessed thru macros */ | 1610 | int state; /* state bits, accessed thru macros */ |
1788 | 1611 | ||
1789 | ifp = XFS_IFORK_PTR(ip, whichfork); | 1612 | ifp = XFS_IFORK_PTR(bma->ip, whichfork); |
1790 | ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 1613 | |
1791 | state = 0; | 1614 | ASSERT(bma->idx >= 0); |
1615 | ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
1616 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
1617 | ASSERT(!bma->cur || | ||
1618 | !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); | ||
1619 | |||
1620 | XFS_STATS_INC(xs_add_exlist); | ||
1792 | 1621 | ||
1622 | state = 0; | ||
1793 | if (whichfork == XFS_ATTR_FORK) | 1623 | if (whichfork == XFS_ATTR_FORK) |
1794 | state |= BMAP_ATTRFORK; | 1624 | state |= BMAP_ATTRFORK; |
1795 | 1625 | ||
1796 | /* | 1626 | /* |
1797 | * Check and set flags if this segment has a left neighbor. | 1627 | * Check and set flags if this segment has a left neighbor. |
1798 | */ | 1628 | */ |
1799 | if (*idx > 0) { | 1629 | if (bma->idx > 0) { |
1800 | state |= BMAP_LEFT_VALID; | 1630 | state |= BMAP_LEFT_VALID; |
1801 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); | 1631 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); |
1802 | if (isnullstartblock(left.br_startblock)) | 1632 | if (isnullstartblock(left.br_startblock)) |
1803 | state |= BMAP_LEFT_DELAY; | 1633 | state |= BMAP_LEFT_DELAY; |
1804 | } | 1634 | } |
@@ -1807,9 +1637,9 @@ xfs_bmap_add_extent_hole_real( | |||
1807 | * Check and set flags if this segment has a current value. | 1637 | * Check and set flags if this segment has a current value. |
1808 | * Not true if we're inserting into the "hole" at eof. | 1638 | * Not true if we're inserting into the "hole" at eof. |
1809 | */ | 1639 | */ |
1810 | if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | 1640 | if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { |
1811 | state |= BMAP_RIGHT_VALID; | 1641 | state |= BMAP_RIGHT_VALID; |
1812 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); | 1642 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); |
1813 | if (isnullstartblock(right.br_startblock)) | 1643 | if (isnullstartblock(right.br_startblock)) |
1814 | state |= BMAP_RIGHT_DELAY; | 1644 | state |= BMAP_RIGHT_DELAY; |
1815 | } | 1645 | } |
@@ -1846,39 +1676,42 @@ xfs_bmap_add_extent_hole_real( | |||
1846 | * left and on the right. | 1676 | * left and on the right. |
1847 | * Merge all three into a single extent record. | 1677 | * Merge all three into a single extent record. |
1848 | */ | 1678 | */ |
1849 | --*idx; | 1679 | --bma->idx; |
1850 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 1680 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
1851 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | 1681 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), |
1852 | left.br_blockcount + new->br_blockcount + | 1682 | left.br_blockcount + new->br_blockcount + |
1853 | right.br_blockcount); | 1683 | right.br_blockcount); |
1854 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 1684 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
1855 | 1685 | ||
1856 | xfs_iext_remove(ip, *idx + 1, 1, state); | 1686 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); |
1857 | 1687 | ||
1858 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1688 | XFS_IFORK_NEXT_SET(bma->ip, whichfork, |
1859 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 1689 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); |
1860 | if (cur == NULL) { | 1690 | if (bma->cur == NULL) { |
1861 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | 1691 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); |
1862 | } else { | 1692 | } else { |
1863 | rval = XFS_ILOG_CORE; | 1693 | rval = XFS_ILOG_CORE; |
1864 | if ((error = xfs_bmbt_lookup_eq(cur, | 1694 | error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, |
1865 | right.br_startoff, | 1695 | right.br_startblock, right.br_blockcount, |
1866 | right.br_startblock, | 1696 | &i); |
1867 | right.br_blockcount, &i))) | 1697 | if (error) |
1868 | goto done; | 1698 | goto done; |
1869 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1699 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1870 | if ((error = xfs_btree_delete(cur, &i))) | 1700 | error = xfs_btree_delete(bma->cur, &i); |
1701 | if (error) | ||
1871 | goto done; | 1702 | goto done; |
1872 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1703 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1873 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 1704 | error = xfs_btree_decrement(bma->cur, 0, &i); |
1705 | if (error) | ||
1874 | goto done; | 1706 | goto done; |
1875 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1707 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1876 | if ((error = xfs_bmbt_update(cur, left.br_startoff, | 1708 | error = xfs_bmbt_update(bma->cur, left.br_startoff, |
1877 | left.br_startblock, | 1709 | left.br_startblock, |
1878 | left.br_blockcount + | 1710 | left.br_blockcount + |
1879 | new->br_blockcount + | 1711 | new->br_blockcount + |
1880 | right.br_blockcount, | 1712 | right.br_blockcount, |
1881 | left.br_state))) | 1713 | left.br_state); |
1714 | if (error) | ||
1882 | goto done; | 1715 | goto done; |
1883 | } | 1716 | } |
1884 | break; | 1717 | break; |
@@ -1889,27 +1722,28 @@ xfs_bmap_add_extent_hole_real( | |||
1889 | * on the left. | 1722 | * on the left. |
1890 | * Merge the new allocation with the left neighbor. | 1723 | * Merge the new allocation with the left neighbor. |
1891 | */ | 1724 | */ |
1892 | --*idx; | 1725 | --bma->idx; |
1893 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 1726 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
1894 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | 1727 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), |
1895 | left.br_blockcount + new->br_blockcount); | 1728 | left.br_blockcount + new->br_blockcount); |
1896 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 1729 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
1897 | 1730 | ||
1898 | if (cur == NULL) { | 1731 | if (bma->cur == NULL) { |
1899 | rval = xfs_ilog_fext(whichfork); | 1732 | rval = xfs_ilog_fext(whichfork); |
1900 | } else { | 1733 | } else { |
1901 | rval = 0; | 1734 | rval = 0; |
1902 | if ((error = xfs_bmbt_lookup_eq(cur, | 1735 | error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, |
1903 | left.br_startoff, | 1736 | left.br_startblock, left.br_blockcount, |
1904 | left.br_startblock, | 1737 | &i); |
1905 | left.br_blockcount, &i))) | 1738 | if (error) |
1906 | goto done; | 1739 | goto done; |
1907 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1740 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1908 | if ((error = xfs_bmbt_update(cur, left.br_startoff, | 1741 | error = xfs_bmbt_update(bma->cur, left.br_startoff, |
1909 | left.br_startblock, | 1742 | left.br_startblock, |
1910 | left.br_blockcount + | 1743 | left.br_blockcount + |
1911 | new->br_blockcount, | 1744 | new->br_blockcount, |
1912 | left.br_state))) | 1745 | left.br_state); |
1746 | if (error) | ||
1913 | goto done; | 1747 | goto done; |
1914 | } | 1748 | } |
1915 | break; | 1749 | break; |
@@ -1920,28 +1754,30 @@ xfs_bmap_add_extent_hole_real( | |||
1920 | * on the right. | 1754 | * on the right. |
1921 | * Merge the new allocation with the right neighbor. | 1755 | * Merge the new allocation with the right neighbor. |
1922 | */ | 1756 | */ |
1923 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | 1757 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
1924 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | 1758 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), |
1925 | new->br_startoff, new->br_startblock, | 1759 | new->br_startoff, new->br_startblock, |
1926 | new->br_blockcount + right.br_blockcount, | 1760 | new->br_blockcount + right.br_blockcount, |
1927 | right.br_state); | 1761 | right.br_state); |
1928 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | 1762 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
1929 | 1763 | ||
1930 | if (cur == NULL) { | 1764 | if (bma->cur == NULL) { |
1931 | rval = xfs_ilog_fext(whichfork); | 1765 | rval = xfs_ilog_fext(whichfork); |
1932 | } else { | 1766 | } else { |
1933 | rval = 0; | 1767 | rval = 0; |
1934 | if ((error = xfs_bmbt_lookup_eq(cur, | 1768 | error = xfs_bmbt_lookup_eq(bma->cur, |
1935 | right.br_startoff, | 1769 | right.br_startoff, |
1936 | right.br_startblock, | 1770 | right.br_startblock, |
1937 | right.br_blockcount, &i))) | 1771 | right.br_blockcount, &i); |
1772 | if (error) | ||
1938 | goto done; | 1773 | goto done; |
1939 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1774 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1940 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | 1775 | error = xfs_bmbt_update(bma->cur, new->br_startoff, |
1941 | new->br_startblock, | 1776 | new->br_startblock, |
1942 | new->br_blockcount + | 1777 | new->br_blockcount + |
1943 | right.br_blockcount, | 1778 | right.br_blockcount, |
1944 | right.br_state))) | 1779 | right.br_state); |
1780 | if (error) | ||
1945 | goto done; | 1781 | goto done; |
1946 | } | 1782 | } |
1947 | break; | 1783 | break; |
@@ -1952,28 +1788,50 @@ xfs_bmap_add_extent_hole_real( | |||
1952 | * real allocation. | 1788 | * real allocation. |
1953 | * Insert a new entry. | 1789 | * Insert a new entry. |
1954 | */ | 1790 | */ |
1955 | xfs_iext_insert(ip, *idx, 1, new, state); | 1791 | xfs_iext_insert(bma->ip, bma->idx, 1, new, state); |
1956 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1792 | XFS_IFORK_NEXT_SET(bma->ip, whichfork, |
1957 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | 1793 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); |
1958 | if (cur == NULL) { | 1794 | if (bma->cur == NULL) { |
1959 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | 1795 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); |
1960 | } else { | 1796 | } else { |
1961 | rval = XFS_ILOG_CORE; | 1797 | rval = XFS_ILOG_CORE; |
1962 | if ((error = xfs_bmbt_lookup_eq(cur, | 1798 | error = xfs_bmbt_lookup_eq(bma->cur, |
1963 | new->br_startoff, | 1799 | new->br_startoff, |
1964 | new->br_startblock, | 1800 | new->br_startblock, |
1965 | new->br_blockcount, &i))) | 1801 | new->br_blockcount, &i); |
1802 | if (error) | ||
1966 | goto done; | 1803 | goto done; |
1967 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 1804 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); |
1968 | cur->bc_rec.b.br_state = new->br_state; | 1805 | bma->cur->bc_rec.b.br_state = new->br_state; |
1969 | if ((error = xfs_btree_insert(cur, &i))) | 1806 | error = xfs_btree_insert(bma->cur, &i); |
1807 | if (error) | ||
1970 | goto done; | 1808 | goto done; |
1971 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1809 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
1972 | } | 1810 | } |
1973 | break; | 1811 | break; |
1974 | } | 1812 | } |
1813 | |||
1814 | /* convert to a btree if necessary */ | ||
1815 | if (XFS_IFORK_FORMAT(bma->ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | ||
1816 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) > ifp->if_ext_max) { | ||
1817 | int tmp_logflags; /* partial log flag return val */ | ||
1818 | |||
1819 | ASSERT(bma->cur == NULL); | ||
1820 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
1821 | bma->firstblock, bma->flist, &bma->cur, | ||
1822 | 0, &tmp_logflags, whichfork); | ||
1823 | bma->logflags |= tmp_logflags; | ||
1824 | if (error) | ||
1825 | goto done; | ||
1826 | } | ||
1827 | |||
1828 | /* clear out the allocated field, done with it now in any case. */ | ||
1829 | if (bma->cur) | ||
1830 | bma->cur->bc_private.b.allocated = 0; | ||
1831 | |||
1832 | xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); | ||
1975 | done: | 1833 | done: |
1976 | *logflagsp = rval; | 1834 | bma->logflags |= rval; |
1977 | return error; | 1835 | return error; |
1978 | } | 1836 | } |
1979 | 1837 | ||
@@ -2160,26 +2018,26 @@ xfs_bmap_adjacent( | |||
2160 | XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) | 2018 | XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) |
2161 | 2019 | ||
2162 | mp = ap->ip->i_mount; | 2020 | mp = ap->ip->i_mount; |
2163 | nullfb = ap->firstblock == NULLFSBLOCK; | 2021 | nullfb = *ap->firstblock == NULLFSBLOCK; |
2164 | rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; | 2022 | rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; |
2165 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); | 2023 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); |
2166 | /* | 2024 | /* |
2167 | * If allocating at eof, and there's a previous real block, | 2025 | * If allocating at eof, and there's a previous real block, |
2168 | * try to use its last block as our starting point. | 2026 | * try to use its last block as our starting point. |
2169 | */ | 2027 | */ |
2170 | if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && | 2028 | if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && |
2171 | !isnullstartblock(ap->prevp->br_startblock) && | 2029 | !isnullstartblock(ap->prev.br_startblock) && |
2172 | ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount, | 2030 | ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, |
2173 | ap->prevp->br_startblock)) { | 2031 | ap->prev.br_startblock)) { |
2174 | ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount; | 2032 | ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; |
2175 | /* | 2033 | /* |
2176 | * Adjust for the gap between prevp and us. | 2034 | * Adjust for the gap between prevp and us. |
2177 | */ | 2035 | */ |
2178 | adjust = ap->off - | 2036 | adjust = ap->offset - |
2179 | (ap->prevp->br_startoff + ap->prevp->br_blockcount); | 2037 | (ap->prev.br_startoff + ap->prev.br_blockcount); |
2180 | if (adjust && | 2038 | if (adjust && |
2181 | ISVALID(ap->rval + adjust, ap->prevp->br_startblock)) | 2039 | ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) |
2182 | ap->rval += adjust; | 2040 | ap->blkno += adjust; |
2183 | } | 2041 | } |
2184 | /* | 2042 | /* |
2185 | * If not at eof, then compare the two neighbor blocks. | 2043 | * If not at eof, then compare the two neighbor blocks. |
@@ -2196,17 +2054,17 @@ xfs_bmap_adjacent( | |||
2196 | * If there's a previous (left) block, select a requested | 2054 | * If there's a previous (left) block, select a requested |
2197 | * start block based on it. | 2055 | * start block based on it. |
2198 | */ | 2056 | */ |
2199 | if (ap->prevp->br_startoff != NULLFILEOFF && | 2057 | if (ap->prev.br_startoff != NULLFILEOFF && |
2200 | !isnullstartblock(ap->prevp->br_startblock) && | 2058 | !isnullstartblock(ap->prev.br_startblock) && |
2201 | (prevbno = ap->prevp->br_startblock + | 2059 | (prevbno = ap->prev.br_startblock + |
2202 | ap->prevp->br_blockcount) && | 2060 | ap->prev.br_blockcount) && |
2203 | ISVALID(prevbno, ap->prevp->br_startblock)) { | 2061 | ISVALID(prevbno, ap->prev.br_startblock)) { |
2204 | /* | 2062 | /* |
2205 | * Calculate gap to end of previous block. | 2063 | * Calculate gap to end of previous block. |
2206 | */ | 2064 | */ |
2207 | adjust = prevdiff = ap->off - | 2065 | adjust = prevdiff = ap->offset - |
2208 | (ap->prevp->br_startoff + | 2066 | (ap->prev.br_startoff + |
2209 | ap->prevp->br_blockcount); | 2067 | ap->prev.br_blockcount); |
2210 | /* | 2068 | /* |
2211 | * Figure the startblock based on the previous block's | 2069 | * Figure the startblock based on the previous block's |
2212 | * end and the gap size. | 2070 | * end and the gap size. |
@@ -2215,9 +2073,9 @@ xfs_bmap_adjacent( | |||
2215 | * allocating, or using it gives us an invalid block | 2073 | * allocating, or using it gives us an invalid block |
2216 | * number, then just use the end of the previous block. | 2074 | * number, then just use the end of the previous block. |
2217 | */ | 2075 | */ |
2218 | if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && | 2076 | if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && |
2219 | ISVALID(prevbno + prevdiff, | 2077 | ISVALID(prevbno + prevdiff, |
2220 | ap->prevp->br_startblock)) | 2078 | ap->prev.br_startblock)) |
2221 | prevbno += adjust; | 2079 | prevbno += adjust; |
2222 | else | 2080 | else |
2223 | prevdiff += adjust; | 2081 | prevdiff += adjust; |
@@ -2238,16 +2096,16 @@ xfs_bmap_adjacent( | |||
2238 | * If there's a following (right) block, select a requested | 2096 | * If there's a following (right) block, select a requested |
2239 | * start block based on it. | 2097 | * start block based on it. |
2240 | */ | 2098 | */ |
2241 | if (!isnullstartblock(ap->gotp->br_startblock)) { | 2099 | if (!isnullstartblock(ap->got.br_startblock)) { |
2242 | /* | 2100 | /* |
2243 | * Calculate gap to start of next block. | 2101 | * Calculate gap to start of next block. |
2244 | */ | 2102 | */ |
2245 | adjust = gotdiff = ap->gotp->br_startoff - ap->off; | 2103 | adjust = gotdiff = ap->got.br_startoff - ap->offset; |
2246 | /* | 2104 | /* |
2247 | * Figure the startblock based on the next block's | 2105 | * Figure the startblock based on the next block's |
2248 | * start and the gap size. | 2106 | * start and the gap size. |
2249 | */ | 2107 | */ |
2250 | gotbno = ap->gotp->br_startblock; | 2108 | gotbno = ap->got.br_startblock; |
2251 | /* | 2109 | /* |
2252 | * Heuristic! | 2110 | * Heuristic! |
2253 | * If the gap is large relative to the piece we're | 2111 | * If the gap is large relative to the piece we're |
@@ -2255,12 +2113,12 @@ xfs_bmap_adjacent( | |||
2255 | * number, then just use the start of the next block | 2113 | * number, then just use the start of the next block |
2256 | * offset by our length. | 2114 | * offset by our length. |
2257 | */ | 2115 | */ |
2258 | if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && | 2116 | if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && |
2259 | ISVALID(gotbno - gotdiff, gotbno)) | 2117 | ISVALID(gotbno - gotdiff, gotbno)) |
2260 | gotbno -= adjust; | 2118 | gotbno -= adjust; |
2261 | else if (ISVALID(gotbno - ap->alen, gotbno)) { | 2119 | else if (ISVALID(gotbno - ap->length, gotbno)) { |
2262 | gotbno -= ap->alen; | 2120 | gotbno -= ap->length; |
2263 | gotdiff += adjust - ap->alen; | 2121 | gotdiff += adjust - ap->length; |
2264 | } else | 2122 | } else |
2265 | gotdiff += adjust; | 2123 | gotdiff += adjust; |
2266 | /* | 2124 | /* |
@@ -2278,14 +2136,14 @@ xfs_bmap_adjacent( | |||
2278 | gotbno = NULLFSBLOCK; | 2136 | gotbno = NULLFSBLOCK; |
2279 | /* | 2137 | /* |
2280 | * If both valid, pick the better one, else the only good | 2138 | * If both valid, pick the better one, else the only good |
2281 | * one, else ap->rval is already set (to 0 or the inode block). | 2139 | * one, else ap->blkno is already set (to 0 or the inode block). |
2282 | */ | 2140 | */ |
2283 | if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) | 2141 | if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) |
2284 | ap->rval = prevdiff <= gotdiff ? prevbno : gotbno; | 2142 | ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; |
2285 | else if (prevbno != NULLFSBLOCK) | 2143 | else if (prevbno != NULLFSBLOCK) |
2286 | ap->rval = prevbno; | 2144 | ap->blkno = prevbno; |
2287 | else if (gotbno != NULLFSBLOCK) | 2145 | else if (gotbno != NULLFSBLOCK) |
2288 | ap->rval = gotbno; | 2146 | ap->blkno = gotbno; |
2289 | } | 2147 | } |
2290 | #undef ISVALID | 2148 | #undef ISVALID |
2291 | } | 2149 | } |
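
The tail of xfs_bmap_adjacent() above reduces to a small decision: prefer whichever neighbour-derived candidate start block has the smaller gap. A minimal user-space sketch of that comparison; choose_blkno() is a hypothetical stand-in, not kernel API:

	#include <stdint.h>

	#define NULLFSBLOCK ((uint64_t)-1)

	/*
	 * Mirror the final prevbno/gotbno selection in xfs_bmap_adjacent():
	 * if both candidates are valid, take the one with the smaller gap;
	 * otherwise take the only valid one; otherwise keep the fallback
	 * (ap->blkno, already set to 0 or the inode block).
	 */
	static uint64_t choose_blkno(uint64_t prevbno, uint64_t prevdiff,
				     uint64_t gotbno, uint64_t gotdiff,
				     uint64_t fallback)
	{
		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
			return prevdiff <= gotdiff ? prevbno : gotbno;
		if (prevbno != NULLFSBLOCK)
			return prevbno;
		if (gotbno != NULLFSBLOCK)
			return gotbno;
		return fallback;
	}
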
@@ -2305,24 +2163,24 @@ xfs_bmap_rtalloc( | |||
2305 | mp = ap->ip->i_mount; | 2163 | mp = ap->ip->i_mount; |
2306 | align = xfs_get_extsz_hint(ap->ip); | 2164 | align = xfs_get_extsz_hint(ap->ip); |
2307 | prod = align / mp->m_sb.sb_rextsize; | 2165 | prod = align / mp->m_sb.sb_rextsize; |
2308 | error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, | 2166 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
2309 | align, 1, ap->eof, 0, | 2167 | align, 1, ap->eof, 0, |
2310 | ap->conv, &ap->off, &ap->alen); | 2168 | ap->conv, &ap->offset, &ap->length); |
2311 | if (error) | 2169 | if (error) |
2312 | return error; | 2170 | return error; |
2313 | ASSERT(ap->alen); | 2171 | ASSERT(ap->length); |
2314 | ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); | 2172 | ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); |
2315 | 2173 | ||
2316 | /* | 2174 | /* |
2317 | * If the offset & length are not perfectly aligned | 2175 | * If the offset & length are not perfectly aligned |
2318 | * then kill prod, it will just get us in trouble. | 2176 | * then kill prod, it will just get us in trouble. |
2319 | */ | 2177 | */ |
2320 | if (do_mod(ap->off, align) || ap->alen % align) | 2178 | if (do_mod(ap->offset, align) || ap->length % align) |
2321 | prod = 1; | 2179 | prod = 1; |
2322 | /* | 2180 | /* |
2323 | * Set ralen to be the actual requested length in rtextents. | 2181 | * Set ralen to be the actual requested length in rtextents. |
2324 | */ | 2182 | */ |
2325 | ralen = ap->alen / mp->m_sb.sb_rextsize; | 2183 | ralen = ap->length / mp->m_sb.sb_rextsize; |
2326 | /* | 2184 | /* |
2327 | * If the old value was close enough to MAXEXTLEN that | 2185 | * If the old value was close enough to MAXEXTLEN that |
2328 | * we rounded up to it, cut it back so it's valid again. | 2186 | * we rounded up to it, cut it back so it's valid again. |
@@ -2337,21 +2195,21 @@ xfs_bmap_rtalloc( | |||
2337 | * Lock out other modifications to the RT bitmap inode. | 2195 | * Lock out other modifications to the RT bitmap inode. |
2338 | */ | 2196 | */ |
2339 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | 2197 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
2340 | xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); | 2198 | xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
2341 | 2199 | ||
2342 | /* | 2200 | /* |
2343 | * If it's an allocation to an empty file at offset 0, | 2201 | * If it's an allocation to an empty file at offset 0, |
2344 | * pick an extent that will space things out in the rt area. | 2202 | * pick an extent that will space things out in the rt area. |
2345 | */ | 2203 | */ |
2346 | if (ap->eof && ap->off == 0) { | 2204 | if (ap->eof && ap->offset == 0) { |
2347 | xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ | 2205 | xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ |
2348 | 2206 | ||
2349 | error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); | 2207 | error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); |
2350 | if (error) | 2208 | if (error) |
2351 | return error; | 2209 | return error; |
2352 | ap->rval = rtx * mp->m_sb.sb_rextsize; | 2210 | ap->blkno = rtx * mp->m_sb.sb_rextsize; |
2353 | } else { | 2211 | } else { |
2354 | ap->rval = 0; | 2212 | ap->blkno = 0; |
2355 | } | 2213 | } |
2356 | 2214 | ||
2357 | xfs_bmap_adjacent(ap); | 2215 | xfs_bmap_adjacent(ap); |
@@ -2359,23 +2217,23 @@ xfs_bmap_rtalloc( | |||
2359 | /* | 2217 | /* |
2360 | * Realtime allocation, done through xfs_rtallocate_extent. | 2218 | * Realtime allocation, done through xfs_rtallocate_extent. |
2361 | */ | 2219 | */ |
2362 | atype = ap->rval == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; | 2220 | atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; |
2363 | do_div(ap->rval, mp->m_sb.sb_rextsize); | 2221 | do_div(ap->blkno, mp->m_sb.sb_rextsize); |
2364 | rtb = ap->rval; | 2222 | rtb = ap->blkno; |
2365 | ap->alen = ralen; | 2223 | ap->length = ralen; |
2366 | if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, | 2224 | if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, |
2367 | &ralen, atype, ap->wasdel, prod, &rtb))) | 2225 | &ralen, atype, ap->wasdel, prod, &rtb))) |
2368 | return error; | 2226 | return error; |
2369 | if (rtb == NULLFSBLOCK && prod > 1 && | 2227 | if (rtb == NULLFSBLOCK && prod > 1 && |
2370 | (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, | 2228 | (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, |
2371 | ap->alen, &ralen, atype, | 2229 | ap->length, &ralen, atype, |
2372 | ap->wasdel, 1, &rtb))) | 2230 | ap->wasdel, 1, &rtb))) |
2373 | return error; | 2231 | return error; |
2374 | ap->rval = rtb; | 2232 | ap->blkno = rtb; |
2375 | if (ap->rval != NULLFSBLOCK) { | 2233 | if (ap->blkno != NULLFSBLOCK) { |
2376 | ap->rval *= mp->m_sb.sb_rextsize; | 2234 | ap->blkno *= mp->m_sb.sb_rextsize; |
2377 | ralen *= mp->m_sb.sb_rextsize; | 2235 | ralen *= mp->m_sb.sb_rextsize; |
2378 | ap->alen = ralen; | 2236 | ap->length = ralen; |
2379 | ap->ip->i_d.di_nblocks += ralen; | 2237 | ap->ip->i_d.di_nblocks += ralen; |
2380 | xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); | 2238 | xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); |
2381 | if (ap->wasdel) | 2239 | if (ap->wasdel) |
@@ -2388,7 +2246,7 @@ xfs_bmap_rtalloc( | |||
2388 | ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : | 2246 | ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : |
2389 | XFS_TRANS_DQ_RTBCOUNT, (long) ralen); | 2247 | XFS_TRANS_DQ_RTBCOUNT, (long) ralen); |
2390 | } else { | 2248 | } else { |
2391 | ap->alen = 0; | 2249 | ap->length = 0; |
2392 | } | 2250 | } |
2393 | return 0; | 2251 | return 0; |
2394 | } | 2252 | } |
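
xfs_bmap_rtalloc() above keeps switching between two units: filesystem blocks (ap->blkno, ap->length) and realtime extents of sb_rextsize blocks each (rtx, ralen). A standalone sketch of that round trip, with an assumed extent size in place of a real superblock:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t rextsize = 4;	/* blocks per rt extent (assumed) */
		uint64_t blkno = 100;	/* target block, already aligned */
		uint64_t length = 20;	/* request length in blocks */

		/* The rt bitmap allocator works in extent units ... */
		uint64_t rtx = blkno / rextsize;
		uint64_t ralen = length / rextsize;

		/* ... and the result is converted back to blocks. */
		printf("rtx %llu, ralen %llu -> blkno %llu, length %llu\n",
		       (unsigned long long)rtx, (unsigned long long)ralen,
		       (unsigned long long)(rtx * rextsize),
		       (unsigned long long)(ralen * rextsize));
		return 0;
	}
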
@@ -2503,7 +2361,7 @@ xfs_bmap_btalloc_nullfb( | |||
2503 | * AG as the stream may have moved. | 2361 | * AG as the stream may have moved. |
2504 | */ | 2362 | */ |
2505 | if (xfs_inode_is_filestream(ap->ip)) | 2363 | if (xfs_inode_is_filestream(ap->ip)) |
2506 | ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); | 2364 | ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); |
2507 | 2365 | ||
2508 | return 0; | 2366 | return 0; |
2509 | } | 2367 | } |
@@ -2528,52 +2386,52 @@ xfs_bmap_btalloc( | |||
2528 | mp = ap->ip->i_mount; | 2386 | mp = ap->ip->i_mount; |
2529 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; | 2387 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; |
2530 | if (unlikely(align)) { | 2388 | if (unlikely(align)) { |
2531 | error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp, | 2389 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
2532 | align, 0, ap->eof, 0, ap->conv, | 2390 | align, 0, ap->eof, 0, ap->conv, |
2533 | &ap->off, &ap->alen); | 2391 | &ap->offset, &ap->length); |
2534 | ASSERT(!error); | 2392 | ASSERT(!error); |
2535 | ASSERT(ap->alen); | 2393 | ASSERT(ap->length); |
2536 | } | 2394 | } |
2537 | nullfb = ap->firstblock == NULLFSBLOCK; | 2395 | nullfb = *ap->firstblock == NULLFSBLOCK; |
2538 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); | 2396 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); |
2539 | if (nullfb) { | 2397 | if (nullfb) { |
2540 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { | 2398 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { |
2541 | ag = xfs_filestream_lookup_ag(ap->ip); | 2399 | ag = xfs_filestream_lookup_ag(ap->ip); |
2542 | ag = (ag != NULLAGNUMBER) ? ag : 0; | 2400 | ag = (ag != NULLAGNUMBER) ? ag : 0; |
2543 | ap->rval = XFS_AGB_TO_FSB(mp, ag, 0); | 2401 | ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); |
2544 | } else { | 2402 | } else { |
2545 | ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); | 2403 | ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); |
2546 | } | 2404 | } |
2547 | } else | 2405 | } else |
2548 | ap->rval = ap->firstblock; | 2406 | ap->blkno = *ap->firstblock; |
2549 | 2407 | ||
2550 | xfs_bmap_adjacent(ap); | 2408 | xfs_bmap_adjacent(ap); |
2551 | 2409 | ||
2552 | /* | 2410 | /* |
2553 | * If allowed, use ap->rval; otherwise must use firstblock since | 2411 | * If allowed, use ap->blkno; otherwise must use firstblock since |
2554 | * it's in the right allocation group. | 2412 | * it's in the right allocation group. |
2555 | */ | 2413 | */ |
2556 | if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) | 2414 | if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) |
2557 | ; | 2415 | ; |
2558 | else | 2416 | else |
2559 | ap->rval = ap->firstblock; | 2417 | ap->blkno = *ap->firstblock; |
2560 | /* | 2418 | /* |
2561 | * Normal allocation, done through xfs_alloc_vextent. | 2419 | * Normal allocation, done through xfs_alloc_vextent. |
2562 | */ | 2420 | */ |
2563 | tryagain = isaligned = 0; | 2421 | tryagain = isaligned = 0; |
2564 | args.tp = ap->tp; | 2422 | args.tp = ap->tp; |
2565 | args.mp = mp; | 2423 | args.mp = mp; |
2566 | args.fsbno = ap->rval; | 2424 | args.fsbno = ap->blkno; |
2567 | 2425 | ||
2568 | /* Trim the allocation back to the maximum an AG can fit. */ | 2426 | /* Trim the allocation back to the maximum an AG can fit. */ |
2569 | args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); | 2427 | args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); |
2570 | args.firstblock = ap->firstblock; | 2428 | args.firstblock = *ap->firstblock; |
2571 | blen = 0; | 2429 | blen = 0; |
2572 | if (nullfb) { | 2430 | if (nullfb) { |
2573 | error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); | 2431 | error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); |
2574 | if (error) | 2432 | if (error) |
2575 | return error; | 2433 | return error; |
2576 | } else if (ap->low) { | 2434 | } else if (ap->flist->xbf_low) { |
2577 | if (xfs_inode_is_filestream(ap->ip)) | 2435 | if (xfs_inode_is_filestream(ap->ip)) |
2578 | args.type = XFS_ALLOCTYPE_FIRST_AG; | 2436 | args.type = XFS_ALLOCTYPE_FIRST_AG; |
2579 | else | 2437 | else |
@@ -2587,14 +2445,14 @@ xfs_bmap_btalloc( | |||
2587 | /* apply extent size hints if obtained earlier */ | 2445 | /* apply extent size hints if obtained earlier */ |
2588 | if (unlikely(align)) { | 2446 | if (unlikely(align)) { |
2589 | args.prod = align; | 2447 | args.prod = align; |
2590 | if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) | 2448 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) |
2591 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | 2449 | args.mod = (xfs_extlen_t)(args.prod - args.mod); |
2592 | } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { | 2450 | } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { |
2593 | args.prod = 1; | 2451 | args.prod = 1; |
2594 | args.mod = 0; | 2452 | args.mod = 0; |
2595 | } else { | 2453 | } else { |
2596 | args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; | 2454 | args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; |
2597 | if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))) | 2455 | if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) |
2598 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | 2456 | args.mod = (xfs_extlen_t)(args.prod - args.mod); |
2599 | } | 2457 | } |
2600 | /* | 2458 | /* |
@@ -2606,8 +2464,8 @@ xfs_bmap_btalloc( | |||
2606 | * is >= the stripe unit and the allocation offset is | 2464 | * is >= the stripe unit and the allocation offset is |
2607 | * at the end of file. | 2465 | * at the end of file. |
2608 | */ | 2466 | */ |
2609 | if (!ap->low && ap->aeof) { | 2467 | if (!ap->flist->xbf_low && ap->aeof) { |
2610 | if (!ap->off) { | 2468 | if (!ap->offset) { |
2611 | args.alignment = mp->m_dalign; | 2469 | args.alignment = mp->m_dalign; |
2612 | atype = args.type; | 2470 | atype = args.type; |
2613 | isaligned = 1; | 2471 | isaligned = 1; |
@@ -2660,7 +2518,7 @@ xfs_bmap_btalloc( | |||
2660 | * turned on. | 2518 | * turned on. |
2661 | */ | 2519 | */ |
2662 | args.type = atype; | 2520 | args.type = atype; |
2663 | args.fsbno = ap->rval; | 2521 | args.fsbno = ap->blkno; |
2664 | args.alignment = mp->m_dalign; | 2522 | args.alignment = mp->m_dalign; |
2665 | args.minlen = nextminlen; | 2523 | args.minlen = nextminlen; |
2666 | args.minalignslop = 0; | 2524 | args.minalignslop = 0; |
@@ -2674,7 +2532,7 @@ xfs_bmap_btalloc( | |||
2674 | * try again. | 2532 | * try again. |
2675 | */ | 2533 | */ |
2676 | args.type = atype; | 2534 | args.type = atype; |
2677 | args.fsbno = ap->rval; | 2535 | args.fsbno = ap->blkno; |
2678 | args.alignment = 0; | 2536 | args.alignment = 0; |
2679 | if ((error = xfs_alloc_vextent(&args))) | 2537 | if ((error = xfs_alloc_vextent(&args))) |
2680 | return error; | 2538 | return error; |
@@ -2683,7 +2541,7 @@ xfs_bmap_btalloc( | |||
2683 | args.minlen > ap->minlen) { | 2541 | args.minlen > ap->minlen) { |
2684 | args.minlen = ap->minlen; | 2542 | args.minlen = ap->minlen; |
2685 | args.type = XFS_ALLOCTYPE_START_BNO; | 2543 | args.type = XFS_ALLOCTYPE_START_BNO; |
2686 | args.fsbno = ap->rval; | 2544 | args.fsbno = ap->blkno; |
2687 | if ((error = xfs_alloc_vextent(&args))) | 2545 | if ((error = xfs_alloc_vextent(&args))) |
2688 | return error; | 2546 | return error; |
2689 | } | 2547 | } |
@@ -2694,13 +2552,26 @@ xfs_bmap_btalloc( | |||
2694 | args.minleft = 0; | 2552 | args.minleft = 0; |
2695 | if ((error = xfs_alloc_vextent(&args))) | 2553 | if ((error = xfs_alloc_vextent(&args))) |
2696 | return error; | 2554 | return error; |
2697 | ap->low = 1; | 2555 | ap->flist->xbf_low = 1; |
2698 | } | 2556 | } |
2699 | if (args.fsbno != NULLFSBLOCK) { | 2557 | if (args.fsbno != NULLFSBLOCK) { |
2700 | ap->firstblock = ap->rval = args.fsbno; | 2558 | /* |
2559 | * check the allocation happened at the same or higher AG than | ||
2560 | * the first block that was allocated. | ||
2561 | */ | ||
2562 | ASSERT(*ap->firstblock == NULLFSBLOCK || | ||
2563 | XFS_FSB_TO_AGNO(mp, *ap->firstblock) == | ||
2564 | XFS_FSB_TO_AGNO(mp, args.fsbno) || | ||
2565 | (ap->flist->xbf_low && | ||
2566 | XFS_FSB_TO_AGNO(mp, *ap->firstblock) < | ||
2567 | XFS_FSB_TO_AGNO(mp, args.fsbno))); | ||
2568 | |||
2569 | ap->blkno = args.fsbno; | ||
2570 | if (*ap->firstblock == NULLFSBLOCK) | ||
2571 | *ap->firstblock = args.fsbno; | ||
2701 | ASSERT(nullfb || fb_agno == args.agno || | 2572 | ASSERT(nullfb || fb_agno == args.agno || |
2702 | (ap->low && fb_agno < args.agno)); | 2573 | (ap->flist->xbf_low && fb_agno < args.agno)); |
2703 | ap->alen = args.len; | 2574 | ap->length = args.len; |
2704 | ap->ip->i_d.di_nblocks += args.len; | 2575 | ap->ip->i_d.di_nblocks += args.len; |
2705 | xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); | 2576 | xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); |
2706 | if (ap->wasdel) | 2577 | if (ap->wasdel) |
@@ -2714,8 +2585,8 @@ xfs_bmap_btalloc( | |||
2714 | XFS_TRANS_DQ_BCOUNT, | 2585 | XFS_TRANS_DQ_BCOUNT, |
2715 | (long) args.len); | 2586 | (long) args.len); |
2716 | } else { | 2587 | } else { |
2717 | ap->rval = NULLFSBLOCK; | 2588 | ap->blkno = NULLFSBLOCK; |
2718 | ap->alen = 0; | 2589 | ap->length = 0; |
2719 | } | 2590 | } |
2720 | return 0; | 2591 | return 0; |
2721 | } | 2592 | } |
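
The new assertion block in xfs_bmap_btalloc() above encodes the firstblock ordering rule: within one transaction, later allocations must land in the same allocation group as *ap->firstblock, or a higher one once low-space mode has been entered. A compact sketch of the same check; fsb_to_agno() and AGBLOCKS are assumed stand-ins for XFS_FSB_TO_AGNO() and the real AG geometry:

	#include <assert.h>
	#include <stdint.h>

	#define NULLFSBLOCK	((uint64_t)-1)
	#define AGBLOCKS	(1u << 20)	/* blocks per AG (assumed) */

	/* Stand-in for XFS_FSB_TO_AGNO(): AG number of a filesystem block. */
	static uint32_t fsb_to_agno(uint64_t fsbno)
	{
		return (uint32_t)(fsbno / AGBLOCKS);
	}

	/*
	 * The ordering rule asserted after a successful allocation: same AG
	 * as the transaction's first block, or a higher AG if the allocator
	 * has fallen back to low-space mode.
	 */
	static void check_ag_order(uint64_t firstblock, uint64_t fsbno, int low)
	{
		assert(firstblock == NULLFSBLOCK ||
		       fsb_to_agno(firstblock) == fsb_to_agno(fsbno) ||
		       (low && fsb_to_agno(firstblock) < fsb_to_agno(fsbno)));
	}
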
@@ -3383,8 +3254,7 @@ xfs_bmap_local_to_extents( | |||
3383 | ASSERT(args.len == 1); | 3254 | ASSERT(args.len == 1); |
3384 | *firstblock = args.fsbno; | 3255 | *firstblock = args.fsbno; |
3385 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | 3256 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); |
3386 | memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data, | 3257 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); |
3387 | ifp->if_bytes); | ||
3388 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | 3258 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); |
3389 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); | 3259 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); |
3390 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); | 3260 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); |
@@ -3590,7 +3460,7 @@ xfs_bmap_add_attrfork( | |||
3590 | } | 3460 | } |
3591 | ASSERT(ip->i_d.di_anextents == 0); | 3461 | ASSERT(ip->i_d.di_anextents == 0); |
3592 | 3462 | ||
3593 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | 3463 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
3594 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 3464 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
3595 | 3465 | ||
3596 | switch (ip->i_d.di_format) { | 3466 | switch (ip->i_d.di_format) { |
@@ -3783,19 +3653,11 @@ xfs_bmap_compute_maxlevels( | |||
3783 | * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi | 3653 | * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi |
3784 | * caller. Frees all the extents that need freeing, which must be done | 3654 | * caller. Frees all the extents that need freeing, which must be done |
3785 | * last due to locking considerations. We never free any extents in | 3655 | * last due to locking considerations. We never free any extents in |
3786 | * the first transaction. This is to allow the caller to make the first | 3656 | * the first transaction. |
3787 | * transaction a synchronous one so that the pointers to the data being | ||
3788 | * broken in this transaction will be permanent before the data is actually | ||
3789 | * freed. This is necessary to prevent blocks from being reallocated | ||
3790 | * and written to before the free and reallocation are actually permanent. | ||
3791 | * We do not just make the first transaction synchronous here, because | ||
3792 | * there are more efficient ways to gain the same protection in some cases | ||
3793 | * (see the file truncation code). | ||
3794 | * | 3657 | * |
3795 | * Return 1 in the committed parameter if the given transaction was | 3658 | * Return 1 in the committed parameter if the given transaction was |
3796 | * committed and a new one started, and 0 otherwise. | 3659 | * committed and a new one started, and 0 otherwise. |
3797 | */ | 3660 | */ |
3798 | /*ARGSUSED*/ | ||
3799 | int /* error */ | 3661 | int /* error */ |
3800 | xfs_bmap_finish( | 3662 | xfs_bmap_finish( |
3801 | xfs_trans_t **tp, /* transaction pointer addr */ | 3663 | xfs_trans_t **tp, /* transaction pointer addr */ |
@@ -3995,42 +3857,122 @@ xfs_bmap_last_before( | |||
3995 | return 0; | 3857 | return 0; |
3996 | } | 3858 | } |
3997 | 3859 | ||
3860 | STATIC int | ||
3861 | xfs_bmap_last_extent( | ||
3862 | struct xfs_trans *tp, | ||
3863 | struct xfs_inode *ip, | ||
3864 | int whichfork, | ||
3865 | struct xfs_bmbt_irec *rec, | ||
3866 | int *is_empty) | ||
3867 | { | ||
3868 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
3869 | int error; | ||
3870 | int nextents; | ||
3871 | |||
3872 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
3873 | error = xfs_iread_extents(tp, ip, whichfork); | ||
3874 | if (error) | ||
3875 | return error; | ||
3876 | } | ||
3877 | |||
3878 | nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
3879 | if (nextents == 0) { | ||
3880 | *is_empty = 1; | ||
3881 | return 0; | ||
3882 | } | ||
3883 | |||
3884 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); | ||
3885 | *is_empty = 0; | ||
3886 | return 0; | ||
3887 | } | ||
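
xfs_bmap_last_extent() above is a small helper: after making sure the extent list is in core, it either reports an empty fork or hands back the final record. The same shape over a plain array; struct irec is an assumed miniature of xfs_bmbt_irec:

	#include <stddef.h>

	struct irec {				/* miniature xfs_bmbt_irec */
		unsigned long long br_startoff;
		unsigned long long br_startblock;
		unsigned long long br_blockcount;
	};

	/* Report the last record of a loaded extent list, or flag it empty. */
	static int last_extent(const struct irec *recs, size_t nextents,
			       struct irec *rec, int *is_empty)
	{
		if (nextents == 0) {
			*is_empty = 1;
			return 0;
		}
		*rec = recs[nextents - 1];
		*is_empty = 0;
		return 0;
	}
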
3888 | |||
3889 | /* | ||
3890 | * Check the last inode extent to determine whether this allocation will result | ||
3891 | * in blocks being allocated at the end of the file. When we allocate new data | ||
3892 | * blocks at the end of the file which do not start at the previous data block, | ||
3893 | * we will try to align the new blocks at stripe unit boundaries. | ||
3894 | * | ||
3895 | * Returns 0 in bma->aeof if the file (fork) is empty, as any new write will | ||
3896 | * be at or past the EOF. | ||
3897 | */ | ||
3898 | STATIC int | ||
3899 | xfs_bmap_isaeof( | ||
3900 | struct xfs_bmalloca *bma, | ||
3901 | int whichfork) | ||
3902 | { | ||
3903 | struct xfs_bmbt_irec rec; | ||
3904 | int is_empty; | ||
3905 | int error; | ||
3906 | |||
3907 | bma->aeof = 0; | ||
3908 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, | ||
3909 | &is_empty); | ||
3910 | if (error || is_empty) | ||
3911 | return error; | ||
3912 | |||
3913 | /* | ||
3914 | * Check if we are allocating at or past the last extent, or at least into | ||
3915 | * the last delayed allocated extent. | ||
3916 | */ | ||
3917 | bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || | ||
3918 | (bma->offset >= rec.br_startoff && | ||
3919 | isnullstartblock(rec.br_startblock)); | ||
3920 | return 0; | ||
3921 | } | ||
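
The bma->aeof expression in xfs_bmap_isaeof() above collapses to a two-part predicate: the new offset is past the last extent entirely, or it lands inside that extent while the extent is still a delayed allocation. A sketch, with is_delalloc standing in for isnullstartblock(rec.br_startblock):

	/*
	 * Allocating-at-EOF test: past the last extent, or at least into
	 * it while it is still a delayed allocation.
	 */
	static int is_aeof(unsigned long long offset,
			   unsigned long long last_startoff,
			   unsigned long long last_blockcount,
			   int is_delalloc)
	{
		return offset >= last_startoff + last_blockcount ||
		       (offset >= last_startoff && is_delalloc);
	}
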
3922 | |||
3923 | /* | ||
3924 | * Check if the endoff is outside the last extent. If so, the caller will grow | ||
3925 | * the allocation to a stripe unit boundary. All offsets are considered outside | ||
3926 | * the end of file for an empty fork, so 1 is returned in *eof in that case. | ||
3927 | */ | ||
3928 | int | ||
3929 | xfs_bmap_eof( | ||
3930 | struct xfs_inode *ip, | ||
3931 | xfs_fileoff_t endoff, | ||
3932 | int whichfork, | ||
3933 | int *eof) | ||
3934 | { | ||
3935 | struct xfs_bmbt_irec rec; | ||
3936 | int error; | ||
3937 | |||
3938 | error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); | ||
3939 | if (error || *eof) | ||
3940 | return error; | ||
3941 | |||
3942 | *eof = endoff >= rec.br_startoff + rec.br_blockcount; | ||
3943 | return 0; | ||
3944 | } | ||
3945 | |||
3998 | /* | 3946 | /* |
3999 | * Returns the file-relative block number of the first block past eof in | 3947 | * Returns the file-relative block number of the first block past eof in |
4000 | * the file. This is not based on i_size, it is based on the extent records. | 3948 | * the file. This is not based on i_size, it is based on the extent records. |
4001 | * Returns 0 for local files, as they do not have extent records. | 3949 | * Returns 0 for local files, as they do not have extent records. |
4002 | */ | 3950 | */ |
4003 | int /* error */ | 3951 | int |
4004 | xfs_bmap_last_offset( | 3952 | xfs_bmap_last_offset( |
4005 | xfs_trans_t *tp, /* transaction pointer */ | 3953 | struct xfs_trans *tp, |
4006 | xfs_inode_t *ip, /* incore inode */ | 3954 | struct xfs_inode *ip, |
4007 | xfs_fileoff_t *last_block, /* last block */ | 3955 | xfs_fileoff_t *last_block, |
4008 | int whichfork) /* data or attr fork */ | 3956 | int whichfork) |
4009 | { | 3957 | { |
4010 | xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ | 3958 | struct xfs_bmbt_irec rec; |
4011 | int error; /* error return value */ | 3959 | int is_empty; |
4012 | xfs_ifork_t *ifp; /* inode fork pointer */ | 3960 | int error; |
4013 | xfs_extnum_t nextents; /* number of extent entries */ | 3961 | |
3962 | *last_block = 0; | ||
3963 | |||
3964 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) | ||
3965 | return 0; | ||
4014 | 3966 | ||
4015 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | 3967 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && |
4016 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 3968 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) |
4017 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) | ||
4018 | return XFS_ERROR(EIO); | 3969 | return XFS_ERROR(EIO); |
4019 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | 3970 | |
4020 | *last_block = 0; | 3971 | error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); |
4021 | return 0; | 3972 | if (error || is_empty) |
4022 | } | ||
4023 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4024 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
4025 | (error = xfs_iread_extents(tp, ip, whichfork))) | ||
4026 | return error; | 3973 | return error; |
4027 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 3974 | |
4028 | if (!nextents) { | 3975 | *last_block = rec.br_startoff + rec.br_blockcount; |
4029 | *last_block = 0; | ||
4030 | return 0; | ||
4031 | } | ||
4032 | ep = xfs_iext_get_ext(ifp, nextents - 1); | ||
4033 | *last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep); | ||
4034 | return 0; | 3976 | return 0; |
4035 | } | 3977 | } |
4036 | 3978 | ||
@@ -4160,7 +4102,6 @@ xfs_bmap_read_extents( | |||
4160 | xfs_extnum_t num_recs; | 4102 | xfs_extnum_t num_recs; |
4161 | xfs_extnum_t start; | 4103 | xfs_extnum_t start; |
4162 | 4104 | ||
4163 | |||
4164 | num_recs = xfs_btree_get_numrecs(block); | 4105 | num_recs = xfs_btree_get_numrecs(block); |
4165 | if (unlikely(i + num_recs > room)) { | 4106 | if (unlikely(i + num_recs > room)) { |
4166 | ASSERT(i + num_recs <= room); | 4107 | ASSERT(i + num_recs <= room); |
@@ -4283,9 +4224,8 @@ xfs_bmap_validate_ret( | |||
4283 | ASSERT(i == 0 || | 4224 | ASSERT(i == 0 || |
4284 | mval[i - 1].br_startoff + mval[i - 1].br_blockcount == | 4225 | mval[i - 1].br_startoff + mval[i - 1].br_blockcount == |
4285 | mval[i].br_startoff); | 4226 | mval[i].br_startoff); |
4286 | if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY)) | 4227 | ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && |
4287 | ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && | 4228 | mval[i].br_startblock != HOLESTARTBLOCK); |
4288 | mval[i].br_startblock != HOLESTARTBLOCK); | ||
4289 | ASSERT(mval[i].br_state == XFS_EXT_NORM || | 4229 | ASSERT(mval[i].br_state == XFS_EXT_NORM || |
4290 | mval[i].br_state == XFS_EXT_UNWRITTEN); | 4230 | mval[i].br_state == XFS_EXT_UNWRITTEN); |
4291 | } | 4231 | } |
@@ -4294,66 +4234,609 @@ xfs_bmap_validate_ret( | |||
4294 | 4234 | ||
4295 | 4235 | ||
4296 | /* | 4236 | /* |
4297 | * Map file blocks to filesystem blocks. | 4237 | * Trim the returned map to the required bounds |
4298 | * File range is given by the bno/len pair. | 4238 | */ |
4299 | * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set) | 4239 | STATIC void |
4300 | * into a hole or past eof. | 4240 | xfs_bmapi_trim_map( |
4301 | * Only allocates blocks from a single allocation group, | 4241 | struct xfs_bmbt_irec *mval, |
4302 | * to avoid locking problems. | 4242 | struct xfs_bmbt_irec *got, |
4243 | xfs_fileoff_t *bno, | ||
4244 | xfs_filblks_t len, | ||
4245 | xfs_fileoff_t obno, | ||
4246 | xfs_fileoff_t end, | ||
4247 | int n, | ||
4248 | int flags) | ||
4249 | { | ||
4250 | if ((flags & XFS_BMAPI_ENTIRE) || | ||
4251 | got->br_startoff + got->br_blockcount <= obno) { | ||
4252 | *mval = *got; | ||
4253 | if (isnullstartblock(got->br_startblock)) | ||
4254 | mval->br_startblock = DELAYSTARTBLOCK; | ||
4255 | return; | ||
4256 | } | ||
4257 | |||
4258 | if (obno > *bno) | ||
4259 | *bno = obno; | ||
4260 | ASSERT((*bno >= obno) || (n == 0)); | ||
4261 | ASSERT(*bno < end); | ||
4262 | mval->br_startoff = *bno; | ||
4263 | if (isnullstartblock(got->br_startblock)) | ||
4264 | mval->br_startblock = DELAYSTARTBLOCK; | ||
4265 | else | ||
4266 | mval->br_startblock = got->br_startblock + | ||
4267 | (*bno - got->br_startoff); | ||
4268 | /* | ||
4269 | * Return the minimum of what we got and what we asked for as | ||
4270 | * the length. We can use the len variable here because it is | ||
4271 | * modified below and we could have been there before coming | ||
4272 | * here if the first part of the allocation didn't overlap what | ||
4273 | * was asked for. | ||
4274 | */ | ||
4275 | mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, | ||
4276 | got->br_blockcount - (*bno - got->br_startoff)); | ||
4277 | mval->br_state = got->br_state; | ||
4278 | ASSERT(mval->br_blockcount <= len); | ||
4279 | return; | ||
4280 | } | ||
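
xfs_bmapi_trim_map() above clamps a found extent to the window the caller asked for: move the start up to bno, shift the start block by the same amount, and cap the length at both the extent's remainder and the window's end. A self-contained sketch of that arithmetic (the delalloc and ENTIRE cases omitted):

	#include <stdint.h>

	struct irec {				/* miniature xfs_bmbt_irec */
		uint64_t br_startoff;
		uint64_t br_startblock;
		uint64_t br_blockcount;
	};

	static uint64_t min_u64(uint64_t a, uint64_t b)
	{
		return a < b ? a : b;
	}

	/* Clamp the extent *got to the caller's [bno, end) window. */
	static void trim_map(struct irec *mval, const struct irec *got,
			     uint64_t bno, uint64_t end)
	{
		uint64_t skip = bno - got->br_startoff;

		mval->br_startoff = bno;
		mval->br_startblock = got->br_startblock + skip;
		mval->br_blockcount = min_u64(end - bno,
					      got->br_blockcount - skip);
	}
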
4281 | |||
4282 | /* | ||
4283 | * Update and validate the extent map to return | ||
4284 | */ | ||
4285 | STATIC void | ||
4286 | xfs_bmapi_update_map( | ||
4287 | struct xfs_bmbt_irec **map, | ||
4288 | xfs_fileoff_t *bno, | ||
4289 | xfs_filblks_t *len, | ||
4290 | xfs_fileoff_t obno, | ||
4291 | xfs_fileoff_t end, | ||
4292 | int *n, | ||
4293 | int flags) | ||
4294 | { | ||
4295 | xfs_bmbt_irec_t *mval = *map; | ||
4296 | |||
4297 | ASSERT((flags & XFS_BMAPI_ENTIRE) || | ||
4298 | ((mval->br_startoff + mval->br_blockcount) <= end)); | ||
4299 | ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || | ||
4300 | (mval->br_startoff < obno)); | ||
4301 | |||
4302 | *bno = mval->br_startoff + mval->br_blockcount; | ||
4303 | *len = end - *bno; | ||
4304 | if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { | ||
4305 | /* update previous map with new information */ | ||
4306 | ASSERT(mval->br_startblock == mval[-1].br_startblock); | ||
4307 | ASSERT(mval->br_blockcount > mval[-1].br_blockcount); | ||
4308 | ASSERT(mval->br_state == mval[-1].br_state); | ||
4309 | mval[-1].br_blockcount = mval->br_blockcount; | ||
4310 | mval[-1].br_state = mval->br_state; | ||
4311 | } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && | ||
4312 | mval[-1].br_startblock != DELAYSTARTBLOCK && | ||
4313 | mval[-1].br_startblock != HOLESTARTBLOCK && | ||
4314 | mval->br_startblock == mval[-1].br_startblock + | ||
4315 | mval[-1].br_blockcount && | ||
4316 | ((flags & XFS_BMAPI_IGSTATE) || | ||
4317 | mval[-1].br_state == mval->br_state)) { | ||
4318 | ASSERT(mval->br_startoff == | ||
4319 | mval[-1].br_startoff + mval[-1].br_blockcount); | ||
4320 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4321 | } else if (*n > 0 && | ||
4322 | mval->br_startblock == DELAYSTARTBLOCK && | ||
4323 | mval[-1].br_startblock == DELAYSTARTBLOCK && | ||
4324 | mval->br_startoff == | ||
4325 | mval[-1].br_startoff + mval[-1].br_blockcount) { | ||
4326 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4327 | mval[-1].br_state = mval->br_state; | ||
4328 | } else if (!((*n == 0) && | ||
4329 | ((mval->br_startoff + mval->br_blockcount) <= | ||
4330 | obno))) { | ||
4331 | mval++; | ||
4332 | (*n)++; | ||
4333 | } | ||
4334 | *map = mval; | ||
4335 | } | ||
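
The middle branch of xfs_bmapi_update_map() above is a run-length merge: when the new mapping continues the previous one both logically and physically, extend the previous entry instead of emitting a new one. A reduced sketch covering just that case (the delalloc and same-start variants are analogous); struct irec repeats the miniature record from the earlier sketch so the block stands alone:

	#include <stdint.h>

	struct irec {				/* miniature xfs_bmbt_irec */
		uint64_t br_startoff;
		uint64_t br_startblock;
		uint64_t br_blockcount;
	};

	/* Append *mval to map[], merging it into map[*n - 1] when it is
	 * an exact logical and physical continuation of that entry. */
	static void update_map(struct irec *map, int *n, const struct irec *mval)
	{
		if (*n > 0) {
			struct irec *prev = &map[*n - 1];

			if (mval->br_startoff ==
				prev->br_startoff + prev->br_blockcount &&
			    mval->br_startblock ==
				prev->br_startblock + prev->br_blockcount) {
				prev->br_blockcount += mval->br_blockcount;
				return;
			}
		}
		map[(*n)++] = *mval;
	}
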
4336 | |||
4337 | /* | ||
4338 | * Map file blocks to filesystem blocks without allocation. | ||
4339 | */ | ||
4340 | int | ||
4341 | xfs_bmapi_read( | ||
4342 | struct xfs_inode *ip, | ||
4343 | xfs_fileoff_t bno, | ||
4344 | xfs_filblks_t len, | ||
4345 | struct xfs_bmbt_irec *mval, | ||
4346 | int *nmap, | ||
4347 | int flags) | ||
4348 | { | ||
4349 | struct xfs_mount *mp = ip->i_mount; | ||
4350 | struct xfs_ifork *ifp; | ||
4351 | struct xfs_bmbt_irec got; | ||
4352 | struct xfs_bmbt_irec prev; | ||
4353 | xfs_fileoff_t obno; | ||
4354 | xfs_fileoff_t end; | ||
4355 | xfs_extnum_t lastx; | ||
4356 | int error; | ||
4357 | int eof; | ||
4358 | int n = 0; | ||
4359 | int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4360 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4361 | |||
4362 | ASSERT(*nmap >= 1); | ||
4363 | ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| | ||
4364 | XFS_BMAPI_IGSTATE))); | ||
4365 | |||
4366 | if (unlikely(XFS_TEST_ERROR( | ||
4367 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
4368 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
4369 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
4370 | XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); | ||
4371 | return XFS_ERROR(EFSCORRUPTED); | ||
4372 | } | ||
4373 | |||
4374 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
4375 | return XFS_ERROR(EIO); | ||
4376 | |||
4377 | XFS_STATS_INC(xs_blk_mapr); | ||
4378 | |||
4379 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4380 | ASSERT(ifp->if_ext_max == | ||
4381 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
4382 | |||
4383 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
4384 | error = xfs_iread_extents(NULL, ip, whichfork); | ||
4385 | if (error) | ||
4386 | return error; | ||
4387 | } | ||
4388 | |||
4389 | xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); | ||
4390 | end = bno + len; | ||
4391 | obno = bno; | ||
4392 | |||
4393 | while (bno < end && n < *nmap) { | ||
4394 | /* Reading past eof, act as though there's a hole up to end. */ | ||
4395 | if (eof) | ||
4396 | got.br_startoff = end; | ||
4397 | if (got.br_startoff > bno) { | ||
4398 | /* Reading in a hole. */ | ||
4399 | mval->br_startoff = bno; | ||
4400 | mval->br_startblock = HOLESTARTBLOCK; | ||
4401 | mval->br_blockcount = | ||
4402 | XFS_FILBLKS_MIN(len, got.br_startoff - bno); | ||
4403 | mval->br_state = XFS_EXT_NORM; | ||
4404 | bno += mval->br_blockcount; | ||
4405 | len -= mval->br_blockcount; | ||
4406 | mval++; | ||
4407 | n++; | ||
4408 | continue; | ||
4409 | } | ||
4410 | |||
4411 | /* set up the extent map to return. */ | ||
4412 | xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); | ||
4413 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); | ||
4414 | |||
4415 | /* If we're done, stop now. */ | ||
4416 | if (bno >= end || n >= *nmap) | ||
4417 | break; | ||
4418 | |||
4419 | /* Else go on to the next record. */ | ||
4420 | if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) | ||
4421 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); | ||
4422 | else | ||
4423 | eof = 1; | ||
4424 | } | ||
4425 | *nmap = n; | ||
4426 | return 0; | ||
4427 | } | ||
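
The loop in xfs_bmapi_read() above has two cases per iteration: synthesize a hole mapping up to the next extent, or trim the current extent into the output. A self-contained user-space model of that walk over a sorted extent array; the HOLESTARTBLOCK value here is illustrative only:

	#include <stdint.h>
	#include <stddef.h>

	#define HOLESTARTBLOCK ((uint64_t)-2)	/* illustrative sentinel */

	struct irec {				/* miniature xfs_bmbt_irec */
		uint64_t br_startoff;
		uint64_t br_startblock;
		uint64_t br_blockcount;
	};

	/* Map [bno, bno + len) against a sorted, non-overlapping extent
	 * list, emitting hole records for the gaps; returns the number
	 * of mappings written, at most nmap. */
	static int map_read(const struct irec *recs, size_t nrecs,
			    uint64_t bno, uint64_t len,
			    struct irec *mval, int nmap)
	{
		uint64_t end = bno + len;
		size_t i = 0;
		int n = 0;

		while (bno < end && n < nmap) {
			/* skip extents that end at or before bno */
			while (i < nrecs &&
			       recs[i].br_startoff + recs[i].br_blockcount <= bno)
				i++;

			if (i == nrecs || recs[i].br_startoff > bno) {
				/* in a hole: emit one up to the next extent */
				uint64_t hend = (i == nrecs ||
						 recs[i].br_startoff > end) ?
						end : recs[i].br_startoff;

				mval[n].br_startoff = bno;
				mval[n].br_startblock = HOLESTARTBLOCK;
				mval[n].br_blockcount = hend - bno;
				bno = hend;
				n++;
				continue;
			}

			/* inside an extent: trim it to [bno, end) */
			mval[n].br_startoff = bno;
			mval[n].br_startblock = recs[i].br_startblock +
						(bno - recs[i].br_startoff);
			mval[n].br_blockcount = recs[i].br_startoff +
						recs[i].br_blockcount - bno;
			if (bno + mval[n].br_blockcount > end)
				mval[n].br_blockcount = end - bno;
			bno += mval[n].br_blockcount;
			n++;
			i++;
		}
		return n;
	}
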
4428 | |||
4429 | STATIC int | ||
4430 | xfs_bmapi_reserve_delalloc( | ||
4431 | struct xfs_inode *ip, | ||
4432 | xfs_fileoff_t aoff, | ||
4433 | xfs_filblks_t len, | ||
4434 | struct xfs_bmbt_irec *got, | ||
4435 | struct xfs_bmbt_irec *prev, | ||
4436 | xfs_extnum_t *lastx, | ||
4437 | int eof) | ||
4438 | { | ||
4439 | struct xfs_mount *mp = ip->i_mount; | ||
4440 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
4441 | xfs_extlen_t alen; | ||
4442 | xfs_extlen_t indlen; | ||
4443 | char rt = XFS_IS_REALTIME_INODE(ip); | ||
4444 | xfs_extlen_t extsz; | ||
4445 | int error; | ||
4446 | |||
4447 | alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); | ||
4448 | if (!eof) | ||
4449 | alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); | ||
4450 | |||
4451 | /* Figure out the extent size, adjust alen */ | ||
4452 | extsz = xfs_get_extsz_hint(ip); | ||
4453 | if (extsz) { | ||
4454 | /* | ||
4455 | * Make sure we don't exceed a single extent length when we | ||
4456 | * align the extent by reducing the length we are going to | ||
4457 | * allocate by the maximum amount extent size alignment may | ||
4458 | * require. | ||
4459 | */ | ||
4460 | alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); | ||
4461 | error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, | ||
4462 | 1, 0, &aoff, &alen); | ||
4463 | ASSERT(!error); | ||
4464 | } | ||
4465 | |||
4466 | if (rt) | ||
4467 | extsz = alen / mp->m_sb.sb_rextsize; | ||
4468 | |||
4469 | /* | ||
4470 | * Make a transaction-less quota reservation for delayed allocation | ||
4471 | * blocks. This number gets adjusted later. We return early if the | ||
4472 | * reservation fails, since no blocks have been allocated yet. | ||
4473 | */ | ||
4474 | error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, | ||
4475 | rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); | ||
4476 | if (error) | ||
4477 | return error; | ||
4478 | |||
4479 | /* | ||
4480 | * Update the superblock counters for alen and indlen separately, | ||
4481 | * since they may come from different counters. | ||
4482 | */ | ||
4483 | indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); | ||
4484 | ASSERT(indlen > 0); | ||
4485 | |||
4486 | if (rt) { | ||
4487 | error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | ||
4488 | -((int64_t)extsz), 0); | ||
4489 | } else { | ||
4490 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
4491 | -((int64_t)alen), 0); | ||
4492 | } | ||
4493 | |||
4494 | if (error) | ||
4495 | goto out_unreserve_quota; | ||
4496 | |||
4497 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
4498 | -((int64_t)indlen), 0); | ||
4499 | if (error) | ||
4500 | goto out_unreserve_blocks; | ||
4501 | |||
4502 | |||
4503 | ip->i_delayed_blks += alen; | ||
4504 | |||
4505 | got->br_startoff = aoff; | ||
4506 | got->br_startblock = nullstartblock(indlen); | ||
4507 | got->br_blockcount = alen; | ||
4508 | got->br_state = XFS_EXT_NORM; | ||
4509 | xfs_bmap_add_extent_hole_delay(ip, lastx, got); | ||
4510 | |||
4511 | /* | ||
4512 | * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay | ||
4513 | * might have merged it into one of the neighbouring ones. | ||
4514 | */ | ||
4515 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); | ||
4516 | |||
4517 | ASSERT(got->br_startoff <= aoff); | ||
4518 | ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); | ||
4519 | ASSERT(isnullstartblock(got->br_startblock)); | ||
4520 | ASSERT(got->br_state == XFS_EXT_NORM); | ||
4521 | return 0; | ||
4522 | |||
4523 | out_unreserve_blocks: | ||
4524 | if (rt) | ||
4525 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); | ||
4526 | else | ||
4527 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); | ||
4528 | out_unreserve_quota: | ||
4529 | if (XFS_IS_QUOTA_ON(mp)) | ||
4530 | xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ? | ||
4531 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); | ||
4532 | return error; | ||
4533 | } | ||
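
xfs_bmapi_reserve_delalloc() above takes three reservations in sequence (quota, data blocks, worst-case indirect blocks) and unwinds them in reverse order on failure. A stripped-down sketch of that goto-unwind shape; the reserve_*()/unreserve_*() helpers are hypothetical stubs, not kernel functions:

	/* Hypothetical stubs; each real counterpart returns 0 or an error. */
	static int reserve_quota(long nblks)      { (void)nblks; return 0; }
	static int reserve_blocks(long nblks)     { (void)nblks; return 0; }
	static void unreserve_quota(long nblks)   { (void)nblks; }
	static void unreserve_blocks(long nblks)  { (void)nblks; }

	/* Take the three reservations in order, unwinding in reverse if
	 * a later one fails. */
	static int reserve_delalloc(long alen, long indlen)
	{
		int error;

		error = reserve_quota(alen);
		if (error)
			return error;

		error = reserve_blocks(alen);		/* the data blocks */
		if (error)
			goto out_unreserve_quota;

		error = reserve_blocks(indlen);		/* worst-case bmbt blocks */
		if (error)
			goto out_unreserve_blocks;

		return 0;

	out_unreserve_blocks:
		unreserve_blocks(alen);
	out_unreserve_quota:
		unreserve_quota(alen);
		return error;
	}
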
4534 | |||
4535 | /* | ||
4536 | * Map file blocks to filesystem blocks, adding delayed allocations as needed. | ||
4537 | */ | ||
4538 | int | ||
4539 | xfs_bmapi_delay( | ||
4540 | struct xfs_inode *ip, /* incore inode */ | ||
4541 | xfs_fileoff_t bno, /* starting file offs. mapped */ | ||
4542 | xfs_filblks_t len, /* length to map in file */ | ||
4543 | struct xfs_bmbt_irec *mval, /* output: map values */ | ||
4544 | int *nmap, /* i/o: mval size/count */ | ||
4545 | int flags) /* XFS_BMAPI_... */ | ||
4546 | { | ||
4547 | struct xfs_mount *mp = ip->i_mount; | ||
4548 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
4549 | struct xfs_bmbt_irec got; /* current file extent record */ | ||
4550 | struct xfs_bmbt_irec prev; /* previous file extent record */ | ||
4551 | xfs_fileoff_t obno; /* old block number (offset) */ | ||
4552 | xfs_fileoff_t end; /* end of mapped file region */ | ||
4553 | xfs_extnum_t lastx; /* last useful extent number */ | ||
4554 | int eof; /* we've hit the end of extents */ | ||
4555 | int n = 0; /* current extent index */ | ||
4556 | int error = 0; | ||
4557 | |||
4558 | ASSERT(*nmap >= 1); | ||
4559 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | ||
4560 | ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); | ||
4561 | |||
4562 | if (unlikely(XFS_TEST_ERROR( | ||
4563 | (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && | ||
4564 | XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), | ||
4565 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
4566 | XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); | ||
4567 | return XFS_ERROR(EFSCORRUPTED); | ||
4568 | } | ||
4569 | |||
4570 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
4571 | return XFS_ERROR(EIO); | ||
4572 | |||
4573 | XFS_STATS_INC(xs_blk_mapw); | ||
4574 | |||
4575 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
4576 | error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); | ||
4577 | if (error) | ||
4578 | return error; | ||
4579 | } | ||
4580 | |||
4581 | xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); | ||
4582 | end = bno + len; | ||
4583 | obno = bno; | ||
4584 | |||
4585 | while (bno < end && n < *nmap) { | ||
4586 | if (eof || got.br_startoff > bno) { | ||
4587 | error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, | ||
4588 | &prev, &lastx, eof); | ||
4589 | if (error) { | ||
4590 | if (n == 0) { | ||
4591 | *nmap = 0; | ||
4592 | return error; | ||
4593 | } | ||
4594 | break; | ||
4595 | } | ||
4596 | } | ||
4597 | |||
4598 | /* set up the extent map to return. */ | ||
4599 | xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); | ||
4600 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); | ||
4601 | |||
4602 | /* If we're done, stop now. */ | ||
4603 | if (bno >= end || n >= *nmap) | ||
4604 | break; | ||
4605 | |||
4606 | /* Else go on to the next record. */ | ||
4607 | prev = got; | ||
4608 | if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) | ||
4609 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); | ||
4610 | else | ||
4611 | eof = 1; | ||
4612 | } | ||
4613 | |||
4614 | *nmap = n; | ||
4615 | return 0; | ||
4616 | } | ||
4617 | |||
4618 | |||
4619 | STATIC int | ||
4620 | xfs_bmapi_allocate( | ||
4621 | struct xfs_bmalloca *bma, | ||
4622 | int flags) | ||
4623 | { | ||
4624 | struct xfs_mount *mp = bma->ip->i_mount; | ||
4625 | int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4626 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4627 | struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); | ||
4628 | int tmp_logflags = 0; | ||
4629 | int error; | ||
4630 | int rt; | ||
4631 | |||
4632 | rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip); | ||
4633 | |||
4634 | /* | ||
4635 | * For the wasdelay case, we could also just allocate the stuff asked | ||
4636 | * for in this bmap call but that wouldn't be as good. | ||
4637 | */ | ||
4638 | if (bma->wasdel) { | ||
4639 | bma->length = (xfs_extlen_t)bma->got.br_blockcount; | ||
4640 | bma->offset = bma->got.br_startoff; | ||
4641 | if (bma->idx != NULLEXTNUM && bma->idx) { | ||
4642 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), | ||
4643 | &bma->prev); | ||
4644 | } | ||
4645 | } else { | ||
4646 | bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); | ||
4647 | if (!bma->eof) | ||
4648 | bma->length = XFS_FILBLKS_MIN(bma->length, | ||
4649 | bma->got.br_startoff - bma->offset); | ||
4650 | } | ||
4651 | |||
4652 | /* | ||
4653 | * Indicate if this is the first user data in the file, or just any | ||
4654 | * user data. | ||
4655 | */ | ||
4656 | if (!(flags & XFS_BMAPI_METADATA)) { | ||
4657 | bma->userdata = (bma->offset == 0) ? | ||
4658 | XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; | ||
4659 | } | ||
4660 | |||
4661 | bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1; | ||
4662 | |||
4663 | /* | ||
4664 | * Only want to do the alignment at the eof if it is userdata and | ||
4665 | * allocation length is larger than a stripe unit. | ||
4666 | */ | ||
4667 | if (mp->m_dalign && bma->length >= mp->m_dalign && | ||
4668 | !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { | ||
4669 | error = xfs_bmap_isaeof(bma, whichfork); | ||
4670 | if (error) | ||
4671 | return error; | ||
4672 | } | ||
4673 | |||
4674 | error = xfs_bmap_alloc(bma); | ||
4675 | if (error) | ||
4676 | return error; | ||
4677 | |||
4678 | if (bma->flist->xbf_low) | ||
4679 | bma->minleft = 0; | ||
4680 | if (bma->cur) | ||
4681 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4682 | if (bma->blkno == NULLFSBLOCK) | ||
4683 | return 0; | ||
4684 | if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { | ||
4685 | bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); | ||
4686 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4687 | bma->cur->bc_private.b.flist = bma->flist; | ||
4688 | } | ||
4689 | /* | ||
4690 | * Bump the number of extents we've allocated | ||
4691 | * in this call. | ||
4692 | */ | ||
4693 | bma->nallocs++; | ||
4694 | |||
4695 | if (bma->cur) | ||
4696 | bma->cur->bc_private.b.flags = | ||
4697 | bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; | ||
4698 | |||
4699 | bma->got.br_startoff = bma->offset; | ||
4700 | bma->got.br_startblock = bma->blkno; | ||
4701 | bma->got.br_blockcount = bma->length; | ||
4702 | bma->got.br_state = XFS_EXT_NORM; | ||
4703 | |||
4704 | /* | ||
4705 | * A wasdelay extent has been initialized, so shouldn't be flagged | ||
4706 | * as unwritten. | ||
4707 | */ | ||
4708 | if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) && | ||
4709 | xfs_sb_version_hasextflgbit(&mp->m_sb)) | ||
4710 | bma->got.br_state = XFS_EXT_UNWRITTEN; | ||
4711 | |||
4712 | if (bma->wasdel) | ||
4713 | error = xfs_bmap_add_extent_delay_real(bma); | ||
4714 | else | ||
4715 | error = xfs_bmap_add_extent_hole_real(bma, whichfork); | ||
4716 | |||
4717 | bma->logflags |= tmp_logflags; | ||
4718 | if (error) | ||
4719 | return error; | ||
4720 | |||
4721 | /* | ||
4722 | * Update our extent pointer, given that xfs_bmap_add_extent_delay_real | ||
4723 | * or xfs_bmap_add_extent_hole_real might have merged it into one of | ||
4724 | * the neighbouring ones. | ||
4725 | */ | ||
4726 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); | ||
4727 | |||
4728 | ASSERT(bma->got.br_startoff <= bma->offset); | ||
4729 | ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= | ||
4730 | bma->offset + bma->length); | ||
4731 | ASSERT(bma->got.br_state == XFS_EXT_NORM || | ||
4732 | bma->got.br_state == XFS_EXT_UNWRITTEN); | ||
4733 | return 0; | ||
4734 | } | ||
4735 | |||
4736 | STATIC int | ||
4737 | xfs_bmapi_convert_unwritten( | ||
4738 | struct xfs_bmalloca *bma, | ||
4739 | struct xfs_bmbt_irec *mval, | ||
4740 | xfs_filblks_t len, | ||
4741 | int flags) | ||
4742 | { | ||
4743 | int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4744 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4745 | struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); | ||
4746 | int tmp_logflags = 0; | ||
4747 | int error; | ||
4748 | |||
4749 | /* check if we need to do unwritten->real conversion */ | ||
4750 | if (mval->br_state == XFS_EXT_UNWRITTEN && | ||
4751 | (flags & XFS_BMAPI_PREALLOC)) | ||
4752 | return 0; | ||
4753 | |||
4754 | /* check if we need to do real->unwritten conversion */ | ||
4755 | if (mval->br_state == XFS_EXT_NORM && | ||
4756 | (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != | ||
4757 | (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) | ||
4758 | return 0; | ||
4759 | |||
4760 | /* | ||
4761 | * Modify (by adding) the state flag, if writing. | ||
4762 | */ | ||
4763 | ASSERT(mval->br_blockcount <= len); | ||
4764 | if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { | ||
4765 | bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, | ||
4766 | bma->ip, whichfork); | ||
4767 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4768 | bma->cur->bc_private.b.flist = bma->flist; | ||
4769 | } | ||
4770 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) | ||
4771 | ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; | ||
4772 | |||
4773 | error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, | ||
4774 | &bma->cur, mval, bma->firstblock, bma->flist, | ||
4775 | &tmp_logflags); | ||
4776 | bma->logflags |= tmp_logflags; | ||
4777 | if (error) | ||
4778 | return error; | ||
4779 | |||
4780 | /* | ||
4781 | * Update our extent pointer, given that | ||
4782 | * xfs_bmap_add_extent_unwritten_real might have merged it into one | ||
4783 | * of the neighbouring ones. | ||
4784 | */ | ||
4785 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); | ||
4786 | |||
4787 | /* | ||
4788 | * We may have combined previously unwritten space with written space, | ||
4789 | * so generate another request. | ||
4790 | */ | ||
4791 | if (mval->br_blockcount < len) | ||
4792 | return EAGAIN; | ||
4793 | return 0; | ||
4794 | } | ||
4795 | |||
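Editor's note: xfs_bmapi_convert_unwritten() above bails out early when the extent state already matches what the flags request, and returns the positive errno EAGAIN when only part of the request was converted, so the caller's loop runs again. A minimal userspace sketch of the bail-out predicate follows, using assumed flag values that mirror the renumbered header later in this diff; it is an illustration, not the kernel code.

	#include <stdio.h>

	#define BMAPI_PREALLOC	0x008	/* stand-in for XFS_BMAPI_PREALLOC */
	#define BMAPI_CONVERT	0x040	/* stand-in for XFS_BMAPI_CONVERT */

	enum ext_state { EXT_NORM, EXT_UNWRITTEN };

	/* Mirrors the two early returns: conversion happens only when the
	 * extent state disagrees with what the flags ask for. */
	static int needs_conversion(enum ext_state state, int flags)
	{
		if (state == EXT_UNWRITTEN && (flags & BMAPI_PREALLOC))
			return 0;	/* preallocating: unwritten is the goal */
		if (state == EXT_NORM &&
		    (flags & (BMAPI_PREALLOC | BMAPI_CONVERT)) !=
		    (BMAPI_PREALLOC | BMAPI_CONVERT))
			return 0;	/* written->unwritten needs both flags */
		return 1;
	}

	int main(void)
	{
		printf("%d\n", needs_conversion(EXT_UNWRITTEN, 0));	/* 1 */
		printf("%d\n", needs_conversion(EXT_UNWRITTEN,
						BMAPI_PREALLOC));	/* 0 */
		printf("%d\n", needs_conversion(EXT_NORM,
					BMAPI_PREALLOC | BMAPI_CONVERT));/* 1 */
		return 0;
	}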
4796 | /* | ||
4797 | * Map file blocks to filesystem blocks, and allocate blocks or convert the | ||
4798 | extent state if necessary. Detailed behaviour is controlled by the flags | ||
4799 | * parameter. Only allocates blocks from a single allocation group, to avoid | ||
4800 | * locking problems. | ||
4801 | * | ||
4303 | * The returned value in "firstblock" from the first call in a transaction | 4802 | * The returned value in "firstblock" from the first call in a transaction |
4304 | * must be remembered and presented to subsequent calls in "firstblock". | 4803 | * must be remembered and presented to subsequent calls in "firstblock". |
4305 | * An upper bound for the number of blocks to be allocated is supplied to | 4804 | * An upper bound for the number of blocks to be allocated is supplied to |
4306 | * the first call in "total"; if no allocation group has that many free | 4805 | * the first call in "total"; if no allocation group has that many free |
4307 | * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). | 4806 | * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). |
4308 | */ | 4807 | */ |
4309 | int /* error */ | 4808 | int |
4310 | xfs_bmapi( | 4809 | xfs_bmapi_write( |
4311 | xfs_trans_t *tp, /* transaction pointer */ | 4810 | struct xfs_trans *tp, /* transaction pointer */ |
4312 | xfs_inode_t *ip, /* incore inode */ | 4811 | struct xfs_inode *ip, /* incore inode */ |
4313 | xfs_fileoff_t bno, /* starting file offs. mapped */ | 4812 | xfs_fileoff_t bno, /* starting file offs. mapped */ |
4314 | xfs_filblks_t len, /* length to map in file */ | 4813 | xfs_filblks_t len, /* length to map in file */ |
4315 | int flags, /* XFS_BMAPI_... */ | 4814 | int flags, /* XFS_BMAPI_... */ |
4316 | xfs_fsblock_t *firstblock, /* first allocated block | 4815 | xfs_fsblock_t *firstblock, /* first allocated block |
4317 | controls a.g. for allocs */ | 4816 | controls a.g. for allocs */ |
4318 | xfs_extlen_t total, /* total blocks needed */ | 4817 | xfs_extlen_t total, /* total blocks needed */ |
4319 | xfs_bmbt_irec_t *mval, /* output: map values */ | 4818 | struct xfs_bmbt_irec *mval, /* output: map values */ |
4320 | int *nmap, /* i/o: mval size/count */ | 4819 | int *nmap, /* i/o: mval size/count */ |
4321 | xfs_bmap_free_t *flist) /* i/o: list extents to free */ | 4820 | struct xfs_bmap_free *flist) /* i/o: list extents to free */ |
4322 | { | 4821 | { |
4323 | xfs_fsblock_t abno; /* allocated block number */ | 4822 | struct xfs_mount *mp = ip->i_mount; |
4324 | xfs_extlen_t alen; /* allocated extent length */ | 4823 | struct xfs_ifork *ifp; |
4325 | xfs_fileoff_t aoff; /* allocated file offset */ | 4824 | struct xfs_bmalloca bma = { 0 }; /* args for xfs_bmap_alloc */ |
4326 | xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */ | 4825 | xfs_fileoff_t end; /* end of mapped file region */ |
4327 | xfs_btree_cur_t *cur; /* bmap btree cursor */ | 4826 | int eof; /* after the end of extents */ |
4328 | xfs_fileoff_t end; /* end of mapped file region */ | 4827 | int error; /* error return */ |
4329 | int eof; /* we've hit the end of extents */ | 4828 | int n; /* current extent index */ |
4330 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | 4829 | xfs_fileoff_t obno; /* old block number (offset) */ |
4331 | int error; /* error return */ | 4830 | int whichfork; /* data or attr fork */ |
4332 | xfs_bmbt_irec_t got; /* current file extent record */ | 4831 | char inhole; /* current location is hole in file */ |
4333 | xfs_ifork_t *ifp; /* inode fork pointer */ | 4832 | char wasdelay; /* old extent was delayed */ |
4334 | xfs_extlen_t indlen; /* indirect blocks length */ | 4833 | |
4335 | xfs_extnum_t lastx; /* last useful extent number */ | ||
4336 | int logflags; /* flags for transaction logging */ | ||
4337 | xfs_extlen_t minleft; /* min blocks left after allocation */ | ||
4338 | xfs_extlen_t minlen; /* min allocation size */ | ||
4339 | xfs_mount_t *mp; /* xfs mount structure */ | ||
4340 | int n; /* current extent index */ | ||
4341 | int nallocs; /* number of extents alloc'd */ | ||
4342 | xfs_extnum_t nextents; /* number of extents in file */ | ||
4343 | xfs_fileoff_t obno; /* old block number (offset) */ | ||
4344 | xfs_bmbt_irec_t prev; /* previous file extent record */ | ||
4345 | int tmp_logflags; /* temp flags holder */ | ||
4346 | int whichfork; /* data or attr fork */ | ||
4347 | char inhole; /* current location is hole in file */ | ||
4348 | char wasdelay; /* old extent was delayed */ | ||
4349 | char wr; /* this is a write request */ | ||
4350 | char rt; /* this is a realtime file */ | ||
4351 | #ifdef DEBUG | 4834 | #ifdef DEBUG |
4352 | xfs_fileoff_t orig_bno; /* original block number value */ | 4835 | xfs_fileoff_t orig_bno; /* original block number value */ |
4353 | int orig_flags; /* original flags arg value */ | 4836 | int orig_flags; /* original flags arg value */ |
4354 | xfs_filblks_t orig_len; /* original value of len arg */ | 4837 | xfs_filblks_t orig_len; /* original value of len arg */ |
4355 | xfs_bmbt_irec_t *orig_mval; /* original value of mval */ | 4838 | struct xfs_bmbt_irec *orig_mval; /* original value of mval */ |
4356 | int orig_nmap; /* original value of *nmap */ | 4839 | int orig_nmap; /* original value of *nmap */ |
4357 | 4840 | ||
4358 | orig_bno = bno; | 4841 | orig_bno = bno; |
4359 | orig_len = len; | 4842 | orig_len = len; |
@@ -4361,488 +4844,133 @@ xfs_bmapi( | |||
4361 | orig_mval = mval; | 4844 | orig_mval = mval; |
4362 | orig_nmap = *nmap; | 4845 | orig_nmap = *nmap; |
4363 | #endif | 4846 | #endif |
4847 | |||
4364 | ASSERT(*nmap >= 1); | 4848 | ASSERT(*nmap >= 1); |
4365 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE)); | 4849 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); |
4850 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); | ||
4851 | ASSERT(tp != NULL); | ||
4852 | |||
4366 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | 4853 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? |
4367 | XFS_ATTR_FORK : XFS_DATA_FORK; | 4854 | XFS_ATTR_FORK : XFS_DATA_FORK; |
4368 | mp = ip->i_mount; | 4855 | |
4369 | if (unlikely(XFS_TEST_ERROR( | 4856 | if (unlikely(XFS_TEST_ERROR( |
4370 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 4857 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
4371 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | 4858 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && |
4372 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), | 4859 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), |
4373 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | 4860 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { |
4374 | XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp); | 4861 | XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); |
4375 | return XFS_ERROR(EFSCORRUPTED); | 4862 | return XFS_ERROR(EFSCORRUPTED); |
4376 | } | 4863 | } |
4864 | |||
4377 | if (XFS_FORCED_SHUTDOWN(mp)) | 4865 | if (XFS_FORCED_SHUTDOWN(mp)) |
4378 | return XFS_ERROR(EIO); | 4866 | return XFS_ERROR(EIO); |
4379 | rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); | 4867 | |
4380 | ifp = XFS_IFORK_PTR(ip, whichfork); | 4868 | ifp = XFS_IFORK_PTR(ip, whichfork); |
4381 | ASSERT(ifp->if_ext_max == | 4869 | ASSERT(ifp->if_ext_max == |
4382 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | 4870 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); |
4383 | if ((wr = (flags & XFS_BMAPI_WRITE)) != 0) | 4871 | |
4384 | XFS_STATS_INC(xs_blk_mapw); | 4872 | XFS_STATS_INC(xs_blk_mapw); |
4385 | else | 4873 | |
4386 | XFS_STATS_INC(xs_blk_mapr); | ||
4387 | /* | ||
4388 | * IGSTATE flag is used to combine extents which | ||
4389 | * differ only due to the state of the extents. | ||
4390 | * This technique is used from xfs_getbmap() | ||
4391 | * when the caller does not wish to see the | ||
4392 | * separation (which is the default). | ||
4393 | * | ||
4394 | * This technique is also used when writing a | ||
4395 | * buffer which has been partially written | ||
4396 | * (usually by being flushed during a chunkread), | ||
4397 | * to ensure one write takes place. This also | ||
4398 | * prevents a change in the xfs inode extents at | ||
4399 | * this time, intentionally. This change occurs | ||
4400 | * on completion of the write operation, in | ||
4401 | * xfs_strat_comp(), where the xfs_bmapi() call | ||
4402 | * is transactioned, and the extents combined. | ||
4403 | */ | ||
4404 | if ((flags & XFS_BMAPI_IGSTATE) && wr) /* if writing unwritten space */ | ||
4405 | wr = 0; /* no allocations are allowed */ | ||
4406 | ASSERT(wr || !(flags & XFS_BMAPI_DELAY)); | ||
4407 | logflags = 0; | ||
4408 | nallocs = 0; | ||
4409 | cur = NULL; | ||
4410 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | 4874 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { |
4411 | ASSERT(wr && tp); | 4875 | error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, |
4412 | if ((error = xfs_bmap_local_to_extents(tp, ip, | 4876 | &bma.logflags, whichfork); |
4413 | firstblock, total, &logflags, whichfork))) | 4877 | if (error) |
4414 | goto error0; | 4878 | goto error0; |
4415 | } | 4879 | } |
4416 | if (wr && *firstblock == NULLFSBLOCK) { | 4880 | |
4881 | if (*firstblock == NULLFSBLOCK) { | ||
4417 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) | 4882 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) |
4418 | minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; | 4883 | bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; |
4419 | else | 4884 | else |
4420 | minleft = 1; | 4885 | bma.minleft = 1; |
4421 | } else | 4886 | } else { |
4422 | minleft = 0; | 4887 | bma.minleft = 0; |
4423 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | 4888 | } |
4424 | (error = xfs_iread_extents(tp, ip, whichfork))) | 4889 | |
4425 | goto error0; | 4890 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
4426 | ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, | 4891 | error = xfs_iread_extents(tp, ip, whichfork); |
4427 | &prev); | 4892 | if (error) |
4428 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4893 | goto error0; |
4894 | } | ||
4895 | |||
4896 | xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, | ||
4897 | &bma.prev); | ||
4429 | n = 0; | 4898 | n = 0; |
4430 | end = bno + len; | 4899 | end = bno + len; |
4431 | obno = bno; | 4900 | obno = bno; |
4432 | bma.ip = NULL; | 4901 | |
4902 | bma.tp = tp; | ||
4903 | bma.ip = ip; | ||
4904 | bma.total = total; | ||
4905 | bma.userdata = 0; | ||
4906 | bma.flist = flist; | ||
4907 | bma.firstblock = firstblock; | ||
4433 | 4908 | ||
4434 | while (bno < end && n < *nmap) { | 4909 | while (bno < end && n < *nmap) { |
4435 | /* | 4910 | inhole = eof || bma.got.br_startoff > bno; |
4436 | * Reading past eof, act as though there's a hole | 4911 | wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); |
4437 | * up to end. | 4912 | |
4438 | */ | ||
4439 | if (eof && !wr) | ||
4440 | got.br_startoff = end; | ||
4441 | inhole = eof || got.br_startoff > bno; | ||
4442 | wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) && | ||
4443 | isnullstartblock(got.br_startblock); | ||
4444 | /* | 4913 | /* |
4445 | * First, deal with the hole before the allocated space | 4914 | * First, deal with the hole before the allocated space |
4446 | * that we found, if any. | 4915 | * that we found, if any. |
4447 | */ | 4916 | */ |
4448 | if (wr && (inhole || wasdelay)) { | 4917 | if (inhole || wasdelay) { |
4449 | /* | 4918 | bma.eof = eof; |
4450 | * For the wasdelay case, we could also just | 4919 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); |
4451 | * allocate the stuff asked for in this bmap call | 4920 | bma.wasdel = wasdelay; |
4452 | * but that wouldn't be as good. | 4921 | bma.length = len; |
4453 | */ | 4922 | bma.offset = bno; |
4454 | if (wasdelay) { | 4923 | |
4455 | alen = (xfs_extlen_t)got.br_blockcount; | 4924 | error = xfs_bmapi_allocate(&bma, flags); |
4456 | aoff = got.br_startoff; | ||
4457 | if (lastx != NULLEXTNUM && lastx) { | ||
4458 | ep = xfs_iext_get_ext(ifp, lastx - 1); | ||
4459 | xfs_bmbt_get_all(ep, &prev); | ||
4460 | } | ||
4461 | } else { | ||
4462 | alen = (xfs_extlen_t) | ||
4463 | XFS_FILBLKS_MIN(len, MAXEXTLEN); | ||
4464 | if (!eof) | ||
4465 | alen = (xfs_extlen_t) | ||
4466 | XFS_FILBLKS_MIN(alen, | ||
4467 | got.br_startoff - bno); | ||
4468 | aoff = bno; | ||
4469 | } | ||
4470 | minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1; | ||
4471 | if (flags & XFS_BMAPI_DELAY) { | ||
4472 | xfs_extlen_t extsz; | ||
4473 | |||
4474 | /* Figure out the extent size, adjust alen */ | ||
4475 | extsz = xfs_get_extsz_hint(ip); | ||
4476 | if (extsz) { | ||
4477 | /* | ||
4478 | * make sure we don't exceed a single | ||
4479 | * extent length when we align the | ||
4480 | * extent by reducing length we are | ||
4481 | * going to allocate by the maximum | ||
4482 | * amount extent size alignment may | ||
4483 | * require. | ||
4484 | */ | ||
4485 | alen = XFS_FILBLKS_MIN(len, | ||
4486 | MAXEXTLEN - (2 * extsz - 1)); | ||
4487 | error = xfs_bmap_extsize_align(mp, | ||
4488 | &got, &prev, extsz, | ||
4489 | rt, eof, | ||
4490 | flags&XFS_BMAPI_DELAY, | ||
4491 | flags&XFS_BMAPI_CONVERT, | ||
4492 | &aoff, &alen); | ||
4493 | ASSERT(!error); | ||
4494 | } | ||
4495 | |||
4496 | if (rt) | ||
4497 | extsz = alen / mp->m_sb.sb_rextsize; | ||
4498 | |||
4499 | /* | ||
4500 | * Make a transaction-less quota reservation for | ||
4501 | * delayed allocation blocks. This number gets | ||
4502 | * adjusted later. We return if we haven't | ||
4503 | * allocated blocks already inside this loop. | ||
4504 | */ | ||
4505 | error = xfs_trans_reserve_quota_nblks( | ||
4506 | NULL, ip, (long)alen, 0, | ||
4507 | rt ? XFS_QMOPT_RES_RTBLKS : | ||
4508 | XFS_QMOPT_RES_REGBLKS); | ||
4509 | if (error) { | ||
4510 | if (n == 0) { | ||
4511 | *nmap = 0; | ||
4512 | ASSERT(cur == NULL); | ||
4513 | return error; | ||
4514 | } | ||
4515 | break; | ||
4516 | } | ||
4517 | |||
4518 | /* | ||
4519 | * Split changing sb for alen and indlen since | ||
4520 | * they could be coming from different places. | ||
4521 | */ | ||
4522 | indlen = (xfs_extlen_t) | ||
4523 | xfs_bmap_worst_indlen(ip, alen); | ||
4524 | ASSERT(indlen > 0); | ||
4525 | |||
4526 | if (rt) { | ||
4527 | error = xfs_mod_incore_sb(mp, | ||
4528 | XFS_SBS_FREXTENTS, | ||
4529 | -((int64_t)extsz), 0); | ||
4530 | } else { | ||
4531 | error = xfs_icsb_modify_counters(mp, | ||
4532 | XFS_SBS_FDBLOCKS, | ||
4533 | -((int64_t)alen), 0); | ||
4534 | } | ||
4535 | if (!error) { | ||
4536 | error = xfs_icsb_modify_counters(mp, | ||
4537 | XFS_SBS_FDBLOCKS, | ||
4538 | -((int64_t)indlen), 0); | ||
4539 | if (error && rt) | ||
4540 | xfs_mod_incore_sb(mp, | ||
4541 | XFS_SBS_FREXTENTS, | ||
4542 | (int64_t)extsz, 0); | ||
4543 | else if (error) | ||
4544 | xfs_icsb_modify_counters(mp, | ||
4545 | XFS_SBS_FDBLOCKS, | ||
4546 | (int64_t)alen, 0); | ||
4547 | } | ||
4548 | |||
4549 | if (error) { | ||
4550 | if (XFS_IS_QUOTA_ON(mp)) | ||
4551 | /* unreserve the blocks now */ | ||
4552 | (void) | ||
4553 | xfs_trans_unreserve_quota_nblks( | ||
4554 | NULL, ip, | ||
4555 | (long)alen, 0, rt ? | ||
4556 | XFS_QMOPT_RES_RTBLKS : | ||
4557 | XFS_QMOPT_RES_REGBLKS); | ||
4558 | break; | ||
4559 | } | ||
4560 | |||
4561 | ip->i_delayed_blks += alen; | ||
4562 | abno = nullstartblock(indlen); | ||
4563 | } else { | ||
4564 | /* | ||
4565 | * If first time, allocate and fill in | ||
4566 | * once-only bma fields. | ||
4567 | */ | ||
4568 | if (bma.ip == NULL) { | ||
4569 | bma.tp = tp; | ||
4570 | bma.ip = ip; | ||
4571 | bma.prevp = &prev; | ||
4572 | bma.gotp = &got; | ||
4573 | bma.total = total; | ||
4574 | bma.userdata = 0; | ||
4575 | } | ||
4576 | /* Indicate if this is the first user data | ||
4577 | * in the file, or just any user data. | ||
4578 | */ | ||
4579 | if (!(flags & XFS_BMAPI_METADATA)) { | ||
4580 | bma.userdata = (aoff == 0) ? | ||
4581 | XFS_ALLOC_INITIAL_USER_DATA : | ||
4582 | XFS_ALLOC_USERDATA; | ||
4583 | } | ||
4584 | /* | ||
4585 | * Fill in changeable bma fields. | ||
4586 | */ | ||
4587 | bma.eof = eof; | ||
4588 | bma.firstblock = *firstblock; | ||
4589 | bma.alen = alen; | ||
4590 | bma.off = aoff; | ||
4591 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); | ||
4592 | bma.wasdel = wasdelay; | ||
4593 | bma.minlen = minlen; | ||
4594 | bma.low = flist->xbf_low; | ||
4595 | bma.minleft = minleft; | ||
4596 | /* | ||
4597 | * Only want to do the alignment at the | ||
4598 | * eof if it is userdata and allocation length | ||
4599 | * is larger than a stripe unit. | ||
4600 | */ | ||
4601 | if (mp->m_dalign && alen >= mp->m_dalign && | ||
4602 | (!(flags & XFS_BMAPI_METADATA)) && | ||
4603 | (whichfork == XFS_DATA_FORK)) { | ||
4604 | if ((error = xfs_bmap_isaeof(ip, aoff, | ||
4605 | whichfork, &bma.aeof))) | ||
4606 | goto error0; | ||
4607 | } else | ||
4608 | bma.aeof = 0; | ||
4609 | /* | ||
4610 | * Call allocator. | ||
4611 | */ | ||
4612 | if ((error = xfs_bmap_alloc(&bma))) | ||
4613 | goto error0; | ||
4614 | /* | ||
4615 | * Copy out result fields. | ||
4616 | */ | ||
4617 | abno = bma.rval; | ||
4618 | if ((flist->xbf_low = bma.low)) | ||
4619 | minleft = 0; | ||
4620 | alen = bma.alen; | ||
4621 | aoff = bma.off; | ||
4622 | ASSERT(*firstblock == NULLFSBLOCK || | ||
4623 | XFS_FSB_TO_AGNO(mp, *firstblock) == | ||
4624 | XFS_FSB_TO_AGNO(mp, bma.firstblock) || | ||
4625 | (flist->xbf_low && | ||
4626 | XFS_FSB_TO_AGNO(mp, *firstblock) < | ||
4627 | XFS_FSB_TO_AGNO(mp, bma.firstblock))); | ||
4628 | *firstblock = bma.firstblock; | ||
4629 | if (cur) | ||
4630 | cur->bc_private.b.firstblock = | ||
4631 | *firstblock; | ||
4632 | if (abno == NULLFSBLOCK) | ||
4633 | break; | ||
4634 | if ((ifp->if_flags & XFS_IFBROOT) && !cur) { | ||
4635 | cur = xfs_bmbt_init_cursor(mp, tp, | ||
4636 | ip, whichfork); | ||
4637 | cur->bc_private.b.firstblock = | ||
4638 | *firstblock; | ||
4639 | cur->bc_private.b.flist = flist; | ||
4640 | } | ||
4641 | /* | ||
4642 | * Bump the number of extents we've allocated | ||
4643 | * in this call. | ||
4644 | */ | ||
4645 | nallocs++; | ||
4646 | } | ||
4647 | if (cur) | ||
4648 | cur->bc_private.b.flags = | ||
4649 | wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0; | ||
4650 | got.br_startoff = aoff; | ||
4651 | got.br_startblock = abno; | ||
4652 | got.br_blockcount = alen; | ||
4653 | got.br_state = XFS_EXT_NORM; /* assume normal */ | ||
4654 | /* | ||
4655 | * Determine state of extent, and the filesystem. | ||
4656 | * A wasdelay extent has been initialized, so | ||
4657 | * shouldn't be flagged as unwritten. | ||
4658 | */ | ||
4659 | if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) { | ||
4660 | if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) | ||
4661 | got.br_state = XFS_EXT_UNWRITTEN; | ||
4662 | } | ||
4663 | error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got, | ||
4664 | firstblock, flist, &tmp_logflags, | ||
4665 | whichfork); | ||
4666 | logflags |= tmp_logflags; | ||
4667 | if (error) | 4925 | if (error) |
4668 | goto error0; | 4926 | goto error0; |
4669 | ep = xfs_iext_get_ext(ifp, lastx); | 4927 | if (bma.blkno == NULLFSBLOCK) |
4670 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4928 | break; |
4671 | xfs_bmbt_get_all(ep, &got); | ||
4672 | ASSERT(got.br_startoff <= aoff); | ||
4673 | ASSERT(got.br_startoff + got.br_blockcount >= | ||
4674 | aoff + alen); | ||
4675 | #ifdef DEBUG | ||
4676 | if (flags & XFS_BMAPI_DELAY) { | ||
4677 | ASSERT(isnullstartblock(got.br_startblock)); | ||
4678 | ASSERT(startblockval(got.br_startblock) > 0); | ||
4679 | } | ||
4680 | ASSERT(got.br_state == XFS_EXT_NORM || | ||
4681 | got.br_state == XFS_EXT_UNWRITTEN); | ||
4682 | #endif | ||
4683 | /* | ||
4684 | * Fall down into the found allocated space case. | ||
4685 | */ | ||
4686 | } else if (inhole) { | ||
4687 | /* | ||
4688 | * Reading in a hole. | ||
4689 | */ | ||
4690 | mval->br_startoff = bno; | ||
4691 | mval->br_startblock = HOLESTARTBLOCK; | ||
4692 | mval->br_blockcount = | ||
4693 | XFS_FILBLKS_MIN(len, got.br_startoff - bno); | ||
4694 | mval->br_state = XFS_EXT_NORM; | ||
4695 | bno += mval->br_blockcount; | ||
4696 | len -= mval->br_blockcount; | ||
4697 | mval++; | ||
4698 | n++; | ||
4699 | continue; | ||
4700 | } | ||
4701 | /* | ||
4702 | * Then deal with the allocated space we found. | ||
4703 | */ | ||
4704 | ASSERT(ep != NULL); | ||
4705 | if (!(flags & XFS_BMAPI_ENTIRE) && | ||
4706 | (got.br_startoff + got.br_blockcount > obno)) { | ||
4707 | if (obno > bno) | ||
4708 | bno = obno; | ||
4709 | ASSERT((bno >= obno) || (n == 0)); | ||
4710 | ASSERT(bno < end); | ||
4711 | mval->br_startoff = bno; | ||
4712 | if (isnullstartblock(got.br_startblock)) { | ||
4713 | ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); | ||
4714 | mval->br_startblock = DELAYSTARTBLOCK; | ||
4715 | } else | ||
4716 | mval->br_startblock = | ||
4717 | got.br_startblock + | ||
4718 | (bno - got.br_startoff); | ||
4719 | /* | ||
4720 | * Return the minimum of what we got and what we | ||
4721 | * asked for, for the length. We can use the len | ||
4722 | * variable here because it is modified below | ||
4723 | * and we could have been there before coming | ||
4724 | * here if the first part of the allocation | ||
4725 | * didn't overlap what was asked for. | ||
4726 | */ | ||
4727 | mval->br_blockcount = | ||
4728 | XFS_FILBLKS_MIN(end - bno, got.br_blockcount - | ||
4729 | (bno - got.br_startoff)); | ||
4730 | mval->br_state = got.br_state; | ||
4731 | ASSERT(mval->br_blockcount <= len); | ||
4732 | } else { | ||
4733 | *mval = got; | ||
4734 | if (isnullstartblock(mval->br_startblock)) { | ||
4735 | ASSERT(!wr || (flags & XFS_BMAPI_DELAY)); | ||
4736 | mval->br_startblock = DELAYSTARTBLOCK; | ||
4737 | } | ||
4738 | } | 4929 | } |
4739 | 4930 | ||
4740 | /* | 4931 | /* Deal with the allocated space we found. */ |
4741 | * Check if writing previously allocated but | 4932 | xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, |
4742 | * unwritten extents. | 4933 | end, n, flags); |
4743 | */ | 4934 | |
4744 | if (wr && | 4935 | /* Execute unwritten extent conversion if necessary */ |
4745 | ((mval->br_state == XFS_EXT_UNWRITTEN && | 4936 | error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); |
4746 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || | 4937 | if (error == EAGAIN) |
4747 | (mval->br_state == XFS_EXT_NORM && | 4938 | continue; |
4748 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == | 4939 | if (error) |
4749 | (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { | 4940 | goto error0; |
4750 | /* | 4941 | |
4751 | * Modify (by adding) the state flag, if writing. | 4942 | /* update the extent map to return */ |
4752 | */ | 4943 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); |
4753 | ASSERT(mval->br_blockcount <= len); | ||
4754 | if ((ifp->if_flags & XFS_IFBROOT) && !cur) { | ||
4755 | cur = xfs_bmbt_init_cursor(mp, | ||
4756 | tp, ip, whichfork); | ||
4757 | cur->bc_private.b.firstblock = | ||
4758 | *firstblock; | ||
4759 | cur->bc_private.b.flist = flist; | ||
4760 | } | ||
4761 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) | ||
4762 | ? XFS_EXT_NORM | ||
4763 | : XFS_EXT_UNWRITTEN; | ||
4764 | error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval, | ||
4765 | firstblock, flist, &tmp_logflags, | ||
4766 | whichfork); | ||
4767 | logflags |= tmp_logflags; | ||
4768 | if (error) | ||
4769 | goto error0; | ||
4770 | ep = xfs_iext_get_ext(ifp, lastx); | ||
4771 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
4772 | xfs_bmbt_get_all(ep, &got); | ||
4773 | /* | ||
4774 | * We may have combined previously unwritten | ||
4775 | * space with written space, so generate | ||
4776 | * another request. | ||
4777 | */ | ||
4778 | if (mval->br_blockcount < len) | ||
4779 | continue; | ||
4780 | } | ||
4781 | 4944 | ||
4782 | ASSERT((flags & XFS_BMAPI_ENTIRE) || | ||
4783 | ((mval->br_startoff + mval->br_blockcount) <= end)); | ||
4784 | ASSERT((flags & XFS_BMAPI_ENTIRE) || | ||
4785 | (mval->br_blockcount <= len) || | ||
4786 | (mval->br_startoff < obno)); | ||
4787 | bno = mval->br_startoff + mval->br_blockcount; | ||
4788 | len = end - bno; | ||
4789 | if (n > 0 && mval->br_startoff == mval[-1].br_startoff) { | ||
4790 | ASSERT(mval->br_startblock == mval[-1].br_startblock); | ||
4791 | ASSERT(mval->br_blockcount > mval[-1].br_blockcount); | ||
4792 | ASSERT(mval->br_state == mval[-1].br_state); | ||
4793 | mval[-1].br_blockcount = mval->br_blockcount; | ||
4794 | mval[-1].br_state = mval->br_state; | ||
4795 | } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK && | ||
4796 | mval[-1].br_startblock != DELAYSTARTBLOCK && | ||
4797 | mval[-1].br_startblock != HOLESTARTBLOCK && | ||
4798 | mval->br_startblock == | ||
4799 | mval[-1].br_startblock + mval[-1].br_blockcount && | ||
4800 | ((flags & XFS_BMAPI_IGSTATE) || | ||
4801 | mval[-1].br_state == mval->br_state)) { | ||
4802 | ASSERT(mval->br_startoff == | ||
4803 | mval[-1].br_startoff + mval[-1].br_blockcount); | ||
4804 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4805 | } else if (n > 0 && | ||
4806 | mval->br_startblock == DELAYSTARTBLOCK && | ||
4807 | mval[-1].br_startblock == DELAYSTARTBLOCK && | ||
4808 | mval->br_startoff == | ||
4809 | mval[-1].br_startoff + mval[-1].br_blockcount) { | ||
4810 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4811 | mval[-1].br_state = mval->br_state; | ||
4812 | } else if (!((n == 0) && | ||
4813 | ((mval->br_startoff + mval->br_blockcount) <= | ||
4814 | obno))) { | ||
4815 | mval++; | ||
4816 | n++; | ||
4817 | } | ||
4818 | /* | 4945 | /* |
4819 | * If we're done, stop now. Stop when we've allocated | 4946 | * If we're done, stop now. Stop when we've allocated |
4820 | * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise | 4947 | * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise |
4821 | * the transaction may get too big. | 4948 | * the transaction may get too big. |
4822 | */ | 4949 | */ |
4823 | if (bno >= end || n >= *nmap || nallocs >= *nmap) | 4950 | if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) |
4824 | break; | 4951 | break; |
4825 | /* | 4952 | |
4826 | * Else go on to the next record. | 4953 | /* Else go on to the next record. */ |
4827 | */ | 4954 | bma.prev = bma.got; |
4828 | prev = got; | 4955 | if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { |
4829 | if (++lastx < nextents) { | 4956 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), |
4830 | ep = xfs_iext_get_ext(ifp, lastx); | 4957 | &bma.got); |
4831 | xfs_bmbt_get_all(ep, &got); | 4958 | } else |
4832 | } else { | ||
4833 | eof = 1; | 4959 | eof = 1; |
4834 | } | ||
4835 | } | 4960 | } |
4836 | *nmap = n; | 4961 | *nmap = n; |
4962 | |||
4837 | /* | 4963 | /* |
4838 | * Transform from btree to extents, give it cur. | 4964 | * Transform from btree to extents, give it cur. |
4839 | */ | 4965 | */ |
4840 | if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && | 4966 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && |
4841 | XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { | 4967 | XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { |
4842 | ASSERT(wr && cur); | 4968 | int tmp_logflags = 0; |
4843 | error = xfs_bmap_btree_to_extents(tp, ip, cur, | 4969 | |
4970 | ASSERT(bma.cur); | ||
4971 | error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, | ||
4844 | &tmp_logflags, whichfork); | 4972 | &tmp_logflags, whichfork); |
4845 | logflags |= tmp_logflags; | 4973 | bma.logflags |= tmp_logflags; |
4846 | if (error) | 4974 | if (error) |
4847 | goto error0; | 4975 | goto error0; |
4848 | } | 4976 | } |
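Editor's note: the loop above caps itself at *nmap mappings and bma.nallocs allocations so a single transaction cannot grow unboundedly. To make the firstblock/total contract from the function comment concrete, here is a hypothetical caller fragment. It is kernel-context pseudocode built only from the prototypes this patch introduces; tp, ip, offset_fsb, count_fsb and total are assumed caller state, and the fragment is not compilable on its own.

	/* Hypothetical caller fragment; surrounding declarations assumed. */
	xfs_fsblock_t		firstblock = NULLFSBLOCK; /* first call in tp */
	struct xfs_bmap_free	flist;
	struct xfs_bmbt_irec	map[XFS_BMAP_MAX_NMAP];
	int			nmap = XFS_BMAP_MAX_NMAP;
	int			error;

	xfs_bmap_init(&flist, &firstblock);
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				XFS_BMAPI_PREALLOC, &firstblock, total,
				map, &nmap, &flist);
	/* A later xfs_bmapi_write() in the same transaction must be handed
	 * the firstblock value this call wrote back. */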
@@ -4856,34 +4984,33 @@ error0: | |||
4856 | * Log everything. Do this after conversion, there's no point in | 4984 | * Log everything. Do this after conversion, there's no point in |
4857 | * logging the extent records if we've converted to btree format. | 4985 | * logging the extent records if we've converted to btree format. |
4858 | */ | 4986 | */ |
4859 | if ((logflags & xfs_ilog_fext(whichfork)) && | 4987 | if ((bma.logflags & xfs_ilog_fext(whichfork)) && |
4860 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) | 4988 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) |
4861 | logflags &= ~xfs_ilog_fext(whichfork); | 4989 | bma.logflags &= ~xfs_ilog_fext(whichfork); |
4862 | else if ((logflags & xfs_ilog_fbroot(whichfork)) && | 4990 | else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && |
4863 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) | 4991 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) |
4864 | logflags &= ~xfs_ilog_fbroot(whichfork); | 4992 | bma.logflags &= ~xfs_ilog_fbroot(whichfork); |
4865 | /* | 4993 | /* |
4866 | * Log whatever the flags say, even if error. Otherwise we might miss | 4994 | * Log whatever the flags say, even if error. Otherwise we might miss |
4867 | * detecting a case where the data is changed, there's an error, | 4995 | * detecting a case where the data is changed, there's an error, |
4868 | * and it's not logged so we don't shutdown when we should. | 4996 | * and it's not logged so we don't shutdown when we should. |
4869 | */ | 4997 | */ |
4870 | if (logflags) { | 4998 | if (bma.logflags) |
4871 | ASSERT(tp && wr); | 4999 | xfs_trans_log_inode(tp, ip, bma.logflags); |
4872 | xfs_trans_log_inode(tp, ip, logflags); | 5000 | |
4873 | } | 5001 | if (bma.cur) { |
4874 | if (cur) { | ||
4875 | if (!error) { | 5002 | if (!error) { |
4876 | ASSERT(*firstblock == NULLFSBLOCK || | 5003 | ASSERT(*firstblock == NULLFSBLOCK || |
4877 | XFS_FSB_TO_AGNO(mp, *firstblock) == | 5004 | XFS_FSB_TO_AGNO(mp, *firstblock) == |
4878 | XFS_FSB_TO_AGNO(mp, | 5005 | XFS_FSB_TO_AGNO(mp, |
4879 | cur->bc_private.b.firstblock) || | 5006 | bma.cur->bc_private.b.firstblock) || |
4880 | (flist->xbf_low && | 5007 | (flist->xbf_low && |
4881 | XFS_FSB_TO_AGNO(mp, *firstblock) < | 5008 | XFS_FSB_TO_AGNO(mp, *firstblock) < |
4882 | XFS_FSB_TO_AGNO(mp, | 5009 | XFS_FSB_TO_AGNO(mp, |
4883 | cur->bc_private.b.firstblock))); | 5010 | bma.cur->bc_private.b.firstblock))); |
4884 | *firstblock = cur->bc_private.b.firstblock; | 5011 | *firstblock = bma.cur->bc_private.b.firstblock; |
4885 | } | 5012 | } |
4886 | xfs_btree_del_cursor(cur, | 5013 | xfs_btree_del_cursor(bma.cur, |
4887 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | 5014 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); |
4888 | } | 5015 | } |
4889 | if (!error) | 5016 | if (!error) |
@@ -4893,58 +5020,6 @@ error0: | |||
4893 | } | 5020 | } |
4894 | 5021 | ||
4895 | /* | 5022 | /* |
4896 | * Map file blocks to filesystem blocks, simple version. | ||
4897 | * One block (extent) only, read-only. | ||
4898 | * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. | ||
4899 | * For the other flag values, the effect is as if XFS_BMAPI_METADATA | ||
4900 | * was set and all the others were clear. | ||
4901 | */ | ||
4902 | int /* error */ | ||
4903 | xfs_bmapi_single( | ||
4904 | xfs_trans_t *tp, /* transaction pointer */ | ||
4905 | xfs_inode_t *ip, /* incore inode */ | ||
4906 | int whichfork, /* data or attr fork */ | ||
4907 | xfs_fsblock_t *fsb, /* output: mapped block */ | ||
4908 | xfs_fileoff_t bno) /* starting file offs. mapped */ | ||
4909 | { | ||
4910 | int eof; /* we've hit the end of extents */ | ||
4911 | int error; /* error return */ | ||
4912 | xfs_bmbt_irec_t got; /* current file extent record */ | ||
4913 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
4914 | xfs_extnum_t lastx; /* last useful extent number */ | ||
4915 | xfs_bmbt_irec_t prev; /* previous file extent record */ | ||
4916 | |||
4917 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4918 | if (unlikely( | ||
4919 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | ||
4920 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)) { | ||
4921 | XFS_ERROR_REPORT("xfs_bmapi_single", XFS_ERRLEVEL_LOW, | ||
4922 | ip->i_mount); | ||
4923 | return XFS_ERROR(EFSCORRUPTED); | ||
4924 | } | ||
4925 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
4926 | return XFS_ERROR(EIO); | ||
4927 | XFS_STATS_INC(xs_blk_mapr); | ||
4928 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
4929 | (error = xfs_iread_extents(tp, ip, whichfork))) | ||
4930 | return error; | ||
4931 | (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, | ||
4932 | &prev); | ||
4933 | /* | ||
4934 | * Reading past eof, act as though there's a hole | ||
4935 | * up to end. | ||
4936 | */ | ||
4937 | if (eof || got.br_startoff > bno) { | ||
4938 | *fsb = NULLFSBLOCK; | ||
4939 | return 0; | ||
4940 | } | ||
4941 | ASSERT(!isnullstartblock(got.br_startblock)); | ||
4942 | ASSERT(bno < got.br_startoff + got.br_blockcount); | ||
4943 | *fsb = got.br_startblock + (bno - got.br_startoff); | ||
4944 | return 0; | ||
4945 | } | ||
4946 | |||
4947 | /* | ||
4948 | * Unmap (remove) blocks from a file. | 5023 | * Unmap (remove) blocks from a file. |
4949 | * If nexts is nonzero then the number of extents to remove is limited to | 5024 | * If nexts is nonzero then the number of extents to remove is limited to |
4950 | * that value. If not all extents in the block range can be removed then | 5025 | * that value. If not all extents in the block range can be removed then |
@@ -5115,9 +5190,9 @@ xfs_bunmapi( | |||
5115 | del.br_blockcount = mod; | 5190 | del.br_blockcount = mod; |
5116 | } | 5191 | } |
5117 | del.br_state = XFS_EXT_UNWRITTEN; | 5192 | del.br_state = XFS_EXT_UNWRITTEN; |
5118 | error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del, | 5193 | error = xfs_bmap_add_extent_unwritten_real(tp, ip, |
5119 | firstblock, flist, &logflags, | 5194 | &lastx, &cur, &del, firstblock, flist, |
5120 | XFS_DATA_FORK); | 5195 | &logflags); |
5121 | if (error) | 5196 | if (error) |
5122 | goto error0; | 5197 | goto error0; |
5123 | goto nodelete; | 5198 | goto nodelete; |
@@ -5173,18 +5248,18 @@ xfs_bunmapi( | |||
5173 | } | 5248 | } |
5174 | prev.br_state = XFS_EXT_UNWRITTEN; | 5249 | prev.br_state = XFS_EXT_UNWRITTEN; |
5175 | lastx--; | 5250 | lastx--; |
5176 | error = xfs_bmap_add_extent(tp, ip, &lastx, | 5251 | error = xfs_bmap_add_extent_unwritten_real(tp, |
5177 | &cur, &prev, firstblock, flist, | 5252 | ip, &lastx, &cur, &prev, |
5178 | &logflags, XFS_DATA_FORK); | 5253 | firstblock, flist, &logflags); |
5179 | if (error) | 5254 | if (error) |
5180 | goto error0; | 5255 | goto error0; |
5181 | goto nodelete; | 5256 | goto nodelete; |
5182 | } else { | 5257 | } else { |
5183 | ASSERT(del.br_state == XFS_EXT_NORM); | 5258 | ASSERT(del.br_state == XFS_EXT_NORM); |
5184 | del.br_state = XFS_EXT_UNWRITTEN; | 5259 | del.br_state = XFS_EXT_UNWRITTEN; |
5185 | error = xfs_bmap_add_extent(tp, ip, &lastx, | 5260 | error = xfs_bmap_add_extent_unwritten_real(tp, |
5186 | &cur, &del, firstblock, flist, | 5261 | ip, &lastx, &cur, &del, |
5187 | &logflags, XFS_DATA_FORK); | 5262 | firstblock, flist, &logflags); |
5188 | if (error) | 5263 | if (error) |
5189 | goto error0; | 5264 | goto error0; |
5190 | goto nodelete; | 5265 | goto nodelete; |
@@ -5506,10 +5581,9 @@ xfs_getbmap( | |||
5506 | 5581 | ||
5507 | do { | 5582 | do { |
5508 | nmap = (nexleft > subnex) ? subnex : nexleft; | 5583 | nmap = (nexleft > subnex) ? subnex : nexleft; |
5509 | error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), | 5584 | error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), |
5510 | XFS_BB_TO_FSB(mp, bmv->bmv_length), | 5585 | XFS_BB_TO_FSB(mp, bmv->bmv_length), |
5511 | bmapi_flags, NULL, 0, map, &nmap, | 5586 | map, &nmap, bmapi_flags); |
5512 | NULL); | ||
5513 | if (error) | 5587 | if (error) |
5514 | goto out_free_map; | 5588 | goto out_free_map; |
5515 | ASSERT(nmap <= subnex); | 5589 | ASSERT(nmap <= subnex); |
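Editor's note: the hunk above is a representative caller-side conversion. The read path drops the transaction, firstblock, total and flist arguments entirely, since no allocation can happen. A hedged minimal fragment of the new call shape, with caller names assumed:

	struct xfs_bmbt_irec	imap;
	int			nimaps = 1;
	int			error;

	/* Read-only lookup of one file block; holes come back as a
	 * mapping with a HOLESTARTBLOCK start block. */
	error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimaps, 0);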
@@ -5583,89 +5657,6 @@ xfs_getbmap( | |||
5583 | return error; | 5657 | return error; |
5584 | } | 5658 | } |
5585 | 5659 | ||
5586 | /* | ||
5587 | * Check the last inode extent to determine whether this allocation will result | ||
5588 | * in blocks being allocated at the end of the file. When we allocate new data | ||
5589 | * blocks at the end of the file which do not start at the previous data block, | ||
5590 | * we will try to align the new blocks at stripe unit boundaries. | ||
5591 | */ | ||
5592 | STATIC int /* error */ | ||
5593 | xfs_bmap_isaeof( | ||
5594 | xfs_inode_t *ip, /* incore inode pointer */ | ||
5595 | xfs_fileoff_t off, /* file offset in fsblocks */ | ||
5596 | int whichfork, /* data or attribute fork */ | ||
5597 | char *aeof) /* return value */ | ||
5598 | { | ||
5599 | int error; /* error return value */ | ||
5600 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
5601 | xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */ | ||
5602 | xfs_extnum_t nextents; /* number of file extents */ | ||
5603 | xfs_bmbt_irec_t s; /* expanded extent record */ | ||
5604 | |||
5605 | ASSERT(whichfork == XFS_DATA_FORK); | ||
5606 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5607 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
5608 | (error = xfs_iread_extents(NULL, ip, whichfork))) | ||
5609 | return error; | ||
5610 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
5611 | if (nextents == 0) { | ||
5612 | *aeof = 1; | ||
5613 | return 0; | ||
5614 | } | ||
5615 | /* | ||
5616 | * Go to the last extent | ||
5617 | */ | ||
5618 | lastrec = xfs_iext_get_ext(ifp, nextents - 1); | ||
5619 | xfs_bmbt_get_all(lastrec, &s); | ||
5620 | /* | ||
5621 | * Check we are allocating in the last extent (for delayed allocations) | ||
5622 | * or past the last extent for non-delayed allocations. | ||
5623 | */ | ||
5624 | *aeof = (off >= s.br_startoff && | ||
5625 | off < s.br_startoff + s.br_blockcount && | ||
5626 | isnullstartblock(s.br_startblock)) || | ||
5627 | off >= s.br_startoff + s.br_blockcount; | ||
5628 | return 0; | ||
5629 | } | ||
5630 | |||
5631 | /* | ||
5632 | * Check if the endoff is outside the last extent. If so the caller will grow | ||
5633 | * the allocation to a stripe unit boundary. | ||
5634 | */ | ||
5635 | int /* error */ | ||
5636 | xfs_bmap_eof( | ||
5637 | xfs_inode_t *ip, /* incore inode pointer */ | ||
5638 | xfs_fileoff_t endoff, /* file offset in fsblocks */ | ||
5639 | int whichfork, /* data or attribute fork */ | ||
5640 | int *eof) /* result value */ | ||
5641 | { | ||
5642 | xfs_fsblock_t blockcount; /* extent block count */ | ||
5643 | int error; /* error return value */ | ||
5644 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
5645 | xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */ | ||
5646 | xfs_extnum_t nextents; /* number of file extents */ | ||
5647 | xfs_fileoff_t startoff; /* extent starting file offset */ | ||
5648 | |||
5649 | ASSERT(whichfork == XFS_DATA_FORK); | ||
5650 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5651 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
5652 | (error = xfs_iread_extents(NULL, ip, whichfork))) | ||
5653 | return error; | ||
5654 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
5655 | if (nextents == 0) { | ||
5656 | *eof = 1; | ||
5657 | return 0; | ||
5658 | } | ||
5659 | /* | ||
5660 | * Go to the last extent | ||
5661 | */ | ||
5662 | lastrec = xfs_iext_get_ext(ifp, nextents - 1); | ||
5663 | startoff = xfs_bmbt_get_startoff(lastrec); | ||
5664 | blockcount = xfs_bmbt_get_blockcount(lastrec); | ||
5665 | *eof = endoff >= startoff + blockcount; | ||
5666 | return 0; | ||
5667 | } | ||
5668 | |||
5669 | #ifdef DEBUG | 5660 | #ifdef DEBUG |
5670 | STATIC struct xfs_buf * | 5661 | STATIC struct xfs_buf * |
5671 | xfs_bmap_get_bp( | 5662 | xfs_bmap_get_bp( |
@@ -6100,9 +6091,8 @@ xfs_bmap_punch_delalloc_range( | |||
6100 | * trying to remove a real extent (which requires a | 6091 | * trying to remove a real extent (which requires a |
6101 | * transaction) or a hole, which is probably a bad idea... | 6092 | * transaction) or a hole, which is probably a bad idea... |
6102 | */ | 6093 | */ |
6103 | error = xfs_bmapi(NULL, ip, start_fsb, 1, | 6094 | error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps, |
6104 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | 6095 | XFS_BMAPI_ENTIRE); |
6105 | &nimaps, NULL); | ||
6106 | 6096 | ||
6107 | if (error) { | 6097 | if (error) { |
6108 | /* something screwed, just bail */ | 6098 | /* something screwed, just bail */ |
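Editor's note: the two helpers deleted above boil down to one-line predicates on the last extent record, and the tests are easy to get wrong by an off-by-one. A standalone model under assumed names, not kernel code:

	#include <stdbool.h>

	typedef unsigned long long fileoff_t;	/* stand-in for xfs_fileoff_t */

	/* Model of xfs_bmap_eof(): does endoff land past the last extent? */
	static bool past_last_extent(fileoff_t endoff, fileoff_t startoff,
				     fileoff_t blockcount)
	{
		return endoff >= startoff + blockcount;
	}

	/* Model of xfs_bmap_isaeof(): allocating inside the last extent only
	 * counts as "at EOF" when that extent is a delayed allocation. */
	static bool alloc_at_eof(fileoff_t off, fileoff_t startoff,
				 fileoff_t blockcount, bool last_is_delayed)
	{
		if (off >= startoff && off < startoff + blockcount)
			return last_is_delayed;
		return off >= startoff + blockcount;
	}

	int main(void)
	{
		/* offset 100 past a last extent covering [40, 90): both true */
		return (past_last_extent(100, 40, 50) &&
			alloc_at_eof(100, 40, 50, false)) ? 0 : 1;
	}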
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index c62234bde053..89ee672d378a 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -62,27 +62,23 @@ typedef struct xfs_bmap_free | |||
62 | #define XFS_BMAP_MAX_NMAP 4 | 62 | #define XFS_BMAP_MAX_NMAP 4 |
63 | 63 | ||
64 | /* | 64 | /* |
65 | * Flags for xfs_bmapi | 65 | * Flags for xfs_bmapi_* |
66 | */ | 66 | */ |
67 | #define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */ | 67 | #define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */ |
68 | #define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ | 68 | #define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */ |
69 | #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ | 69 | #define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */ |
70 | #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ | 70 | #define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */ |
71 | #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ | 71 | #define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */ |
72 | #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ | ||
73 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ | ||
74 | /* combine contig. space */ | 72 | /* combine contig. space */ |
75 | #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ | 73 | #define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */ |
76 | /* | 74 | /* |
77 | * unwritten extent conversion - this needs write cache flushing and no additional | 75 | * unwritten extent conversion - this needs write cache flushing and no additional |
78 | * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts | 76 | * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts |
79 | * from written to unwritten, otherwise convert from unwritten to written. | 77 | * from written to unwritten, otherwise convert from unwritten to written. |
80 | */ | 78 | */ |
81 | #define XFS_BMAPI_CONVERT 0x200 | 79 | #define XFS_BMAPI_CONVERT 0x040 |
82 | 80 | ||
83 | #define XFS_BMAPI_FLAGS \ | 81 | #define XFS_BMAPI_FLAGS \ |
84 | { XFS_BMAPI_WRITE, "WRITE" }, \ | ||
85 | { XFS_BMAPI_DELAY, "DELAY" }, \ | ||
86 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 82 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
87 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 83 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
88 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ | 84 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ |
@@ -113,21 +109,28 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) | |||
113 | * Argument structure for xfs_bmap_alloc. | 109 | * Argument structure for xfs_bmap_alloc. |
114 | */ | 110 | */ |
115 | typedef struct xfs_bmalloca { | 111 | typedef struct xfs_bmalloca { |
116 | xfs_fsblock_t firstblock; /* i/o first block allocated */ | 112 | xfs_fsblock_t *firstblock; /* i/o first block allocated */ |
117 | xfs_fsblock_t rval; /* starting block of new extent */ | 113 | struct xfs_bmap_free *flist; /* bmap freelist */ |
118 | xfs_fileoff_t off; /* offset in file filling in */ | ||
119 | struct xfs_trans *tp; /* transaction pointer */ | 114 | struct xfs_trans *tp; /* transaction pointer */ |
120 | struct xfs_inode *ip; /* incore inode pointer */ | 115 | struct xfs_inode *ip; /* incore inode pointer */ |
121 | struct xfs_bmbt_irec *prevp; /* extent before the new one */ | 116 | struct xfs_bmbt_irec prev; /* extent before the new one */ |
122 | struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ | 117 | struct xfs_bmbt_irec got; /* extent after, or delayed */ |
123 | xfs_extlen_t alen; /* i/o length asked/allocated */ | 118 | |
119 | xfs_fileoff_t offset; /* offset in file filling in */ | ||
120 | xfs_extlen_t length; /* i/o length asked/allocated */ | ||
121 | xfs_fsblock_t blkno; /* starting block of new extent */ | ||
122 | |||
123 | struct xfs_btree_cur *cur; /* btree cursor */ | ||
124 | xfs_extnum_t idx; /* current extent index */ | ||
125 | int nallocs;/* number of extents alloc'd */ | ||
126 | int logflags;/* flags for transaction logging */ | ||
127 | |||
124 | xfs_extlen_t total; /* total blocks needed for xaction */ | 128 | xfs_extlen_t total; /* total blocks needed for xaction */ |
125 | xfs_extlen_t minlen; /* minimum allocation size (blocks) */ | 129 | xfs_extlen_t minlen; /* minimum allocation size (blocks) */ |
126 | xfs_extlen_t minleft; /* amount must be left after alloc */ | 130 | xfs_extlen_t minleft; /* amount must be left after alloc */ |
127 | char eof; /* set if allocating past last extent */ | 131 | char eof; /* set if allocating past last extent */ |
128 | char wasdel; /* replacing a delayed allocation */ | 132 | char wasdel; /* replacing a delayed allocation */ |
129 | char userdata;/* set if is user data */ | 133 | char userdata;/* set if is user data */ |
130 | char low; /* low on space, using seq'l ags */ | ||
131 | char aeof; /* allocated space at eof */ | 134 | char aeof; /* allocated space at eof */ |
132 | char conv; /* overwriting unwritten extents */ | 135 | char conv; /* overwriting unwritten extents */ |
133 | } xfs_bmalloca_t; | 136 | } xfs_bmalloca_t; |
@@ -152,251 +155,62 @@ typedef struct xfs_bmalloca { | |||
152 | { BMAP_RIGHT_FILLING, "RF" }, \ | 155 | { BMAP_RIGHT_FILLING, "RF" }, \ |
153 | { BMAP_ATTRFORK, "ATTR" } | 156 | { BMAP_ATTRFORK, "ATTR" } |
154 | 157 | ||
155 | /* | ||
156 | * Add bmap trace insert entries for all the contents of the extent list. | ||
157 | * | ||
158 | * Quite excessive tracing. Only do this for debug builds. | ||
159 | */ | ||
160 | #if defined(__KERNEL) && defined(DEBUG) | 158 | #if defined(__KERNEL) && defined(DEBUG) |
161 | void | 159 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, |
162 | xfs_bmap_trace_exlist( | 160 | int whichfork, unsigned long caller_ip); |
163 | struct xfs_inode *ip, /* incore inode pointer */ | ||
164 | xfs_extnum_t cnt, /* count of entries in list */ | ||
165 | int whichfork, | ||
166 | unsigned long caller_ip); /* data or attr fork */ | ||
167 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ | 161 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ |
168 | xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_) | 162 | xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_) |
169 | #else | 163 | #else |
170 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) | 164 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) |
171 | #endif | 165 | #endif |
172 | 166 | ||
173 | /* | 167 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); |
174 | * Convert inode from non-attributed to attributed. | 168 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, |
175 | * Must not be in a transaction, ip must not be locked. | 169 | struct xfs_bmap_free *flist, struct xfs_mount *mp); |
176 | */ | 170 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); |
177 | int /* error code */ | 171 | void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); |
178 | xfs_bmap_add_attrfork( | 172 | int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, |
179 | struct xfs_inode *ip, /* incore inode pointer */ | 173 | xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); |
180 | int size, /* space needed for new attribute */ | 174 | int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, |
181 | int rsvd); /* flag for reserved block allocation */ | 175 | xfs_fileoff_t *last_block, int whichfork); |
182 | 176 | int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, | |
183 | /* | 177 | xfs_fileoff_t *unused, int whichfork); |
184 | * Add the extent to the list of extents to be free at transaction end. | 178 | int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); |
185 | * The list is maintained sorted (by block number). | 179 | int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, |
186 | */ | 180 | int whichfork); |
187 | void | 181 | int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, |
188 | xfs_bmap_add_free( | 182 | xfs_filblks_t len, struct xfs_bmbt_irec *mval, |
189 | xfs_fsblock_t bno, /* fs block number of extent */ | 183 | int *nmap, int flags); |
190 | xfs_filblks_t len, /* length of extent */ | 184 | int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno, |
191 | xfs_bmap_free_t *flist, /* list of extents */ | 185 | xfs_filblks_t len, struct xfs_bmbt_irec *mval, |
192 | struct xfs_mount *mp); /* mount point structure */ | 186 | int *nmap, int flags); |
193 | 187 | int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, | |
194 | /* | 188 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, |
195 | * Routine to clean up the free list data structure when | 189 | xfs_fsblock_t *firstblock, xfs_extlen_t total, |
196 | * an error occurs during a transaction. | 190 | struct xfs_bmbt_irec *mval, int *nmap, |
197 | */ | 191 | struct xfs_bmap_free *flist); |
198 | void | 192 | int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, |
199 | xfs_bmap_cancel( | 193 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, |
200 | xfs_bmap_free_t *flist); /* free list to clean up */ | 194 | xfs_extnum_t nexts, xfs_fsblock_t *firstblock, |
201 | 195 | struct xfs_bmap_free *flist, int *done); | |
202 | /* | 196 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, |
203 | * Compute and fill in the value of the maximum depth of a bmap btree | 197 | xfs_extnum_t num); |
204 | * in this filesystem. Done once, during mount. | 198 | uint xfs_default_attroffset(struct xfs_inode *ip); |
205 | */ | ||
206 | void | ||
207 | xfs_bmap_compute_maxlevels( | ||
208 | struct xfs_mount *mp, /* file system mount structure */ | ||
209 | int whichfork); /* data or attr fork */ | ||
210 | |||
211 | /* | ||
212 | * Returns the file-relative block number of the first unused block in the file. | ||
213 | * This is the lowest-address hole if the file has holes, else the first block | ||
214 | * past the end of file. | ||
215 | */ | ||
216 | int /* error */ | ||
217 | xfs_bmap_first_unused( | ||
218 | struct xfs_trans *tp, /* transaction pointer */ | ||
219 | struct xfs_inode *ip, /* incore inode */ | ||
220 | xfs_extlen_t len, /* size of hole to find */ | ||
221 | xfs_fileoff_t *unused, /* unused block num */ | ||
222 | int whichfork); /* data or attr fork */ | ||
223 | |||
224 | /* | ||
225 | * Returns the file-relative block number of the last block + 1 before | ||
226 | * last_block (input value) in the file. | ||
227 | * This is not based on i_size, it is based on the extent list. | ||
228 | * Returns 0 for local files, as they do not have an extent list. | ||
229 | */ | ||
230 | int /* error */ | ||
231 | xfs_bmap_last_before( | ||
232 | struct xfs_trans *tp, /* transaction pointer */ | ||
233 | struct xfs_inode *ip, /* incore inode */ | ||
234 | xfs_fileoff_t *last_block, /* last block */ | ||
235 | int whichfork); /* data or attr fork */ | ||
236 | |||
237 | /* | ||
238 | * Returns the file-relative block number of the first block past eof in | ||
239 | * the file. This is not based on i_size, it is based on the extent list. | ||
240 | * Returns 0 for local files, as they do not have an extent list. | ||
241 | */ | ||
242 | int /* error */ | ||
243 | xfs_bmap_last_offset( | ||
244 | struct xfs_trans *tp, /* transaction pointer */ | ||
245 | struct xfs_inode *ip, /* incore inode */ | ||
246 | xfs_fileoff_t *unused, /* last block num */ | ||
247 | int whichfork); /* data or attr fork */ | ||
248 | |||
249 | /* | ||
250 | * Returns whether the selected fork of the inode has exactly one | ||
251 | * block or not. For the data fork we check this matches di_size, | ||
252 | * implying the file's range is 0..bsize-1. | ||
253 | */ | ||
254 | int | ||
255 | xfs_bmap_one_block( | ||
256 | struct xfs_inode *ip, /* incore inode */ | ||
257 | int whichfork); /* data or attr fork */ | ||
258 | |||
259 | /* | ||
260 | * Read in the extents to iu_extents. | ||
261 | * All inode fields are set up by caller, we just traverse the btree | ||
262 | * and copy the records in. | ||
263 | */ | ||
264 | int /* error */ | ||
265 | xfs_bmap_read_extents( | ||
266 | struct xfs_trans *tp, /* transaction pointer */ | ||
267 | struct xfs_inode *ip, /* incore inode */ | ||
268 | int whichfork); /* data or attr fork */ | ||
269 | |||
270 | /* | ||
271 | * Map file blocks to filesystem blocks. | ||
272 | * File range is given by the bno/len pair. | ||
273 | * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set) | ||
274 | * into a hole or past eof. | ||
275 | * Only allocates blocks from a single allocation group, | ||
276 | * to avoid locking problems. | ||
277 | * The returned value in "firstblock" from the first call in a transaction | ||
278 | * must be remembered and presented to subsequent calls in "firstblock". | ||
279 | * An upper bound for the number of blocks to be allocated is supplied to | ||
280 | * the first call in "total"; if no allocation group has that many free | ||
281 | * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). | ||
282 | */ | ||
283 | int /* error */ | ||
284 | xfs_bmapi( | ||
285 | struct xfs_trans *tp, /* transaction pointer */ | ||
286 | struct xfs_inode *ip, /* incore inode */ | ||
287 | xfs_fileoff_t bno, /* starting file offs. mapped */ | ||
288 | xfs_filblks_t len, /* length to map in file */ | ||
289 | int flags, /* XFS_BMAPI_... */ | ||
290 | xfs_fsblock_t *firstblock, /* first allocated block | ||
291 | controls a.g. for allocs */ | ||
292 | xfs_extlen_t total, /* total blocks needed */ | ||
293 | struct xfs_bmbt_irec *mval, /* output: map values */ | ||
294 | int *nmap, /* i/o: mval size/count */ | ||
295 | xfs_bmap_free_t *flist); /* i/o: list extents to free */ | ||
296 | |||
297 | /* | ||
298 | * Map file blocks to filesystem blocks, simple version. | ||
299 | * One block only, read-only. | ||
300 | * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. | ||
301 | * For the other flag values, the effect is as if XFS_BMAPI_METADATA | ||
302 | * was set and all the others were clear. | ||
303 | */ | ||
304 | int /* error */ | ||
305 | xfs_bmapi_single( | ||
306 | struct xfs_trans *tp, /* transaction pointer */ | ||
307 | struct xfs_inode *ip, /* incore inode */ | ||
308 | int whichfork, /* data or attr fork */ | ||
309 | xfs_fsblock_t *fsb, /* output: mapped block */ | ||
310 | xfs_fileoff_t bno); /* starting file offs. mapped */ | ||
311 | |||
312 | /* | ||
313 | * Unmap (remove) blocks from a file. | ||
314 | * If nexts is nonzero then the number of extents to remove is limited to | ||
315 | * that value. If not all extents in the block range can be removed then | ||
316 | * *done is set. | ||
317 | */ | ||
318 | int /* error */ | ||
319 | xfs_bunmapi( | ||
320 | struct xfs_trans *tp, /* transaction pointer */ | ||
321 | struct xfs_inode *ip, /* incore inode */ | ||
322 | xfs_fileoff_t bno, /* starting offset to unmap */ | ||
323 | xfs_filblks_t len, /* length to unmap in file */ | ||
324 | int flags, /* XFS_BMAPI_... */ | ||
325 | xfs_extnum_t nexts, /* number of extents max */ | ||
326 | xfs_fsblock_t *firstblock, /* first allocated block | ||
327 | controls a.g. for allocs */ | ||
328 | xfs_bmap_free_t *flist, /* i/o: list extents to free */ | ||
329 | int *done); /* set if not done yet */ | ||
330 | |||
331 | /* | ||
332 | * Check an extent list, which has just been read, for | ||
333 | * any bit in the extent flag field. | ||
334 | */ | ||
335 | int | ||
336 | xfs_check_nostate_extents( | ||
337 | struct xfs_ifork *ifp, | ||
338 | xfs_extnum_t idx, | ||
339 | xfs_extnum_t num); | ||
340 | |||
341 | uint | ||
342 | xfs_default_attroffset( | ||
343 | struct xfs_inode *ip); | ||
344 | 199 | ||
345 | #ifdef __KERNEL__ | 200 | #ifdef __KERNEL__ |
346 | |||
347 | /* | ||
348 | * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi | ||
349 | * caller. Frees all the extents that need freeing, which must be done | ||
350 | * last due to locking considerations. | ||
351 | * | ||
352 | * Sets *committed to 1 if the given transaction was committed and a new | ||
353 | * one allocated, and to 0 otherwise; the return value itself is an error code. | ||
354 | */ | ||
355 | int /* error */ | ||
356 | xfs_bmap_finish( | ||
357 | struct xfs_trans **tp, /* transaction pointer addr */ | ||
358 | xfs_bmap_free_t *flist, /* i/o: list extents to free */ | ||
359 | int *committed); /* xact committed or not */ | ||
360 | |||
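The *committed flag matters because the caller's transaction pointer may have been swapped underneath it. A sketch of the usual follow-up (out_abort is a hypothetical label):

	int	committed;

	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error) {
		xfs_bmap_cancel(&flist);	/* drop any unfreed extents */
		goto out_abort;
	}
	if (committed) {
		/* a new transaction was allocated: rejoin the inode */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	}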
361 | /* bmap to userspace formatter - copy to user & advance pointer */ | 201 | /* bmap to userspace formatter - copy to user & advance pointer */ |
362 | typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); | 202 | typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); |
363 | 203 | ||
364 | /* | 204 | int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, |
365 | * Get inode's extents as described in bmv, and format for output. | 205 | int *committed); |
366 | */ | 206 | int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, |
367 | int /* error code */ | 207 | xfs_bmap_format_t formatter, void *arg); |
368 | xfs_getbmap( | 208 | int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, |
369 | xfs_inode_t *ip, | 209 | int whichfork, int *eof); |
370 | struct getbmapx *bmv, /* user bmap structure */ | 210 | int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, |
371 | xfs_bmap_format_t formatter, /* format to user */ | 211 | int whichfork, int *count); |
372 | void *arg); /* formatter arg */ | 212 | int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, |
373 | 213 | xfs_fileoff_t start_fsb, xfs_fileoff_t length); | |
374 | /* | ||
375 | * Check if the endoff is outside the last extent. If so the caller will grow | ||
376 | * the allocation to a stripe unit boundary | ||
377 | */ | ||
378 | int | ||
379 | xfs_bmap_eof( | ||
380 | struct xfs_inode *ip, | ||
381 | xfs_fileoff_t endoff, | ||
382 | int whichfork, | ||
383 | int *eof); | ||
384 | |||
385 | /* | ||
386 | * Count fsblocks of the given fork. | ||
387 | */ | ||
388 | int | ||
389 | xfs_bmap_count_blocks( | ||
390 | xfs_trans_t *tp, | ||
391 | struct xfs_inode *ip, | ||
392 | int whichfork, | ||
393 | int *count); | ||
394 | |||
395 | int | ||
396 | xfs_bmap_punch_delalloc_range( | ||
397 | struct xfs_inode *ip, | ||
398 | xfs_fileoff_t start_fsb, | ||
399 | xfs_fileoff_t length); | ||
400 | #endif /* __KERNEL__ */ | 214 | #endif /* __KERNEL__ */ |
401 | 215 | ||
402 | #endif /* __XFS_BMAP_H__ */ | 216 | #endif /* __XFS_BMAP_H__ */ |
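The xfs_bmap_format_t callback above copies one mapping to userspace and advances the destination pointer. A sketch along the lines of the in-kernel formatter (names and details approximate, not part of this patch):

STATIC int
xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
{
	struct getbmap __user	*base = *ap;

	/* copy out only the getbmap portion of the getbmapx record */
	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
		return XFS_ERROR(EFAULT);

	(*ap) += sizeof(struct getbmap);
	return 0;
}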
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cabf4b5604aa..1f19f03af9d3 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -275,8 +275,7 @@ xfs_btree_dup_cursor( | |||
275 | return error; | 275 | return error; |
276 | } | 276 | } |
277 | new->bc_bufs[i] = bp; | 277 | new->bc_bufs[i] = bp; |
278 | ASSERT(bp); | 278 | ASSERT(!xfs_buf_geterror(bp)); |
279 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
280 | } else | 279 | } else |
281 | new->bc_bufs[i] = NULL; | 280 | new->bc_bufs[i] = NULL; |
282 | } | 281 | } |
@@ -467,8 +466,7 @@ xfs_btree_get_bufl( | |||
467 | ASSERT(fsbno != NULLFSBLOCK); | 466 | ASSERT(fsbno != NULLFSBLOCK); |
468 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 467 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
469 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | 468 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); |
470 | ASSERT(bp); | 469 | ASSERT(!xfs_buf_geterror(bp)); |
471 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
472 | return bp; | 470 | return bp; |
473 | } | 471 | } |
474 | 472 | ||
@@ -491,8 +489,7 @@ xfs_btree_get_bufs( | |||
491 | ASSERT(agbno != NULLAGBLOCK); | 489 | ASSERT(agbno != NULLAGBLOCK); |
492 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | 490 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); |
493 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | 491 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); |
494 | ASSERT(bp); | 492 | ASSERT(!xfs_buf_geterror(bp)); |
495 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
496 | return bp; | 493 | return bp; |
497 | } | 494 | } |
498 | 495 | ||
@@ -632,9 +629,9 @@ xfs_btree_read_bufl( | |||
632 | mp->m_bsize, lock, &bp))) { | 629 | mp->m_bsize, lock, &bp))) { |
633 | return error; | 630 | return error; |
634 | } | 631 | } |
635 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 632 | ASSERT(!xfs_buf_geterror(bp)); |
636 | if (bp) | 633 | if (bp) |
637 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 634 | xfs_buf_set_ref(bp, refval); |
638 | *bpp = bp; | 635 | *bpp = bp; |
639 | return 0; | 636 | return 0; |
640 | } | 637 | } |
@@ -942,13 +939,13 @@ xfs_btree_set_refs( | |||
942 | switch (cur->bc_btnum) { | 939 | switch (cur->bc_btnum) { |
943 | case XFS_BTNUM_BNO: | 940 | case XFS_BTNUM_BNO: |
944 | case XFS_BTNUM_CNT: | 941 | case XFS_BTNUM_CNT: |
945 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); | 942 | xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); |
946 | break; | 943 | break; |
947 | case XFS_BTNUM_INO: | 944 | case XFS_BTNUM_INO: |
948 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); | 945 | xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); |
949 | break; | 946 | break; |
950 | case XFS_BTNUM_BMAP: | 947 | case XFS_BTNUM_BMAP: |
951 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); | 948 | xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); |
952 | break; | 949 | break; |
953 | default: | 950 | default: |
954 | ASSERT(0); | 951 | ASSERT(0); |
@@ -973,8 +970,8 @@ xfs_btree_get_buf_block( | |||
973 | *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, | 970 | *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, |
974 | mp->m_bsize, flags); | 971 | mp->m_bsize, flags); |
975 | 972 | ||
976 | ASSERT(*bpp); | 973 | if (!*bpp) |
977 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | 974 | return ENOMEM; |
978 | 975 | ||
979 | *block = XFS_BUF_TO_BLOCK(*bpp); | 976 | *block = XFS_BUF_TO_BLOCK(*bpp); |
980 | return 0; | 977 | return 0; |
@@ -1006,8 +1003,7 @@ xfs_btree_read_buf_block( | |||
1006 | if (error) | 1003 | if (error) |
1007 | return error; | 1004 | return error; |
1008 | 1005 | ||
1009 | ASSERT(*bpp != NULL); | 1006 | ASSERT(!xfs_buf_geterror(*bpp)); |
1010 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | ||
1011 | 1007 | ||
1012 | xfs_btree_set_refs(cur, *bpp); | 1008 | xfs_btree_set_refs(cur, *bpp); |
1013 | *block = XFS_BUF_TO_BLOCK(*bpp); | 1009 | *block = XFS_BUF_TO_BLOCK(*bpp); |
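These conversions work because xfs_buf_geterror() is NULL-safe, so a single ASSERT replaces the old ASSERT(bp)/ASSERT(!XFS_BUF_GETERROR(bp)) pair. The helper in xfs_buf.h reads roughly:

static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
	return bp ? bp->b_error : ENOMEM;
}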
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 8d05a6a46ce3..5b240de104c0 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -262,7 +262,7 @@ typedef struct xfs_btree_cur | |||
262 | /* | 262 | /* |
263 | * Convert from buffer to btree block header. | 263 | * Convert from buffer to btree block header. |
264 | */ | 264 | */ |
265 | #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) | 265 | #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) |
266 | 266 | ||
267 | 267 | ||
268 | /* | 268 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1fe74506c4c..cf0ac056815f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -43,7 +43,6 @@ | |||
43 | 43 | ||
44 | static kmem_zone_t *xfs_buf_zone; | 44 | static kmem_zone_t *xfs_buf_zone; |
45 | STATIC int xfsbufd(void *); | 45 | STATIC int xfsbufd(void *); |
46 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | ||
47 | 46 | ||
48 | static struct workqueue_struct *xfslogd_workqueue; | 47 | static struct workqueue_struct *xfslogd_workqueue; |
49 | struct workqueue_struct *xfsdatad_workqueue; | 48 | struct workqueue_struct *xfsdatad_workqueue; |
@@ -66,10 +65,6 @@ struct workqueue_struct *xfsconvertd_workqueue; | |||
66 | #define xb_to_km(flags) \ | 65 | #define xb_to_km(flags) \ |
67 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | 66 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) |
68 | 67 | ||
69 | #define xfs_buf_allocate(flags) \ | ||
70 | kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) | ||
71 | #define xfs_buf_deallocate(bp) \ | ||
72 | kmem_zone_free(xfs_buf_zone, (bp)); | ||
73 | 68 | ||
74 | static inline int | 69 | static inline int |
75 | xfs_buf_is_vmapped( | 70 | xfs_buf_is_vmapped( |
@@ -152,6 +147,7 @@ xfs_buf_stale( | |||
152 | struct xfs_buf *bp) | 147 | struct xfs_buf *bp) |
153 | { | 148 | { |
154 | bp->b_flags |= XBF_STALE; | 149 | bp->b_flags |= XBF_STALE; |
150 | xfs_buf_delwri_dequeue(bp); | ||
155 | atomic_set(&(bp)->b_lru_ref, 0); | 151 | atomic_set(&(bp)->b_lru_ref, 0); |
156 | if (!list_empty(&bp->b_lru)) { | 152 | if (!list_empty(&bp->b_lru)) { |
157 | struct xfs_buftarg *btp = bp->b_target; | 153 | struct xfs_buftarg *btp = bp->b_target; |
@@ -167,14 +163,19 @@ xfs_buf_stale( | |||
167 | ASSERT(atomic_read(&bp->b_hold) >= 1); | 163 | ASSERT(atomic_read(&bp->b_hold) >= 1); |
168 | } | 164 | } |
169 | 165 | ||
170 | STATIC void | 166 | struct xfs_buf * |
171 | _xfs_buf_initialize( | 167 | xfs_buf_alloc( |
172 | xfs_buf_t *bp, | 168 | struct xfs_buftarg *target, |
173 | xfs_buftarg_t *target, | ||
174 | xfs_off_t range_base, | 169 | xfs_off_t range_base, |
175 | size_t range_length, | 170 | size_t range_length, |
176 | xfs_buf_flags_t flags) | 171 | xfs_buf_flags_t flags) |
177 | { | 172 | { |
173 | struct xfs_buf *bp; | ||
174 | |||
175 | bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)); | ||
176 | if (unlikely(!bp)) | ||
177 | return NULL; | ||
178 | |||
178 | /* | 179 | /* |
179 | * We don't want certain flags to appear in b_flags. | 180 | * We don't want certain flags to appear in b_flags. |
180 | */ | 181 | */ |
@@ -203,8 +204,9 @@ _xfs_buf_initialize( | |||
203 | init_waitqueue_head(&bp->b_waiters); | 204 | init_waitqueue_head(&bp->b_waiters); |
204 | 205 | ||
205 | XFS_STATS_INC(xb_create); | 206 | XFS_STATS_INC(xb_create); |
206 | |||
207 | trace_xfs_buf_init(bp, _RET_IP_); | 207 | trace_xfs_buf_init(bp, _RET_IP_); |
208 | |||
209 | return bp; | ||
208 | } | 210 | } |
209 | 211 | ||
210 | /* | 212 | /* |
@@ -277,7 +279,7 @@ xfs_buf_free( | |||
277 | } else if (bp->b_flags & _XBF_KMEM) | 279 | } else if (bp->b_flags & _XBF_KMEM) |
278 | kmem_free(bp->b_addr); | 280 | kmem_free(bp->b_addr); |
279 | _xfs_buf_free_pages(bp); | 281 | _xfs_buf_free_pages(bp); |
280 | xfs_buf_deallocate(bp); | 282 | kmem_zone_free(xfs_buf_zone, bp); |
281 | } | 283 | } |
282 | 284 | ||
283 | /* | 285 | /* |
@@ -416,10 +418,7 @@ _xfs_buf_map_pages( | |||
416 | /* | 418 | /* |
417 | * Looks up, and creates if absent, a lockable buffer for | 419 | * Looks up, and creates if absent, a lockable buffer for
418 | * a given range of an inode. The buffer is returned | 420 | * a given range of an inode. The buffer is returned |
419 | * locked. If other overlapping buffers exist, they are | 421 | * locked. No I/O is implied by this call. |
420 | * released before the new buffer is created and locked, | ||
421 | * which may imply that this call will block until those buffers | ||
422 | * are unlocked. No I/O is implied by this call. | ||
423 | */ | 422 | */ |
424 | xfs_buf_t * | 423 | xfs_buf_t * |
425 | _xfs_buf_find( | 424 | _xfs_buf_find( |
@@ -481,8 +480,6 @@ _xfs_buf_find( | |||
481 | 480 | ||
482 | /* No match found */ | 481 | /* No match found */ |
483 | if (new_bp) { | 482 | if (new_bp) { |
484 | _xfs_buf_initialize(new_bp, btp, range_base, | ||
485 | range_length, flags); | ||
486 | rb_link_node(&new_bp->b_rbnode, parent, rbp); | 483 | rb_link_node(&new_bp->b_rbnode, parent, rbp); |
487 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); | 484 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); |
488 | /* the buffer keeps the perag reference until it is freed */ | 485 | /* the buffer keeps the perag reference until it is freed */ |
@@ -525,35 +522,51 @@ found: | |||
525 | } | 522 | } |
526 | 523 | ||
527 | /* | 524 | /* |
528 | * Assembles a buffer covering the specified range. | 525 | * Assembles a buffer covering the specified range. The code is optimised for |
529 | * Storage in memory for all portions of the buffer will be allocated, | 526 | * cache hits, as metadata intensive workloads will see 3 orders of magnitude |
530 | * although backing storage may not be. | 527 | * more hits than misses. |
531 | */ | 528 | */ |
532 | xfs_buf_t * | 529 | struct xfs_buf * |
533 | xfs_buf_get( | 530 | xfs_buf_get( |
534 | xfs_buftarg_t *target,/* target for buffer */ | 531 | xfs_buftarg_t *target,/* target for buffer */ |
535 | xfs_off_t ioff, /* starting offset of range */ | 532 | xfs_off_t ioff, /* starting offset of range */ |
536 | size_t isize, /* length of range */ | 533 | size_t isize, /* length of range */ |
537 | xfs_buf_flags_t flags) | 534 | xfs_buf_flags_t flags) |
538 | { | 535 | { |
539 | xfs_buf_t *bp, *new_bp; | 536 | struct xfs_buf *bp; |
537 | struct xfs_buf *new_bp; | ||
540 | int error = 0; | 538 | int error = 0; |
541 | 539 | ||
542 | new_bp = xfs_buf_allocate(flags); | 540 | bp = _xfs_buf_find(target, ioff, isize, flags, NULL); |
541 | if (likely(bp)) | ||
542 | goto found; | ||
543 | |||
544 | new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT, | ||
545 | flags); | ||
543 | if (unlikely(!new_bp)) | 546 | if (unlikely(!new_bp)) |
544 | return NULL; | 547 | return NULL; |
545 | 548 | ||
546 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 549 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); |
550 | if (!bp) { | ||
551 | kmem_zone_free(xfs_buf_zone, new_bp); | ||
552 | return NULL; | ||
553 | } | ||
554 | |||
547 | if (bp == new_bp) { | 555 | if (bp == new_bp) { |
548 | error = xfs_buf_allocate_memory(bp, flags); | 556 | error = xfs_buf_allocate_memory(bp, flags); |
549 | if (error) | 557 | if (error) |
550 | goto no_buffer; | 558 | goto no_buffer; |
551 | } else { | 559 | } else |
552 | xfs_buf_deallocate(new_bp); | 560 | kmem_zone_free(xfs_buf_zone, new_bp); |
553 | if (unlikely(bp == NULL)) | ||
554 | return NULL; | ||
555 | } | ||
556 | 561 | ||
562 | /* | ||
563 | * Now we have a workable buffer, fill in the block number so | ||
564 | * that we can do IO on it. | ||
565 | */ | ||
566 | bp->b_bn = ioff; | ||
567 | bp->b_count_desired = bp->b_buffer_length; | ||
568 | |||
569 | found: | ||
557 | if (!(bp->b_flags & XBF_MAPPED)) { | 570 | if (!(bp->b_flags & XBF_MAPPED)) { |
558 | error = _xfs_buf_map_pages(bp, flags); | 571 | error = _xfs_buf_map_pages(bp, flags); |
559 | if (unlikely(error)) { | 572 | if (unlikely(error)) { |
@@ -564,18 +577,10 @@ xfs_buf_get( | |||
564 | } | 577 | } |
565 | 578 | ||
566 | XFS_STATS_INC(xb_get); | 579 | XFS_STATS_INC(xb_get); |
567 | |||
568 | /* | ||
569 | * Always fill in the block number now, the mapped cases can do | ||
570 | * their own overlay of this later. | ||
571 | */ | ||
572 | bp->b_bn = ioff; | ||
573 | bp->b_count_desired = bp->b_buffer_length; | ||
574 | |||
575 | trace_xfs_buf_get(bp, flags, _RET_IP_); | 580 | trace_xfs_buf_get(bp, flags, _RET_IP_); |
576 | return bp; | 581 | return bp; |
577 | 582 | ||
578 | no_buffer: | 583 | no_buffer: |
579 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | 584 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) |
580 | xfs_buf_unlock(bp); | 585 | xfs_buf_unlock(bp); |
581 | xfs_buf_rele(bp); | 586 | xfs_buf_rele(bp); |
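The reworked xfs_buf_get() looks up first and allocates only on a miss, and it resolves allocation races in _xfs_buf_find(): if two threads miss on the same range, both allocate, one wins the rbtree insert, and the loser frees its copy. Condensed from the hunk above (sketch only):

	new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT, flags);
	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
	if (bp != new_bp)		/* lost the race, or lookup failed */
		kmem_zone_free(xfs_buf_zone, new_bp);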
@@ -596,7 +601,7 @@ _xfs_buf_read( | |||
596 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); | 601 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); |
597 | 602 | ||
598 | status = xfs_buf_iorequest(bp); | 603 | status = xfs_buf_iorequest(bp); |
599 | if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) | 604 | if (status || bp->b_error || (flags & XBF_ASYNC)) |
600 | return status; | 605 | return status; |
601 | return xfs_buf_iowait(bp); | 606 | return xfs_buf_iowait(bp); |
602 | } | 607 | } |
@@ -679,7 +684,6 @@ xfs_buf_read_uncached( | |||
679 | /* set up the buffer for a read IO */ | 684 | /* set up the buffer for a read IO */ |
680 | XFS_BUF_SET_ADDR(bp, daddr); | 685 | XFS_BUF_SET_ADDR(bp, daddr); |
681 | XFS_BUF_READ(bp); | 686 | XFS_BUF_READ(bp); |
682 | XFS_BUF_BUSY(bp); | ||
683 | 687 | ||
684 | xfsbdstrat(mp, bp); | 688 | xfsbdstrat(mp, bp); |
685 | error = xfs_buf_iowait(bp); | 689 | error = xfs_buf_iowait(bp); |
@@ -690,19 +694,6 @@ xfs_buf_read_uncached( | |||
690 | return bp; | 694 | return bp; |
691 | } | 695 | } |
692 | 696 | ||
693 | xfs_buf_t * | ||
694 | xfs_buf_get_empty( | ||
695 | size_t len, | ||
696 | xfs_buftarg_t *target) | ||
697 | { | ||
698 | xfs_buf_t *bp; | ||
699 | |||
700 | bp = xfs_buf_allocate(0); | ||
701 | if (bp) | ||
702 | _xfs_buf_initialize(bp, target, 0, len, 0); | ||
703 | return bp; | ||
704 | } | ||
705 | |||
706 | /* | 697 | /* |
707 | * Return a buffer allocated as an empty buffer and associated with external | 698 | * Return a buffer allocated as an empty buffer and associated with external
708 | * memory via xfs_buf_associate_memory() back to its empty state. | 699 | * memory via xfs_buf_associate_memory() back to its empty state.
@@ -788,10 +779,9 @@ xfs_buf_get_uncached( | |||
788 | int error, i; | 779 | int error, i; |
789 | xfs_buf_t *bp; | 780 | xfs_buf_t *bp; |
790 | 781 | ||
791 | bp = xfs_buf_allocate(0); | 782 | bp = xfs_buf_alloc(target, 0, len, 0); |
792 | if (unlikely(bp == NULL)) | 783 | if (unlikely(bp == NULL)) |
793 | goto fail; | 784 | goto fail; |
794 | _xfs_buf_initialize(bp, target, 0, len, 0); | ||
795 | 785 | ||
796 | error = _xfs_buf_get_pages(bp, page_count, 0); | 786 | error = _xfs_buf_get_pages(bp, page_count, 0); |
797 | if (error) | 787 | if (error) |
@@ -819,7 +809,7 @@ xfs_buf_get_uncached( | |||
819 | __free_page(bp->b_pages[i]); | 809 | __free_page(bp->b_pages[i]); |
820 | _xfs_buf_free_pages(bp); | 810 | _xfs_buf_free_pages(bp); |
821 | fail_free_buf: | 811 | fail_free_buf: |
822 | xfs_buf_deallocate(bp); | 812 | kmem_zone_free(xfs_buf_zone, bp); |
823 | fail: | 813 | fail: |
824 | return NULL; | 814 | return NULL; |
825 | } | 815 | } |
@@ -938,12 +928,6 @@ void | |||
938 | xfs_buf_unlock( | 928 | xfs_buf_unlock( |
939 | struct xfs_buf *bp) | 929 | struct xfs_buf *bp) |
940 | { | 930 | { |
941 | if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { | ||
942 | atomic_inc(&bp->b_hold); | ||
943 | bp->b_flags |= XBF_ASYNC; | ||
944 | xfs_buf_delwri_queue(bp, 0); | ||
945 | } | ||
946 | |||
947 | XB_CLEAR_OWNER(bp); | 931 | XB_CLEAR_OWNER(bp); |
948 | up(&bp->b_sema); | 932 | up(&bp->b_sema); |
949 | 933 | ||
@@ -1020,9 +1004,19 @@ xfs_buf_ioerror( | |||
1020 | trace_xfs_buf_ioerror(bp, error, _RET_IP_); | 1004 | trace_xfs_buf_ioerror(bp, error, _RET_IP_); |
1021 | } | 1005 | } |
1022 | 1006 | ||
1007 | void | ||
1008 | xfs_buf_ioerror_alert( | ||
1009 | struct xfs_buf *bp, | ||
1010 | const char *func) | ||
1011 | { | ||
1012 | xfs_alert(bp->b_target->bt_mount, | ||
1013 | "metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd", | ||
1014 | (__uint64_t)XFS_BUF_ADDR(bp), func, | ||
1015 | bp->b_error, XFS_BUF_COUNT(bp)); | ||
1016 | } | ||
1017 | |||
1023 | int | 1018 | int |
1024 | xfs_bwrite( | 1019 | xfs_bwrite( |
1025 | struct xfs_mount *mp, | ||
1026 | struct xfs_buf *bp) | 1020 | struct xfs_buf *bp) |
1027 | { | 1021 | { |
1028 | int error; | 1022 | int error; |
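xfs_buf_ioerror_alert() gives every caller the same one-line error report. A typical call site, mirroring the conversions in the xfs_buf_item.c hunks further down (the surrounding code is hypothetical):

	error = xfs_buf_iowait(bp);
	if (error) {
		xfs_buf_ioerror_alert(bp, __func__);
		xfs_buf_relse(bp);
		return error;
	}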
@@ -1034,25 +1028,13 @@ xfs_bwrite( | |||
1034 | xfs_bdstrat_cb(bp); | 1028 | xfs_bdstrat_cb(bp); |
1035 | 1029 | ||
1036 | error = xfs_buf_iowait(bp); | 1030 | error = xfs_buf_iowait(bp); |
1037 | if (error) | 1031 | if (error) { |
1038 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1032 | xfs_force_shutdown(bp->b_target->bt_mount, |
1039 | xfs_buf_relse(bp); | 1033 | SHUTDOWN_META_IO_ERROR); |
1034 | } | ||
1040 | return error; | 1035 | return error; |
1041 | } | 1036 | } |
1042 | 1037 | ||
1043 | void | ||
1044 | xfs_bdwrite( | ||
1045 | void *mp, | ||
1046 | struct xfs_buf *bp) | ||
1047 | { | ||
1048 | trace_xfs_buf_bdwrite(bp, _RET_IP_); | ||
1049 | |||
1050 | bp->b_flags &= ~XBF_READ; | ||
1051 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); | ||
1052 | |||
1053 | xfs_buf_delwri_queue(bp, 1); | ||
1054 | } | ||
1055 | |||
1056 | /* | 1038 | /* |
1057 | * Called when we want to stop a buffer from getting written or read. | 1039 | * Called when we want to stop a buffer from getting written or read. |
1058 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend | 1040 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend |
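With the mount now derived from bp->b_target->bt_mount, a call site migrates as below; note the reworked xfs_bwrite() no longer releases the buffer, so that becomes the caller's job (sketch, not patch content):

	/* before */
	error = xfs_bwrite(mp, bp);

	/* after */
	error = xfs_bwrite(bp);
	xfs_buf_relse(bp);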
@@ -1069,15 +1051,14 @@ xfs_bioerror( | |||
1069 | /* | 1051 | /* |
1070 | * No need to wait until the buffer is unpinned, we aren't flushing it. | 1052 | * No need to wait until the buffer is unpinned, we aren't flushing it. |
1071 | */ | 1053 | */ |
1072 | XFS_BUF_ERROR(bp, EIO); | 1054 | xfs_buf_ioerror(bp, EIO); |
1073 | 1055 | ||
1074 | /* | 1056 | /* |
1075 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. | 1057 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. |
1076 | */ | 1058 | */ |
1077 | XFS_BUF_UNREAD(bp); | 1059 | XFS_BUF_UNREAD(bp); |
1078 | XFS_BUF_UNDELAYWRITE(bp); | ||
1079 | XFS_BUF_UNDONE(bp); | 1060 | XFS_BUF_UNDONE(bp); |
1080 | XFS_BUF_STALE(bp); | 1061 | xfs_buf_stale(bp); |
1081 | 1062 | ||
1082 | xfs_buf_ioend(bp, 0); | 1063 | xfs_buf_ioend(bp, 0); |
1083 | 1064 | ||
@@ -1094,7 +1075,7 @@ STATIC int | |||
1094 | xfs_bioerror_relse( | 1075 | xfs_bioerror_relse( |
1095 | struct xfs_buf *bp) | 1076 | struct xfs_buf *bp) |
1096 | { | 1077 | { |
1097 | int64_t fl = XFS_BUF_BFLAGS(bp); | 1078 | int64_t fl = bp->b_flags; |
1098 | /* | 1079 | /* |
1099 | * No need to wait until the buffer is unpinned. | 1080 | * No need to wait until the buffer is unpinned. |
1100 | * We aren't flushing it. | 1081 | * We aren't flushing it. |
@@ -1104,9 +1085,8 @@ xfs_bioerror_relse( | |||
1104 | * change that interface. | 1085 | * change that interface. |
1105 | */ | 1086 | */ |
1106 | XFS_BUF_UNREAD(bp); | 1087 | XFS_BUF_UNREAD(bp); |
1107 | XFS_BUF_UNDELAYWRITE(bp); | ||
1108 | XFS_BUF_DONE(bp); | 1088 | XFS_BUF_DONE(bp); |
1109 | XFS_BUF_STALE(bp); | 1089 | xfs_buf_stale(bp); |
1110 | bp->b_iodone = NULL; | 1090 | bp->b_iodone = NULL; |
1111 | if (!(fl & XBF_ASYNC)) { | 1091 | if (!(fl & XBF_ASYNC)) { |
1112 | /* | 1092 | /* |
@@ -1115,8 +1095,8 @@ xfs_bioerror_relse( | |||
1115 | * There's no reason to mark error for | 1095 | * There's no reason to mark error for |
1116 | * ASYNC buffers. | 1096 | * ASYNC buffers. |
1117 | */ | 1097 | */ |
1118 | XFS_BUF_ERROR(bp, EIO); | 1098 | xfs_buf_ioerror(bp, EIO); |
1119 | XFS_BUF_FINISH_IOWAIT(bp); | 1099 | complete(&bp->b_iowait); |
1120 | } else { | 1100 | } else { |
1121 | xfs_buf_relse(bp); | 1101 | xfs_buf_relse(bp); |
1122 | } | 1102 | } |
@@ -1276,15 +1256,10 @@ xfs_buf_iorequest( | |||
1276 | { | 1256 | { |
1277 | trace_xfs_buf_iorequest(bp, _RET_IP_); | 1257 | trace_xfs_buf_iorequest(bp, _RET_IP_); |
1278 | 1258 | ||
1279 | if (bp->b_flags & XBF_DELWRI) { | 1259 | ASSERT(!(bp->b_flags & XBF_DELWRI)); |
1280 | xfs_buf_delwri_queue(bp, 1); | ||
1281 | return 0; | ||
1282 | } | ||
1283 | 1260 | ||
1284 | if (bp->b_flags & XBF_WRITE) { | 1261 | if (bp->b_flags & XBF_WRITE) |
1285 | xfs_buf_wait_unpin(bp); | 1262 | xfs_buf_wait_unpin(bp); |
1286 | } | ||
1287 | |||
1288 | xfs_buf_hold(bp); | 1263 | xfs_buf_hold(bp); |
1289 | 1264 | ||
1290 | /* Set the count to 1 initially, this will stop an I/O | 1265 | /* Set the count to 1 initially, this will stop an I/O |
@@ -1324,7 +1299,7 @@ xfs_buf_offset( | |||
1324 | struct page *page; | 1299 | struct page *page; |
1325 | 1300 | ||
1326 | if (bp->b_flags & XBF_MAPPED) | 1301 | if (bp->b_flags & XBF_MAPPED) |
1327 | return XFS_BUF_PTR(bp) + offset; | 1302 | return bp->b_addr + offset; |
1328 | 1303 | ||
1329 | offset += bp->b_offset; | 1304 | offset += bp->b_offset; |
1330 | page = bp->b_pages[offset >> PAGE_SHIFT]; | 1305 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
@@ -1482,9 +1457,13 @@ xfs_setsize_buftarg_flags( | |||
1482 | btp->bt_smask = sectorsize - 1; | 1457 | btp->bt_smask = sectorsize - 1; |
1483 | 1458 | ||
1484 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1459 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1460 | char name[BDEVNAME_SIZE]; | ||
1461 | |||
1462 | bdevname(btp->bt_bdev, name); | ||
1463 | |||
1485 | xfs_warn(btp->bt_mount, | 1464 | xfs_warn(btp->bt_mount, |
1486 | "Cannot set_blocksize to %u on device %s\n", | 1465 | "Cannot set_blocksize to %u on device %s\n", |
1487 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1466 | sectorsize, name); |
1488 | return EINVAL; | 1467 | return EINVAL; |
1489 | } | 1468 | } |
1490 | 1469 | ||
@@ -1515,12 +1494,12 @@ xfs_setsize_buftarg( | |||
1515 | } | 1494 | } |
1516 | 1495 | ||
1517 | STATIC int | 1496 | STATIC int |
1518 | xfs_alloc_delwrite_queue( | 1497 | xfs_alloc_delwri_queue( |
1519 | xfs_buftarg_t *btp, | 1498 | xfs_buftarg_t *btp, |
1520 | const char *fsname) | 1499 | const char *fsname) |
1521 | { | 1500 | { |
1522 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1501 | INIT_LIST_HEAD(&btp->bt_delwri_queue); |
1523 | spin_lock_init(&btp->bt_delwrite_lock); | 1502 | spin_lock_init(&btp->bt_delwri_lock); |
1524 | btp->bt_flags = 0; | 1503 | btp->bt_flags = 0; |
1525 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1504 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
1526 | if (IS_ERR(btp->bt_task)) | 1505 | if (IS_ERR(btp->bt_task)) |
@@ -1550,7 +1529,7 @@ xfs_alloc_buftarg( | |||
1550 | spin_lock_init(&btp->bt_lru_lock); | 1529 | spin_lock_init(&btp->bt_lru_lock); |
1551 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1530 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1552 | goto error; | 1531 | goto error; |
1553 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1532 | if (xfs_alloc_delwri_queue(btp, fsname)) |
1554 | goto error; | 1533 | goto error; |
1555 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1534 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
1556 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | 1535 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; |
@@ -1566,56 +1545,48 @@ error: | |||
1566 | /* | 1545 | /* |
1567 | * Delayed write buffer handling | 1546 | * Delayed write buffer handling |
1568 | */ | 1547 | */ |
1569 | STATIC void | 1548 | void |
1570 | xfs_buf_delwri_queue( | 1549 | xfs_buf_delwri_queue( |
1571 | xfs_buf_t *bp, | 1550 | xfs_buf_t *bp) |
1572 | int unlock) | ||
1573 | { | 1551 | { |
1574 | struct list_head *dwq = &bp->b_target->bt_delwrite_queue; | 1552 | struct xfs_buftarg *btp = bp->b_target; |
1575 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; | ||
1576 | 1553 | ||
1577 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); | 1554 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); |
1578 | 1555 | ||
1579 | ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); | 1556 | ASSERT(!(bp->b_flags & XBF_READ)); |
1580 | 1557 | ||
1581 | spin_lock(dwlk); | 1558 | spin_lock(&btp->bt_delwri_lock); |
1582 | /* If already in the queue, dequeue and place at tail */ | ||
1583 | if (!list_empty(&bp->b_list)) { | 1559 | if (!list_empty(&bp->b_list)) { |
1560 | /* if already in the queue, move it to the tail */ | ||
1584 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1561 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); |
1585 | if (unlock) | 1562 | list_move_tail(&bp->b_list, &btp->bt_delwri_queue); |
1586 | atomic_dec(&bp->b_hold); | 1563 | } else { |
1587 | list_del(&bp->b_list); | ||
1588 | } | ||
1589 | |||
1590 | if (list_empty(dwq)) { | ||
1591 | /* start xfsbufd as it is about to have something to do */ | 1564 | /* start xfsbufd as it is about to have something to do */ |
1592 | wake_up_process(bp->b_target->bt_task); | 1565 | if (list_empty(&btp->bt_delwri_queue)) |
1593 | } | 1566 | wake_up_process(bp->b_target->bt_task); |
1594 | 1567 | ||
1595 | bp->b_flags |= _XBF_DELWRI_Q; | 1568 | atomic_inc(&bp->b_hold); |
1596 | list_add_tail(&bp->b_list, dwq); | 1569 | bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC; |
1570 | list_add_tail(&bp->b_list, &btp->bt_delwri_queue); | ||
1571 | } | ||
1597 | bp->b_queuetime = jiffies; | 1572 | bp->b_queuetime = jiffies; |
1598 | spin_unlock(dwlk); | 1573 | spin_unlock(&btp->bt_delwri_lock); |
1599 | |||
1600 | if (unlock) | ||
1601 | xfs_buf_unlock(bp); | ||
1602 | } | 1574 | } |
1603 | 1575 | ||
1604 | void | 1576 | void |
1605 | xfs_buf_delwri_dequeue( | 1577 | xfs_buf_delwri_dequeue( |
1606 | xfs_buf_t *bp) | 1578 | xfs_buf_t *bp) |
1607 | { | 1579 | { |
1608 | spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; | ||
1609 | int dequeued = 0; | 1580 | int dequeued = 0; |
1610 | 1581 | ||
1611 | spin_lock(dwlk); | 1582 | spin_lock(&bp->b_target->bt_delwri_lock); |
1612 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { | 1583 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { |
1613 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1584 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); |
1614 | list_del_init(&bp->b_list); | 1585 | list_del_init(&bp->b_list); |
1615 | dequeued = 1; | 1586 | dequeued = 1; |
1616 | } | 1587 | } |
1617 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); | 1588 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); |
1618 | spin_unlock(dwlk); | 1589 | spin_unlock(&bp->b_target->bt_delwri_lock); |
1619 | 1590 | ||
1620 | if (dequeued) | 1591 | if (dequeued) |
1621 | xfs_buf_rele(bp); | 1592 | xfs_buf_rele(bp); |
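Queueing is now self-contained: xfs_buf_delwri_queue() sets XBF_DELWRI|XBF_ASYNC itself and takes its own hold, and xfs_buf_unlock() no longer queues buffers behind the caller's back. A migration sketch for a caller:

	/* before: flags set by hand, queueing entangled with unlock */
	bp->b_flags |= XBF_DELWRI | XBF_ASYNC;
	xfs_buf_delwri_queue(bp, 1);

	/* after */
	xfs_buf_delwri_queue(bp);
	xfs_buf_unlock(bp);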
@@ -1647,16 +1618,9 @@ xfs_buf_delwri_promote( | |||
1647 | if (bp->b_queuetime < jiffies - age) | 1618 | if (bp->b_queuetime < jiffies - age) |
1648 | return; | 1619 | return; |
1649 | bp->b_queuetime = jiffies - age; | 1620 | bp->b_queuetime = jiffies - age; |
1650 | spin_lock(&btp->bt_delwrite_lock); | 1621 | spin_lock(&btp->bt_delwri_lock); |
1651 | list_move(&bp->b_list, &btp->bt_delwrite_queue); | 1622 | list_move(&bp->b_list, &btp->bt_delwri_queue); |
1652 | spin_unlock(&btp->bt_delwrite_lock); | 1623 | spin_unlock(&btp->bt_delwri_lock); |
1653 | } | ||
1654 | |||
1655 | STATIC void | ||
1656 | xfs_buf_runall_queues( | ||
1657 | struct workqueue_struct *queue) | ||
1658 | { | ||
1659 | flush_workqueue(queue); | ||
1660 | } | 1624 | } |
1661 | 1625 | ||
1662 | /* | 1626 | /* |
@@ -1670,18 +1634,16 @@ xfs_buf_delwri_split( | |||
1670 | unsigned long age) | 1634 | unsigned long age) |
1671 | { | 1635 | { |
1672 | xfs_buf_t *bp, *n; | 1636 | xfs_buf_t *bp, *n; |
1673 | struct list_head *dwq = &target->bt_delwrite_queue; | ||
1674 | spinlock_t *dwlk = &target->bt_delwrite_lock; | ||
1675 | int skipped = 0; | 1637 | int skipped = 0; |
1676 | int force; | 1638 | int force; |
1677 | 1639 | ||
1678 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1640 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); |
1679 | INIT_LIST_HEAD(list); | 1641 | INIT_LIST_HEAD(list); |
1680 | spin_lock(dwlk); | 1642 | spin_lock(&target->bt_delwri_lock); |
1681 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1643 | list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) { |
1682 | ASSERT(bp->b_flags & XBF_DELWRI); | 1644 | ASSERT(bp->b_flags & XBF_DELWRI); |
1683 | 1645 | ||
1684 | if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { | 1646 | if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { |
1685 | if (!force && | 1647 | if (!force && |
1686 | time_before(jiffies, bp->b_queuetime + age)) { | 1648 | time_before(jiffies, bp->b_queuetime + age)) { |
1687 | xfs_buf_unlock(bp); | 1649 | xfs_buf_unlock(bp); |
@@ -1695,10 +1657,9 @@ xfs_buf_delwri_split( | |||
1695 | } else | 1657 | } else |
1696 | skipped++; | 1658 | skipped++; |
1697 | } | 1659 | } |
1698 | spin_unlock(dwlk); | ||
1699 | 1660 | ||
1661 | spin_unlock(&target->bt_delwri_lock); | ||
1700 | return skipped; | 1662 | return skipped; |
1701 | |||
1702 | } | 1663 | } |
1703 | 1664 | ||
1704 | /* | 1665 | /* |
@@ -1748,7 +1709,7 @@ xfsbufd( | |||
1748 | } | 1709 | } |
1749 | 1710 | ||
1750 | /* sleep for a long time if there is nothing to do. */ | 1711 | /* sleep for a long time if there is nothing to do. */ |
1751 | if (list_empty(&target->bt_delwrite_queue)) | 1712 | if (list_empty(&target->bt_delwri_queue)) |
1752 | tout = MAX_SCHEDULE_TIMEOUT; | 1713 | tout = MAX_SCHEDULE_TIMEOUT; |
1753 | schedule_timeout_interruptible(tout); | 1714 | schedule_timeout_interruptible(tout); |
1754 | 1715 | ||
@@ -1784,9 +1745,7 @@ xfs_flush_buftarg( | |||
1784 | LIST_HEAD(wait_list); | 1745 | LIST_HEAD(wait_list); |
1785 | struct blk_plug plug; | 1746 | struct blk_plug plug; |
1786 | 1747 | ||
1787 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1748 | flush_workqueue(xfslogd_workqueue); |
1788 | xfs_buf_runall_queues(xfsdatad_workqueue); | ||
1789 | xfs_buf_runall_queues(xfslogd_workqueue); | ||
1790 | 1749 | ||
1791 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1750 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); |
1792 | pincount = xfs_buf_delwri_split(target, &tmp_list, 0); | 1751 | pincount = xfs_buf_delwri_split(target, &tmp_list, 0); |
@@ -1867,11 +1826,3 @@ xfs_buf_terminate(void) | |||
1867 | destroy_workqueue(xfslogd_workqueue); | 1826 | destroy_workqueue(xfslogd_workqueue); |
1868 | kmem_zone_destroy(xfs_buf_zone); | 1827 | kmem_zone_destroy(xfs_buf_zone); |
1869 | } | 1828 | } |
1870 | |||
1871 | #ifdef CONFIG_KDB_MODULES | ||
1872 | struct list_head * | ||
1873 | xfs_get_buftarg_list(void) | ||
1874 | { | ||
1875 | return &xfs_buftarg_list; | ||
1876 | } | ||
1877 | #endif | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6a83b46b4bcf..5bab046e859f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -105,8 +105,8 @@ typedef struct xfs_buftarg { | |||
105 | 105 | ||
106 | /* per device delwri queue */ | 106 | /* per device delwri queue */ |
107 | struct task_struct *bt_task; | 107 | struct task_struct *bt_task; |
108 | struct list_head bt_delwrite_queue; | 108 | struct list_head bt_delwri_queue; |
109 | spinlock_t bt_delwrite_lock; | 109 | spinlock_t bt_delwri_lock; |
110 | unsigned long bt_flags; | 110 | unsigned long bt_flags; |
111 | 111 | ||
112 | /* LRU control structures */ | 112 | /* LRU control structures */ |
@@ -175,7 +175,8 @@ extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, | |||
175 | extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | 175 | extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, |
176 | xfs_buf_flags_t); | 176 | xfs_buf_flags_t); |
177 | 177 | ||
178 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 178 | struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t, |
179 | xfs_buf_flags_t); | ||
179 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); | 180 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); |
180 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | 181 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); |
181 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 182 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
@@ -197,14 +198,14 @@ extern void xfs_buf_unlock(xfs_buf_t *); | |||
197 | ((bp)->b_sema.count <= 0) | 198 | ((bp)->b_sema.count <= 0) |
198 | 199 | ||
199 | /* Buffer Read and Write Routines */ | 200 | /* Buffer Read and Write Routines */ |
200 | extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); | 201 | extern int xfs_bwrite(struct xfs_buf *bp); |
201 | extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); | ||
202 | 202 | ||
203 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | 203 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); |
204 | extern int xfs_bdstrat_cb(struct xfs_buf *); | 204 | extern int xfs_bdstrat_cb(struct xfs_buf *); |
205 | 205 | ||
206 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 206 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
207 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 207 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
208 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | ||
208 | extern int xfs_buf_iorequest(xfs_buf_t *); | 209 | extern int xfs_buf_iorequest(xfs_buf_t *); |
209 | extern int xfs_buf_iowait(xfs_buf_t *); | 210 | extern int xfs_buf_iowait(xfs_buf_t *); |
210 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 211 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
@@ -221,53 +222,32 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp) | |||
221 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); | 222 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); |
222 | 223 | ||
223 | /* Delayed Write Buffer Routines */ | 224 | /* Delayed Write Buffer Routines */ |
224 | extern void xfs_buf_delwri_dequeue(xfs_buf_t *); | 225 | extern void xfs_buf_delwri_queue(struct xfs_buf *); |
225 | extern void xfs_buf_delwri_promote(xfs_buf_t *); | 226 | extern void xfs_buf_delwri_dequeue(struct xfs_buf *); |
227 | extern void xfs_buf_delwri_promote(struct xfs_buf *); | ||
226 | 228 | ||
227 | /* Buffer Daemon Setup Routines */ | 229 | /* Buffer Daemon Setup Routines */ |
228 | extern int xfs_buf_init(void); | 230 | extern int xfs_buf_init(void); |
229 | extern void xfs_buf_terminate(void); | 231 | extern void xfs_buf_terminate(void); |
230 | 232 | ||
231 | #define xfs_buf_target_name(target) \ | ||
232 | ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) | ||
233 | |||
234 | |||
235 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) | ||
236 | #define XFS_BUF_ZEROFLAGS(bp) \ | 233 | #define XFS_BUF_ZEROFLAGS(bp) \ |
237 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ | 234 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ |
238 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 235 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) |
239 | 236 | ||
240 | void xfs_buf_stale(struct xfs_buf *bp); | 237 | void xfs_buf_stale(struct xfs_buf *bp); |
241 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); | ||
242 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 238 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
243 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 239 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
244 | #define XFS_BUF_SUPER_STALE(bp) do { \ | ||
245 | XFS_BUF_STALE(bp); \ | ||
246 | xfs_buf_delwri_dequeue(bp); \ | ||
247 | XFS_BUF_DONE(bp); \ | ||
248 | } while (0) | ||
249 | |||
250 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) | ||
251 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | ||
252 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | ||
253 | 240 | ||
254 | #define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) | 241 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
255 | #define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) | ||
256 | #define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) | ||
257 | 242 | ||
258 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) | 243 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) |
259 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) | 244 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) |
260 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) | 245 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) |
261 | 246 | ||
262 | #define XFS_BUF_BUSY(bp) do { } while (0) | ||
263 | #define XFS_BUF_UNBUSY(bp) do { } while (0) | ||
264 | #define XFS_BUF_ISBUSY(bp) (1) | ||
265 | |||
266 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) | 247 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) |
267 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) | 248 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) |
268 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) | 249 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) |
269 | 250 | ||
270 | #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) | ||
271 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) | 251 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) |
272 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) | 252 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) |
273 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) | 253 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) |
@@ -276,10 +256,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
276 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) | 256 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) |
277 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) | 257 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) |
278 | 258 | ||
279 | #define XFS_BUF_SET_START(bp) do { } while (0) | ||
280 | |||
281 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | ||
282 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | ||
283 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) | 259 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) |
284 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) | 260 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) |
285 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) | 261 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) |
@@ -289,23 +265,15 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
289 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 265 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
290 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 266 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
291 | 267 | ||
292 | static inline void | 268 | static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) |
293 | xfs_buf_set_ref( | ||
294 | struct xfs_buf *bp, | ||
295 | int lru_ref) | ||
296 | { | 269 | { |
297 | atomic_set(&bp->b_lru_ref, lru_ref); | 270 | atomic_set(&bp->b_lru_ref, lru_ref); |
298 | } | 271 | } |
299 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
300 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | ||
301 | |||
302 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | ||
303 | 272 | ||
304 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); | 273 | static inline int xfs_buf_ispinned(struct xfs_buf *bp) |
305 | 274 | { | |
306 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) | 275 | return atomic_read(&bp->b_pin_count); |
307 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) | 276 | } |
308 | #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) | ||
309 | 277 | ||
310 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 278 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
311 | { | 279 | { |
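Replacing the flag macros with typed inlines means the compiler checks the buffer argument; call sites convert one-for-one, as the xfs_buf_item.c hunk below shows:

	if (xfs_buf_ispinned(bp))	/* was: XFS_BUF_ISPINNED(bp) */
		return XFS_ITEM_PINNED;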
@@ -323,14 +291,7 @@ extern void xfs_wait_buftarg(xfs_buftarg_t *); | |||
323 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 291 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
324 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); | 292 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); |
325 | 293 | ||
326 | #ifdef CONFIG_KDB_MODULES | ||
327 | extern struct list_head *xfs_get_buftarg_list(void); | ||
328 | #endif | ||
329 | |||
330 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) | 294 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) |
331 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) | 295 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) |
332 | 296 | ||
333 | #define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) | ||
334 | #define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) | ||
335 | |||
336 | #endif /* __XFS_BUF_H__ */ | 297 | #endif /* __XFS_BUF_H__ */ |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 88492916c3dc..1a3513881bce 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -124,9 +124,9 @@ xfs_buf_item_log_check( | |||
124 | 124 | ||
125 | bp = bip->bli_buf; | 125 | bp = bip->bli_buf; |
126 | ASSERT(XFS_BUF_COUNT(bp) > 0); | 126 | ASSERT(XFS_BUF_COUNT(bp) > 0); |
127 | ASSERT(XFS_BUF_PTR(bp) != NULL); | 127 | ASSERT(bp->b_addr != NULL); |
128 | orig = bip->bli_orig; | 128 | orig = bip->bli_orig; |
129 | buffer = XFS_BUF_PTR(bp); | 129 | buffer = bp->b_addr; |
130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { | 131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
132 | xfs_emerg(bp->b_mount, | 132 | xfs_emerg(bp->b_mount, |
@@ -371,7 +371,6 @@ xfs_buf_item_pin( | |||
371 | { | 371 | { |
372 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 372 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
373 | 373 | ||
374 | ASSERT(XFS_BUF_ISBUSY(bip->bli_buf)); | ||
375 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 374 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
376 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 375 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
377 | (bip->bli_flags & XFS_BLI_STALE)); | 376 | (bip->bli_flags & XFS_BLI_STALE)); |
@@ -479,13 +478,13 @@ xfs_buf_item_trylock( | |||
479 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 478 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
480 | struct xfs_buf *bp = bip->bli_buf; | 479 | struct xfs_buf *bp = bip->bli_buf; |
481 | 480 | ||
482 | if (XFS_BUF_ISPINNED(bp)) | 481 | if (xfs_buf_ispinned(bp)) |
483 | return XFS_ITEM_PINNED; | 482 | return XFS_ITEM_PINNED; |
484 | if (!xfs_buf_trylock(bp)) | 483 | if (!xfs_buf_trylock(bp)) |
485 | return XFS_ITEM_LOCKED; | 484 | return XFS_ITEM_LOCKED; |
486 | 485 | ||
487 | /* take a reference to the buffer. */ | 486 | /* take a reference to the buffer. */ |
488 | XFS_BUF_HOLD(bp); | 487 | xfs_buf_hold(bp); |
489 | 488 | ||
490 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 489 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
491 | trace_xfs_buf_item_trylock(bip); | 490 | trace_xfs_buf_item_trylock(bip); |
@@ -630,7 +629,7 @@ xfs_buf_item_push( | |||
630 | * the xfsbufd to get this buffer written. We have to unlock the buffer | 629 | * the xfsbufd to get this buffer written. We have to unlock the buffer |
631 | * to allow the xfsbufd to write it, too. | 630 | * to allow the xfsbufd to write it, too. |
632 | */ | 631 | */ |
633 | STATIC void | 632 | STATIC bool |
634 | xfs_buf_item_pushbuf( | 633 | xfs_buf_item_pushbuf( |
635 | struct xfs_log_item *lip) | 634 | struct xfs_log_item *lip) |
636 | { | 635 | { |
@@ -644,6 +643,7 @@ xfs_buf_item_pushbuf( | |||
644 | 643 | ||
645 | xfs_buf_delwri_promote(bp); | 644 | xfs_buf_delwri_promote(bp); |
646 | xfs_buf_relse(bp); | 645 | xfs_buf_relse(bp); |
646 | return true; | ||
647 | } | 647 | } |
648 | 648 | ||
649 | STATIC void | 649 | STATIC void |
@@ -726,7 +726,7 @@ xfs_buf_item_init( | |||
726 | * to have logged. | 726 | * to have logged. |
727 | */ | 727 | */ |
728 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); | 728 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); |
729 | memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp)); | 729 | memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp)); |
730 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); | 730 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); |
731 | #endif | 731 | #endif |
732 | 732 | ||
@@ -895,7 +895,6 @@ xfs_buf_attach_iodone( | |||
895 | { | 895 | { |
896 | xfs_log_item_t *head_lip; | 896 | xfs_log_item_t *head_lip; |
897 | 897 | ||
898 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
899 | ASSERT(xfs_buf_islocked(bp)); | 898 | ASSERT(xfs_buf_islocked(bp)); |
900 | 899 | ||
901 | lip->li_cb = cb; | 900 | lip->li_cb = cb; |
@@ -960,7 +959,7 @@ xfs_buf_iodone_callbacks( | |||
960 | static ulong lasttime; | 959 | static ulong lasttime; |
961 | static xfs_buftarg_t *lasttarg; | 960 | static xfs_buftarg_t *lasttarg; |
962 | 961 | ||
963 | if (likely(!XFS_BUF_GETERROR(bp))) | 962 | if (likely(!xfs_buf_geterror(bp))) |
964 | goto do_callbacks; | 963 | goto do_callbacks; |
965 | 964 | ||
966 | /* | 965 | /* |
@@ -968,19 +967,18 @@ xfs_buf_iodone_callbacks( | |||
968 | * I/O errors, there's no point in giving this a retry. | 967 | * I/O errors, there's no point in giving this a retry. |
969 | */ | 968 | */ |
970 | if (XFS_FORCED_SHUTDOWN(mp)) { | 969 | if (XFS_FORCED_SHUTDOWN(mp)) { |
971 | XFS_BUF_SUPER_STALE(bp); | 970 | xfs_buf_stale(bp); |
971 | XFS_BUF_DONE(bp); | ||
972 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | 972 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
973 | goto do_callbacks; | 973 | goto do_callbacks; |
974 | } | 974 | } |
975 | 975 | ||
976 | if (XFS_BUF_TARGET(bp) != lasttarg || | 976 | if (bp->b_target != lasttarg || |
977 | time_after(jiffies, (lasttime + 5*HZ))) { | 977 | time_after(jiffies, (lasttime + 5*HZ))) { |
978 | lasttime = jiffies; | 978 | lasttime = jiffies; |
979 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", | 979 | xfs_buf_ioerror_alert(bp, __func__); |
980 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | ||
981 | (__uint64_t)XFS_BUF_ADDR(bp)); | ||
982 | } | 980 | } |
983 | lasttarg = XFS_BUF_TARGET(bp); | 981 | lasttarg = bp->b_target; |
984 | 982 | ||
985 | /* | 983 | /* |
986 | * If the write was asynchronous then no one will be looking for the | 984 | * If the write was asynchronous then no one will be looking for the |
@@ -991,12 +989,11 @@ xfs_buf_iodone_callbacks( | |||
991 | * around. | 989 | * around. |
992 | */ | 990 | */ |
993 | if (XFS_BUF_ISASYNC(bp)) { | 991 | if (XFS_BUF_ISASYNC(bp)) { |
994 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ | 992 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
995 | 993 | ||
996 | if (!XFS_BUF_ISSTALE(bp)) { | 994 | if (!XFS_BUF_ISSTALE(bp)) { |
997 | XFS_BUF_DELAYWRITE(bp); | 995 | xfs_buf_delwri_queue(bp); |
998 | XFS_BUF_DONE(bp); | 996 | XFS_BUF_DONE(bp); |
999 | XFS_BUF_SET_START(bp); | ||
1000 | } | 997 | } |
1001 | ASSERT(bp->b_iodone != NULL); | 998 | ASSERT(bp->b_iodone != NULL); |
1002 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | 999 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); |
@@ -1008,12 +1005,10 @@ xfs_buf_iodone_callbacks( | |||
1008 | * If the write of the buffer was synchronous, we want to make | 1005 | * If the write of the buffer was synchronous, we want to make |
1009 | * sure to return the error to the caller of xfs_bwrite(). | 1006 | * sure to return the error to the caller of xfs_bwrite(). |
1010 | */ | 1007 | */ |
1011 | XFS_BUF_STALE(bp); | 1008 | xfs_buf_stale(bp); |
1012 | XFS_BUF_DONE(bp); | 1009 | XFS_BUF_DONE(bp); |
1013 | XFS_BUF_UNDELAYWRITE(bp); | ||
1014 | 1010 | ||
1015 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1011 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1016 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1017 | 1012 | ||
1018 | do_callbacks: | 1013 | do_callbacks: |
1019 | xfs_buf_do_callbacks(bp); | 1014 | xfs_buf_do_callbacks(bp); |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 5bfcb8779f9f..77c74257c2a3 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1578,9 +1578,8 @@ xfs_da_grow_inode_int( | |||
1578 | */ | 1578 | */ |
1579 | nmap = 1; | 1579 | nmap = 1; |
1580 | ASSERT(args->firstblock != NULL); | 1580 | ASSERT(args->firstblock != NULL); |
1581 | error = xfs_bmapi(tp, dp, *bno, count, | 1581 | error = xfs_bmapi_write(tp, dp, *bno, count, |
1582 | xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| | 1582 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, |
1583 | XFS_BMAPI_CONTIG, | ||
1584 | args->firstblock, args->total, &map, &nmap, | 1583 | args->firstblock, args->total, &map, &nmap, |
1585 | args->flist); | 1584 | args->flist); |
1586 | if (error) | 1585 | if (error) |
@@ -1602,9 +1601,8 @@ xfs_da_grow_inode_int( | |||
1602 | for (b = *bno, mapi = 0; b < *bno + count; ) { | 1601 | for (b = *bno, mapi = 0; b < *bno + count; ) { |
1603 | nmap = MIN(XFS_BMAP_MAX_NMAP, count); | 1602 | nmap = MIN(XFS_BMAP_MAX_NMAP, count); |
1604 | c = (int)(*bno + count - b); | 1603 | c = (int)(*bno + count - b); |
1605 | error = xfs_bmapi(tp, dp, b, c, | 1604 | error = xfs_bmapi_write(tp, dp, b, c, |
1606 | xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| | 1605 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, |
1607 | XFS_BMAPI_METADATA, | ||
1608 | args->firstblock, args->total, | 1606 | args->firstblock, args->total, |
1609 | &mapp[mapi], &nmap, args->flist); | 1607 | &mapp[mapi], &nmap, args->flist); |
1610 | if (error) | 1608 | if (error) |
@@ -1975,33 +1973,16 @@ xfs_da_do_buf( | |||
1975 | /* | 1973 | /* |
1976 | * Optimize the one-block case. | 1974 | * Optimize the one-block case. |
1977 | */ | 1975 | */ |
1978 | if (nfsb == 1) { | 1976 | if (nfsb == 1) |
1979 | xfs_fsblock_t fsb; | ||
1980 | |||
1981 | if ((error = | ||
1982 | xfs_bmapi_single(trans, dp, whichfork, &fsb, | ||
1983 | (xfs_fileoff_t)bno))) { | ||
1984 | return error; | ||
1985 | } | ||
1986 | mapp = ↦ | 1977 | mapp = ↦ |
1987 | if (fsb == NULLFSBLOCK) { | 1978 | else |
1988 | nmap = 0; | ||
1989 | } else { | ||
1990 | map.br_startblock = fsb; | ||
1991 | map.br_startoff = (xfs_fileoff_t)bno; | ||
1992 | map.br_blockcount = 1; | ||
1993 | nmap = 1; | ||
1994 | } | ||
1995 | } else { | ||
1996 | mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); | 1979 | mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); |
1997 | nmap = nfsb; | 1980 | |
1998 | if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno, | 1981 | nmap = nfsb; |
1999 | nfsb, | 1982 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp, |
2000 | XFS_BMAPI_METADATA | | 1983 | &nmap, xfs_bmapi_aflag(whichfork)); |
2001 | xfs_bmapi_aflag(whichfork), | 1984 | if (error) |
2002 | NULL, 0, mapp, &nmap, NULL))) | 1985 | goto exit0; |
2003 | goto exit0; | ||
2004 | } | ||
2005 | } else { | 1986 | } else { |
2006 | map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); | 1987 | map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); |
2007 | map.br_startoff = (xfs_fileoff_t)bno; | 1988 | map.br_startoff = (xfs_fileoff_t)bno; |
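Dropping the one-block special case works because the general read path covers it; a hedged equivalent of the removed xfs_bmapi_single() call is a single-extent xfs_bmapi_read():

	struct xfs_bmbt_irec	map;
	int			nmap = 1;

	error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, 1, &map, &nmap,
			       xfs_bmapi_aflag(whichfork));
	/* a hole comes back as nmap == 0 or a HOLESTARTBLOCK mapping */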
@@ -2050,7 +2031,7 @@ xfs_da_do_buf( | |||
2050 | case 0: | 2031 | case 0: |
2051 | bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, | 2032 | bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, |
2052 | mappedbno, nmapped, 0); | 2033 | mappedbno, nmapped, 0); |
2053 | error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); | 2034 | error = bp ? bp->b_error : XFS_ERROR(EIO); |
2054 | break; | 2035 | break; |
2055 | case 1: | 2036 | case 1: |
2056 | case 2: | 2037 | case 2: |
@@ -2072,13 +2053,10 @@ xfs_da_do_buf( | |||
2072 | if (!bp) | 2053 | if (!bp) |
2073 | continue; | 2054 | continue; |
2074 | if (caller == 1) { | 2055 | if (caller == 1) { |
2075 | if (whichfork == XFS_ATTR_FORK) { | 2056 | if (whichfork == XFS_ATTR_FORK) |
2076 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, | 2057 | xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); |
2077 | XFS_ATTR_BTREE_REF); | 2058 | else |
2078 | } else { | 2059 | xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); |
2079 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE, | ||
2080 | XFS_DIR_BTREE_REF); | ||
2081 | } | ||
2082 | } | 2060 | } |
2083 | if (bplist) { | 2061 | if (bplist) { |
2084 | bplist[nbplist++] = bp; | 2062 | bplist[nbplist++] = bp; |
@@ -2268,7 +2246,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
2268 | dabuf->nbuf = 1; | 2246 | dabuf->nbuf = 1; |
2269 | bp = bps[0]; | 2247 | bp = bps[0]; |
2270 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); | 2248 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); |
2271 | dabuf->data = XFS_BUF_PTR(bp); | 2249 | dabuf->data = bp->b_addr; |
2272 | dabuf->bps[0] = bp; | 2250 | dabuf->bps[0] = bp; |
2273 | } else { | 2251 | } else { |
2274 | dabuf->nbuf = nbuf; | 2252 | dabuf->nbuf = nbuf; |
@@ -2279,7 +2257,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
2279 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); | 2257 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); |
2280 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2258 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { |
2281 | bp = bps[i]; | 2259 | bp = bps[i]; |
2282 | memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp), | 2260 | memcpy((char *)dabuf->data + off, bp->b_addr, |
2283 | XFS_BUF_COUNT(bp)); | 2261 | XFS_BUF_COUNT(bp)); |
2284 | } | 2262 | } |
2285 | } | 2263 | } |
@@ -2302,8 +2280,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf) | |||
2302 | for (i = off = 0; i < dabuf->nbuf; | 2280 | for (i = off = 0; i < dabuf->nbuf; |
2303 | i++, off += XFS_BUF_COUNT(bp)) { | 2281 | i++, off += XFS_BUF_COUNT(bp)) { |
2304 | bp = dabuf->bps[i]; | 2282 | bp = dabuf->bps[i]; |
2305 | memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, | 2283 | memcpy(bp->b_addr, dabuf->data + off, |
2306 | XFS_BUF_COUNT(bp)); | 2284 | XFS_BUF_COUNT(bp)); |
2307 | } | 2285 | } |
2308 | } | 2286 | } |
2309 | } | 2287 | } |
@@ -2340,7 +2318,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) | |||
2340 | 2318 | ||
2341 | ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); | 2319 | ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); |
2342 | if (dabuf->nbuf == 1) { | 2320 | if (dabuf->nbuf == 1) { |
2343 | ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0])); | 2321 | ASSERT(dabuf->data == dabuf->bps[0]->b_addr); |
2344 | xfs_trans_log_buf(tp, dabuf->bps[0], first, last); | 2322 | xfs_trans_log_buf(tp, dabuf->bps[0], first, last); |
2345 | return; | 2323 | return; |
2346 | } | 2324 | } |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 9a84a85c03b1..654dc6f05bac 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -425,8 +425,8 @@ xfs_swap_extents( | |||
425 | } | 425 | } |
426 | 426 | ||
427 | 427 | ||
428 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 428 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
429 | xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 429 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
430 | 430 | ||
431 | xfs_trans_log_inode(tp, ip, ilf_fields); | 431 | xfs_trans_log_inode(tp, ip, ilf_fields); |
432 | xfs_trans_log_inode(tp, tip, tilf_fields); | 432 | xfs_trans_log_inode(tp, tip, tilf_fields); |
@@ -438,7 +438,7 @@ xfs_swap_extents( | |||
438 | if (mp->m_flags & XFS_MOUNT_WSYNC) | 438 | if (mp->m_flags & XFS_MOUNT_WSYNC) |
439 | xfs_trans_set_sync(tp); | 439 | xfs_trans_set_sync(tp); |
440 | 440 | ||
441 | error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); | 441 | error = xfs_trans_commit(tp, 0); |
442 | 442 | ||
443 | trace_xfs_swap_extent_after(ip, 0); | 443 | trace_xfs_swap_extent_after(ip, 0); |
444 | trace_xfs_swap_extent_after(tip, 1); | 444 | trace_xfs_swap_extent_after(tip, 1); |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index dffba9ba0db6..a3721633abc8 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt { | |||
148 | be32_to_cpu((dip)->di_nextents) : \ | 148 | be32_to_cpu((dip)->di_nextents) : \ |
149 | be16_to_cpu((dip)->di_anextents)) | 149 | be16_to_cpu((dip)->di_anextents)) |
150 | 150 | ||
151 | #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) | 151 | #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr)) |
152 | 152 | ||
153 | /* | 153 | /* |
154 | * For block and character special files the 32bit dev_t is stored at the | 154 | * For block and character special files the 32bit dev_t is stored at the |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ca2386d82cdf..66e108f561a3 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -888,12 +888,10 @@ xfs_dir2_leaf_getdents( | |||
888 | * we already have in the table. | 888 | * we already have in the table. |
889 | */ | 889 | */ |
890 | nmap = map_size - map_valid; | 890 | nmap = map_size - map_valid; |
891 | error = xfs_bmapi(NULL, dp, | 891 | error = xfs_bmapi_read(dp, map_off, |
892 | map_off, | ||
893 | xfs_dir2_byte_to_da(mp, | 892 | xfs_dir2_byte_to_da(mp, |
894 | XFS_DIR2_LEAF_OFFSET) - map_off, | 893 | XFS_DIR2_LEAF_OFFSET) - map_off, |
895 | XFS_BMAPI_METADATA, NULL, 0, | 894 | &map[map_valid], &nmap, 0); |
896 | &map[map_valid], &nmap, NULL); | ||
897 | /* | 895 | /* |
898 | * Don't know if we should ignore this or | 896 | * Don't know if we should ignore this or |
899 | * try to return an error. | 897 | * try to return an error. |
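The hunk above is one instance of a conversion repeated throughout this series: read-only xfs_bmapi() calls become xfs_bmapi_read(), which drops the transaction, firstblock, freelist and delalloc-total plumbing that only allocating callers need. A condensed sketch of the new idiom, where offset_fsb and count_fsb are hypothetical placeholder values:

    struct xfs_bmbt_irec    map;
    int                     nmap = 1;
    int                     error;

    /* read-only lookup: no transaction, no allocation arguments */
    error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &map, &nmap, 0);
    if (error)
        return error;
    if (!nmap || map.br_startblock == HOLESTARTBLOCK) {
        /* nothing is mapped over the range: treat it as a hole */
    }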
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/xfs_discard.c index 244e797dae32..8a24f0c6c860 100644 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -38,7 +38,7 @@ xfs_trim_extents( | |||
38 | struct xfs_mount *mp, | 38 | struct xfs_mount *mp, |
39 | xfs_agnumber_t agno, | 39 | xfs_agnumber_t agno, |
40 | xfs_fsblock_t start, | 40 | xfs_fsblock_t start, |
41 | xfs_fsblock_t len, | 41 | xfs_fsblock_t end, |
42 | xfs_fsblock_t minlen, | 42 | xfs_fsblock_t minlen, |
43 | __uint64_t *blocks_trimmed) | 43 | __uint64_t *blocks_trimmed) |
44 | { | 44 | { |
@@ -100,7 +100,7 @@ xfs_trim_extents( | |||
100 | * down partially overlapping ranges for now. | 100 | * down partially overlapping ranges for now. |
101 | */ | 101 | */ |
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | 102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || |
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | 103 | XFS_AGB_TO_FSB(mp, agno, fbno) > end) { |
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | 104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); |
105 | goto next_extent; | 105 | goto next_extent; |
106 | } | 106 | } |
@@ -145,7 +145,7 @@ xfs_ioc_trim( | |||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | 145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; |
146 | unsigned int granularity = q->limits.discard_granularity; | 146 | unsigned int granularity = q->limits.discard_granularity; |
147 | struct fstrim_range range; | 147 | struct fstrim_range range; |
148 | xfs_fsblock_t start, len, minlen; | 148 | xfs_fsblock_t start, end, minlen; |
149 | xfs_agnumber_t start_agno, end_agno, agno; | 149 | xfs_agnumber_t start_agno, end_agno, agno; |
150 | __uint64_t blocks_trimmed = 0; | 150 | __uint64_t blocks_trimmed = 0; |
151 | int error, last_error = 0; | 151 | int error, last_error = 0; |
@@ -165,19 +165,19 @@ xfs_ioc_trim( | |||
165 | * matter as trimming blocks is an advisory interface. | 165 | * matter as trimming blocks is an advisory interface. |
166 | */ | 166 | */ |
167 | start = XFS_B_TO_FSBT(mp, range.start); | 167 | start = XFS_B_TO_FSBT(mp, range.start); |
168 | len = XFS_B_TO_FSBT(mp, range.len); | 168 | end = start + XFS_B_TO_FSBT(mp, range.len) - 1; |
169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | 169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); |
170 | 170 | ||
171 | start_agno = XFS_FSB_TO_AGNO(mp, start); | 171 | if (start >= mp->m_sb.sb_dblocks) |
172 | if (start_agno >= mp->m_sb.sb_agcount) | ||
173 | return -XFS_ERROR(EINVAL); | 172 | return -XFS_ERROR(EINVAL); |
173 | if (end > mp->m_sb.sb_dblocks - 1) | ||
174 | end = mp->m_sb.sb_dblocks - 1; | ||
174 | 175 | ||
175 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | 176 | start_agno = XFS_FSB_TO_AGNO(mp, start); |
176 | if (end_agno >= mp->m_sb.sb_agcount) | 177 | end_agno = XFS_FSB_TO_AGNO(mp, end); |
177 | end_agno = mp->m_sb.sb_agcount - 1; | ||
178 | 178 | ||
179 | for (agno = start_agno; agno <= end_agno; agno++) { | 179 | for (agno = start_agno; agno <= end_agno; agno++) { |
180 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | 180 | error = -xfs_trim_extents(mp, agno, start, end, minlen, |
181 | &blocks_trimmed); | 181 | &blocks_trimmed); |
182 | if (error) | 182 | if (error) |
183 | last_error = error; | 183 | last_error = error; |
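The conversion above turns the byte range from userspace into an inclusive block range [start, end] clamped to the device size, so the per-AG filter reduces to a single `> end` comparison and the last allocation group is never silently skipped. A condensed sketch of the arithmetic, assuming `range` has already been copied in from userspace:

    xfs_fsblock_t   start, end, minlen;

    /* convert bytes to filesystem blocks, rounding down */
    start = XFS_B_TO_FSBT(mp, range.start);
    end = start + XFS_B_TO_FSBT(mp, range.len) - 1;
    minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));

    if (start >= mp->m_sb.sb_dblocks)
        return -XFS_ERROR(EINVAL);      /* range begins past the device */
    if (end > mp->m_sb.sb_dblocks - 1)
        end = mp->m_sb.sb_dblocks - 1;  /* clamp to the last block */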
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/xfs_discard.h index 344879aea646..344879aea646 100644 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ b/fs/xfs/xfs_discard.h | |||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 837f31158d43..25d7280e9f6b 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -318,10 +318,9 @@ xfs_qm_init_dquot_blk( | |||
318 | int curid, i; | 318 | int curid, i; |
319 | 319 | ||
320 | ASSERT(tp); | 320 | ASSERT(tp); |
321 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
322 | ASSERT(xfs_buf_islocked(bp)); | 321 | ASSERT(xfs_buf_islocked(bp)); |
323 | 322 | ||
324 | d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); | 323 | d = bp->b_addr; |
325 | 324 | ||
326 | /* | 325 | /* |
327 | * ID of the first dquot in the block - id's are zero based. | 326 | * ID of the first dquot in the block - id's are zero based. |
@@ -378,16 +377,14 @@ xfs_qm_dqalloc( | |||
378 | return (ESRCH); | 377 | return (ESRCH); |
379 | } | 378 | } |
380 | 379 | ||
381 | xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); | 380 | xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); |
382 | nmaps = 1; | 381 | nmaps = 1; |
383 | if ((error = xfs_bmapi(tp, quotip, | 382 | error = xfs_bmapi_write(tp, quotip, offset_fsb, |
384 | offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, | 383 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, |
385 | XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, | 384 | &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), |
386 | &firstblock, | 385 | &map, &nmaps, &flist); |
387 | XFS_QM_DQALLOC_SPACE_RES(mp), | 386 | if (error) |
388 | &map, &nmaps, &flist))) { | ||
389 | goto error0; | 387 | goto error0; |
390 | } | ||
391 | ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); | 388 | ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); |
392 | ASSERT(nmaps == 1); | 389 | ASSERT(nmaps == 1); |
393 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | 390 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
@@ -403,8 +400,11 @@ xfs_qm_dqalloc( | |||
403 | dqp->q_blkno, | 400 | dqp->q_blkno, |
404 | mp->m_quotainfo->qi_dqchunklen, | 401 | mp->m_quotainfo->qi_dqchunklen, |
405 | 0); | 402 | 0); |
406 | if (!bp || (error = XFS_BUF_GETERROR(bp))) | 403 | |
404 | error = xfs_buf_geterror(bp); | ||
405 | if (error) | ||
407 | goto error1; | 406 | goto error1; |
407 | |||
408 | /* | 408 | /* |
409 | * Make a chunk of dquots out of this buffer and log | 409 | * Make a chunk of dquots out of this buffer and log |
410 | * the entire thing. | 410 | * the entire thing. |
@@ -486,9 +486,8 @@ xfs_qm_dqtobp( | |||
486 | /* | 486 | /* |
487 | * Find the block map; no allocations yet | 487 | * Find the block map; no allocations yet |
488 | */ | 488 | */ |
489 | error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, | 489 | error = xfs_bmapi_read(quotip, dqp->q_fileoffset, |
490 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, | 490 | XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); |
491 | NULL, 0, &map, &nmaps, NULL); | ||
492 | 491 | ||
493 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 492 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); |
494 | if (error) | 493 | if (error) |
@@ -534,13 +533,12 @@ xfs_qm_dqtobp( | |||
534 | return XFS_ERROR(error); | 533 | return XFS_ERROR(error); |
535 | } | 534 | } |
536 | 535 | ||
537 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
538 | ASSERT(xfs_buf_islocked(bp)); | 536 | ASSERT(xfs_buf_islocked(bp)); |
539 | 537 | ||
540 | /* | 538 | /* |
541 | * calculate the location of the dquot inside the buffer. | 539 | * calculate the location of the dquot inside the buffer. |
542 | */ | 540 | */ |
543 | ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 541 | ddq = bp->b_addr + dqp->q_bufoffset; |
544 | 542 | ||
545 | /* | 543 | /* |
546 | * A simple sanity check in case we got a corrupted dquot... | 544 | * A simple sanity check in case we got a corrupted dquot... |
@@ -553,7 +551,6 @@ xfs_qm_dqtobp( | |||
553 | xfs_trans_brelse(tp, bp); | 551 | xfs_trans_brelse(tp, bp); |
554 | return XFS_ERROR(EIO); | 552 | return XFS_ERROR(EIO); |
555 | } | 553 | } |
556 | XFS_BUF_BUSY(bp); /* We dirtied this */ | ||
557 | } | 554 | } |
558 | 555 | ||
559 | *O_bpp = bp; | 556 | *O_bpp = bp; |
@@ -608,7 +605,7 @@ xfs_qm_dqread( | |||
608 | dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); | 605 | dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); |
609 | 606 | ||
610 | /* Mark the buf so that this will stay incore a little longer */ | 607 | /* Mark the buf so that this will stay incore a little longer */ |
611 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); | 608 | xfs_buf_set_ref(bp, XFS_DQUOT_REF); |
612 | 609 | ||
613 | /* | 610 | /* |
614 | * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) | 611 | * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) |
@@ -622,7 +619,6 @@ xfs_qm_dqread( | |||
622 | * this particular dquot was repaired. We still aren't afraid to | 619 | * this particular dquot was repaired. We still aren't afraid to |
623 | * brelse it because we have the changes incore. | 620 | * brelse it because we have the changes incore. |
624 | */ | 621 | */ |
625 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
626 | ASSERT(xfs_buf_islocked(bp)); | 622 | ASSERT(xfs_buf_islocked(bp)); |
627 | xfs_trans_brelse(tp, bp); | 623 | xfs_trans_brelse(tp, bp); |
628 | 624 | ||
@@ -1204,7 +1200,7 @@ xfs_qm_dqflush( | |||
1204 | /* | 1200 | /* |
1205 | * Calculate the location of the dquot inside the buffer. | 1201 | * Calculate the location of the dquot inside the buffer. |
1206 | */ | 1202 | */ |
1207 | ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 1203 | ddqp = bp->b_addr + dqp->q_bufoffset; |
1208 | 1204 | ||
1209 | /* | 1205 | /* |
1210 | * A simple sanity check in case we got a corrupted dquot.. | 1206 | * A simple sanity check in case we got a corrupted dquot.. |
@@ -1240,15 +1236,17 @@ xfs_qm_dqflush( | |||
1240 | * If the buffer is pinned then push on the log so we won't | 1236 | * If the buffer is pinned then push on the log so we won't |
1241 | * get stuck waiting in the write for too long. | 1237 | * get stuck waiting in the write for too long. |
1242 | */ | 1238 | */ |
1243 | if (XFS_BUF_ISPINNED(bp)) { | 1239 | if (xfs_buf_ispinned(bp)) { |
1244 | trace_xfs_dqflush_force(dqp); | 1240 | trace_xfs_dqflush_force(dqp); |
1245 | xfs_log_force(mp, 0); | 1241 | xfs_log_force(mp, 0); |
1246 | } | 1242 | } |
1247 | 1243 | ||
1248 | if (flags & SYNC_WAIT) | 1244 | if (flags & SYNC_WAIT) |
1249 | error = xfs_bwrite(mp, bp); | 1245 | error = xfs_bwrite(bp); |
1250 | else | 1246 | else |
1251 | xfs_bdwrite(mp, bp); | 1247 | xfs_buf_delwri_queue(bp); |
1248 | |||
1249 | xfs_buf_relse(bp); | ||
1252 | 1250 | ||
1253 | trace_xfs_dqflush_done(dqp); | 1251 | trace_xfs_dqflush_done(dqp); |
1254 | 1252 | ||
@@ -1447,7 +1445,7 @@ xfs_qm_dqflock_pushbuf_wait( | |||
1447 | goto out_lock; | 1445 | goto out_lock; |
1448 | 1446 | ||
1449 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 1447 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
1450 | if (XFS_BUF_ISPINNED(bp)) | 1448 | if (xfs_buf_ispinned(bp)) |
1451 | xfs_log_force(mp, 0); | 1449 | xfs_log_force(mp, 0); |
1452 | xfs_buf_delwri_promote(bp); | 1450 | xfs_buf_delwri_promote(bp); |
1453 | wake_up_process(bp->b_target->bt_task); | 1451 | wake_up_process(bp->b_target->bt_task); |
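A pattern worth noting in the xfs_qm_dqflush() hunk above, because it recurs across the whole series: xfs_bwrite() loses its mount argument and, like xfs_buf_delwri_queue(), no longer consumes the buffer reference, so the caller now releases the buffer explicitly. A minimal sketch of the new convention:

    /* write the buffer out, synchronously or via the delwri queue */
    if (flags & SYNC_WAIT)
        error = xfs_bwrite(bp);         /* blocks until the IO completes */
    else
        xfs_buf_delwri_queue(bp);       /* queued, written back later */

    xfs_buf_relse(bp);                  /* caller always drops its reference */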
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 34b7e945dbfa..34b7e945dbfa 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 9e0e2fa3f2c8..bb3f71d236d2 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait( | |||
183 | * search the buffer cache can be a time consuming thing, and AIL lock is a | 183 | * search the buffer cache can be a time consuming thing, and AIL lock is a |
184 | * spinlock. | 184 | * spinlock. |
185 | */ | 185 | */ |
186 | STATIC void | 186 | STATIC bool |
187 | xfs_qm_dquot_logitem_pushbuf( | 187 | xfs_qm_dquot_logitem_pushbuf( |
188 | struct xfs_log_item *lip) | 188 | struct xfs_log_item *lip) |
189 | { | 189 | { |
190 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); | 190 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); |
191 | struct xfs_dquot *dqp = qlip->qli_dquot; | 191 | struct xfs_dquot *dqp = qlip->qli_dquot; |
192 | struct xfs_buf *bp; | 192 | struct xfs_buf *bp; |
193 | bool ret = true; | ||
193 | 194 | ||
194 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 195 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
195 | 196 | ||
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf( | |||
201 | if (completion_done(&dqp->q_flush) || | 202 | if (completion_done(&dqp->q_flush) || |
202 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 203 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
203 | xfs_dqunlock(dqp); | 204 | xfs_dqunlock(dqp); |
204 | return; | 205 | return true; |
205 | } | 206 | } |
206 | 207 | ||
207 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, | 208 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, |
208 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | 209 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); |
209 | xfs_dqunlock(dqp); | 210 | xfs_dqunlock(dqp); |
210 | if (!bp) | 211 | if (!bp) |
211 | return; | 212 | return true; |
212 | if (XFS_BUF_ISDELAYWRITE(bp)) | 213 | if (XFS_BUF_ISDELAYWRITE(bp)) |
213 | xfs_buf_delwri_promote(bp); | 214 | xfs_buf_delwri_promote(bp); |
215 | if (xfs_buf_ispinned(bp)) | ||
216 | ret = false; | ||
214 | xfs_buf_relse(bp); | 217 | xfs_buf_relse(bp); |
218 | return ret; | ||
215 | } | 219 | } |
216 | 220 | ||
217 | /* | 221 | /* |
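The ->pushbuf handler now reports back to its caller: judging from the hunk above, returning false appears to signal that the backing buffer is pinned, so the AIL push code knows a log force is needed before the dquot can be written back, while true means nothing is blocking progress. A condensed sketch of the contract, where blkno and len stand in for the log item's format fields:

    bool    ret = true;

    bp = xfs_incore(mp->m_ddev_targp, blkno, len, XBF_TRYLOCK);
    if (!bp)
        return true;            /* no buffer in core: nothing to push */
    if (XFS_BUF_ISDELAYWRITE(bp))
        xfs_buf_delwri_promote(bp);     /* move it up the delwri queue */
    if (xfs_buf_ispinned(bp))
        ret = false;            /* pinned: caller must force the log */
    xfs_buf_relse(bp);
    return ret;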
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 5acae2ada70b..5acae2ada70b 100644 --- a/fs/xfs/quota/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/xfs_export.c index 75e5d322e48f..da108977b21f 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -229,16 +229,16 @@ xfs_fs_nfs_commit_metadata( | |||
229 | { | 229 | { |
230 | struct xfs_inode *ip = XFS_I(inode); | 230 | struct xfs_inode *ip = XFS_I(inode); |
231 | struct xfs_mount *mp = ip->i_mount; | 231 | struct xfs_mount *mp = ip->i_mount; |
232 | int error = 0; | 232 | xfs_lsn_t lsn = 0; |
233 | 233 | ||
234 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 234 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
235 | if (xfs_ipincount(ip)) { | 235 | if (xfs_ipincount(ip)) |
236 | error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, | 236 | lsn = ip->i_itemp->ili_last_lsn; |
237 | XFS_LOG_SYNC, NULL); | ||
238 | } | ||
239 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 237 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
240 | 238 | ||
241 | return error; | 239 | if (!lsn) |
240 | return 0; | ||
241 | return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); | ||
242 | } | 242 | } |
243 | 243 | ||
244 | const struct export_operations xfs_export_operations = { | 244 | const struct export_operations xfs_export_operations = { |
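The shape of this change recurs in the fsync paths below: instead of forcing the log while holding the inode lock, the code samples the last LSN under the shared ilock and issues the (potentially blocking) log force only after dropping it. Condensed:

    xfs_lsn_t   lsn = 0;

    xfs_ilock(ip, XFS_ILOCK_SHARED);
    if (xfs_ipincount(ip))              /* changes not yet on disk? */
        lsn = ip->i_itemp->ili_last_lsn;
    xfs_iunlock(ip, XFS_ILOCK_SHARED);

    if (!lsn)
        return 0;                       /* nothing pinned: already stable */
    return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);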
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/xfs_export.h index 3272b6ae7a35..3272b6ae7a35 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/xfs_export.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/xfs_file.c index 7f7b42469ea7..753ed9b5c70b 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -124,6 +124,35 @@ xfs_iozero( | |||
124 | return (-status); | 124 | return (-status); |
125 | } | 125 | } |
126 | 126 | ||
127 | /* | ||
128 | * Fsync operations on directories are much simpler than on regular files, | ||
129 | * as there is no file data to flush, and thus also no need for explicit | ||
130 | * cache flush operations, and there are no non-transaction metadata updates | ||
131 | * on directories either. | ||
132 | */ | ||
133 | STATIC int | ||
134 | xfs_dir_fsync( | ||
135 | struct file *file, | ||
136 | loff_t start, | ||
137 | loff_t end, | ||
138 | int datasync) | ||
139 | { | ||
140 | struct xfs_inode *ip = XFS_I(file->f_mapping->host); | ||
141 | struct xfs_mount *mp = ip->i_mount; | ||
142 | xfs_lsn_t lsn = 0; | ||
143 | |||
144 | trace_xfs_dir_fsync(ip); | ||
145 | |||
146 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
147 | if (xfs_ipincount(ip)) | ||
148 | lsn = ip->i_itemp->ili_last_lsn; | ||
149 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
150 | |||
151 | if (!lsn) | ||
152 | return 0; | ||
153 | return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); | ||
154 | } | ||
155 | |||
127 | STATIC int | 156 | STATIC int |
128 | xfs_file_fsync( | 157 | xfs_file_fsync( |
129 | struct file *file, | 158 | struct file *file, |
@@ -137,6 +166,7 @@ xfs_file_fsync( | |||
137 | struct xfs_trans *tp; | 166 | struct xfs_trans *tp; |
138 | int error = 0; | 167 | int error = 0; |
139 | int log_flushed = 0; | 168 | int log_flushed = 0; |
169 | xfs_lsn_t lsn = 0; | ||
140 | 170 | ||
141 | trace_xfs_file_fsync(ip); | 171 | trace_xfs_file_fsync(ip); |
142 | 172 | ||
@@ -149,10 +179,6 @@ xfs_file_fsync( | |||
149 | 179 | ||
150 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 180 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
151 | 181 | ||
152 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
153 | xfs_ioend_wait(ip); | ||
154 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
155 | |||
156 | if (mp->m_flags & XFS_MOUNT_BARRIER) { | 182 | if (mp->m_flags & XFS_MOUNT_BARRIER) { |
157 | /* | 183 | /* |
158 | * If we have an RT and/or log subvolume we need to make sure | 184 | * If we have an RT and/or log subvolume we need to make sure |
@@ -216,11 +242,11 @@ xfs_file_fsync( | |||
216 | * transaction. So we play it safe and fire off the | 242 | * transaction. So we play it safe and fire off the |
217 | * transaction anyway. | 243 | * transaction anyway. |
218 | */ | 244 | */ |
219 | xfs_trans_ijoin(tp, ip); | 245 | xfs_trans_ijoin(tp, ip, 0); |
220 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 246 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
221 | xfs_trans_set_sync(tp); | 247 | error = xfs_trans_commit(tp, 0); |
222 | error = _xfs_trans_commit(tp, 0, &log_flushed); | ||
223 | 248 | ||
249 | lsn = ip->i_itemp->ili_last_lsn; | ||
224 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 250 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
225 | } else { | 251 | } else { |
226 | /* | 252 | /* |
@@ -231,14 +257,14 @@ xfs_file_fsync( | |||
231 | * disk yet, the inode will still be pinned. If it is, | 257 | * disk yet, the inode will still be pinned. If it is, |
232 | * force the log. | 258 | * force the log. |
233 | */ | 259 | */ |
234 | if (xfs_ipincount(ip)) { | 260 | if (xfs_ipincount(ip)) |
235 | error = _xfs_log_force_lsn(mp, | 261 | lsn = ip->i_itemp->ili_last_lsn; |
236 | ip->i_itemp->ili_last_lsn, | ||
237 | XFS_LOG_SYNC, &log_flushed); | ||
238 | } | ||
239 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 262 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
240 | } | 263 | } |
241 | 264 | ||
265 | if (!error && lsn) | ||
266 | error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); | ||
267 | |||
242 | /* | 268 | /* |
243 | * If we only have a single device, and the log force above was | 269 | * a no-op we might have to flush the data device cache here. |
244 | * a no-op we might have to flush the data device cache here. | 270 | * a no-op we might have to flush the data device cache here. |
@@ -317,7 +343,19 @@ xfs_file_aio_read( | |||
317 | if (XFS_FORCED_SHUTDOWN(mp)) | 343 | if (XFS_FORCED_SHUTDOWN(mp)) |
318 | return -EIO; | 344 | return -EIO; |
319 | 345 | ||
320 | if (unlikely(ioflags & IO_ISDIRECT)) { | 346 | /* |
347 | * Locking is a bit tricky here. If we take an exclusive lock | ||
348 | * for direct IO, we effectively serialise all new concurrent | ||
349 | * read IO to this file and block it behind IO that is currently in | ||
350 | * progress because IO in progress holds the IO lock shared. We only | ||
351 | * need to hold the lock exclusive to blow away the page cache, so | ||
352 | * only take the lock exclusively if the page cache needs invalidation. | ||
353 | * This allows the normal direct IO case of no page cache pages to | ||
354 | * proceed concurrently without serialisation. | ||
355 | */ | ||
356 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); | ||
357 | if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { | ||
358 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | ||
321 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | 359 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); |
322 | 360 | ||
323 | if (inode->i_mapping->nrpages) { | 361 | if (inode->i_mapping->nrpages) { |
@@ -330,8 +368,7 @@ xfs_file_aio_read( | |||
330 | } | 368 | } |
331 | } | 369 | } |
332 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | 370 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
333 | } else | 371 | } |
334 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); | ||
335 | 372 | ||
336 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 373 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
337 | 374 | ||
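Stripped of context lines, the new read-side locking above follows a take-shared, upgrade-only-if-needed, demote-back shape; rechecking nrpages after retaking the lock matters because pages can be added while no lock is held. A condensed sketch:

    xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
    if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
        /* invalidating the page cache requires exclusivity */
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
        if (inode->i_mapping->nrpages) {
            /* flush and invalidate the cached pages here */
        }
        /* demote so concurrent direct IO readers can proceed */
        xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
    }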
@@ -407,11 +444,13 @@ xfs_aio_write_isize_update( | |||
407 | */ | 444 | */ |
408 | STATIC void | 445 | STATIC void |
409 | xfs_aio_write_newsize_update( | 446 | xfs_aio_write_newsize_update( |
410 | struct xfs_inode *ip) | 447 | struct xfs_inode *ip, |
448 | xfs_fsize_t new_size) | ||
411 | { | 449 | { |
412 | if (ip->i_new_size) { | 450 | if (new_size == ip->i_new_size) { |
413 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | 451 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); |
414 | ip->i_new_size = 0; | 452 | if (new_size == ip->i_new_size) |
453 | ip->i_new_size = 0; | ||
415 | if (ip->i_d.di_size > ip->i_size) | 454 | if (ip->i_d.di_size > ip->i_size) |
416 | ip->i_d.di_size = ip->i_size; | 455 | ip->i_d.di_size = ip->i_size; |
417 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | 456 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
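The two-level check above is the heart of the fix: each writer remembers the i_new_size value it published and only clears the field if it is still the current one, re-testing under the ilock so a concurrent write that extended i_new_size further is not clobbered. Condensed:

    /* only the IO that defined the current i_new_size may clear it */
    if (new_size == ip->i_new_size) {
        xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
        if (new_size == ip->i_new_size) /* recheck under the lock */
            ip->i_new_size = 0;
        if (ip->i_d.di_size > ip->i_size)
            ip->i_d.di_size = ip->i_size;
        xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
    }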
@@ -462,7 +501,7 @@ xfs_file_splice_write( | |||
462 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 501 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
463 | 502 | ||
464 | xfs_aio_write_isize_update(inode, ppos, ret); | 503 | xfs_aio_write_isize_update(inode, ppos, ret); |
465 | xfs_aio_write_newsize_update(ip); | 504 | xfs_aio_write_newsize_update(ip, new_size); |
466 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 505 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
467 | return ret; | 506 | return ret; |
468 | } | 507 | } |
@@ -500,11 +539,9 @@ xfs_zero_last_block( | |||
500 | 539 | ||
501 | last_fsb = XFS_B_TO_FSBT(mp, isize); | 540 | last_fsb = XFS_B_TO_FSBT(mp, isize); |
502 | nimaps = 1; | 541 | nimaps = 1; |
503 | error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, | 542 | error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); |
504 | &nimaps, NULL); | 543 | if (error) |
505 | if (error) { | ||
506 | return error; | 544 | return error; |
507 | } | ||
508 | ASSERT(nimaps > 0); | 545 | ASSERT(nimaps > 0); |
509 | /* | 546 | /* |
510 | * If the block underlying isize is just a hole, then there | 547 | * If the block underlying isize is just a hole, then there |
@@ -595,8 +632,8 @@ xfs_zero_eof( | |||
595 | while (start_zero_fsb <= end_zero_fsb) { | 632 | while (start_zero_fsb <= end_zero_fsb) { |
596 | nimaps = 1; | 633 | nimaps = 1; |
597 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; | 634 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; |
598 | error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, | 635 | error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, |
599 | 0, NULL, 0, &imap, &nimaps, NULL); | 636 | &imap, &nimaps, 0); |
600 | if (error) { | 637 | if (error) { |
601 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 638 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
602 | return error; | 639 | return error; |
@@ -659,6 +696,7 @@ xfs_file_aio_write_checks( | |||
659 | struct file *file, | 696 | struct file *file, |
660 | loff_t *pos, | 697 | loff_t *pos, |
661 | size_t *count, | 698 | size_t *count, |
699 | xfs_fsize_t *new_sizep, | ||
662 | int *iolock) | 700 | int *iolock) |
663 | { | 701 | { |
664 | struct inode *inode = file->f_mapping->host; | 702 | struct inode *inode = file->f_mapping->host; |
@@ -666,6 +704,9 @@ xfs_file_aio_write_checks( | |||
666 | xfs_fsize_t new_size; | 704 | xfs_fsize_t new_size; |
667 | int error = 0; | 705 | int error = 0; |
668 | 706 | ||
707 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
708 | *new_sizep = 0; | ||
709 | restart: | ||
669 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); | 710 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
670 | if (error) { | 711 | if (error) { |
671 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 712 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); |
@@ -673,20 +714,41 @@ xfs_file_aio_write_checks( | |||
673 | return error; | 714 | return error; |
674 | } | 715 | } |
675 | 716 | ||
676 | new_size = *pos + *count; | ||
677 | if (new_size > ip->i_size) | ||
678 | ip->i_new_size = new_size; | ||
679 | |||
680 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | 717 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
681 | file_update_time(file); | 718 | file_update_time(file); |
682 | 719 | ||
683 | /* | 720 | /* |
684 | * If the offset is beyond the size of the file, we need to zero any | 721 | * If the offset is beyond the size of the file, we need to zero any |
685 | * blocks that fall between the existing EOF and the start of this | 722 | * blocks that fall between the existing EOF and the start of this |
686 | * write. | 723 | * write. There is no need to issue zeroing if another in-flight IO ends |
724 | * at or before this one. If zeroing is needed and we are currently | ||
725 | * holding the iolock shared, we need to update it to exclusive which | ||
726 | * involves dropping all locks and relocking to maintain correct locking | ||
727 | * order. If we do this, restart the function to ensure all checks and | ||
728 | * values are still valid. | ||
687 | */ | 729 | */ |
688 | if (*pos > ip->i_size) | 730 | if ((ip->i_new_size && *pos > ip->i_new_size) || |
731 | (!ip->i_new_size && *pos > ip->i_size)) { | ||
732 | if (*iolock == XFS_IOLOCK_SHARED) { | ||
733 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
734 | *iolock = XFS_IOLOCK_EXCL; | ||
735 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
736 | goto restart; | ||
737 | } | ||
689 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | 738 | error = -xfs_zero_eof(ip, *pos, ip->i_size); |
739 | } | ||
740 | |||
741 | /* | ||
742 | * If this IO extends beyond EOF, we may need to update ip->i_new_size. | ||
743 | * We have already zeroed space beyond EOF (if necessary). Only update | ||
744 | * ip->i_new_size if this IO ends beyond any other in-flight writes. | ||
745 | */ | ||
746 | new_size = *pos + *count; | ||
747 | if (new_size > ip->i_size) { | ||
748 | if (new_size > ip->i_new_size) | ||
749 | ip->i_new_size = new_size; | ||
750 | *new_sizep = new_size; | ||
751 | } | ||
690 | 752 | ||
691 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | 753 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
692 | if (error) | 754 | if (error) |
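The goto-restart above implements a lock upgrade under the iolock-before-ilock ordering rule: both locks are dropped, retaken with the exclusive iolock, and every check is re-run because anything could have changed in the unlocked window. A condensed sketch, where `zeroing_needed` stands in for the i_new_size/i_size comparison in the hunk:

restart:
    /* checks that must hold at IO submission run under ILOCK_EXCL */
    if (zeroing_needed && *iolock == XFS_IOLOCK_SHARED) {
        /* drop both locks, retake in order with the stronger iolock */
        xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
        *iolock = XFS_IOLOCK_EXCL;
        xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
        goto restart;           /* revalidate everything from scratch */
    }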
@@ -721,7 +783,7 @@ xfs_file_aio_write_checks( | |||
721 | * the dio layer. To avoid the problem with aio, we also need to wait for | 783 | * the dio layer. To avoid the problem with aio, we also need to wait for |
722 | * outstanding IOs to complete so that unwritten extent conversion is completed | 784 | * outstanding IOs to complete so that unwritten extent conversion is completed |
723 | * before we try to map the overlapping block. This is currently implemented by | 785 | * before we try to map the overlapping block. This is currently implemented by |
724 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | 786 | * hitting it with a big hammer (i.e. inode_dio_wait()). |
725 | * | 787 | * |
726 | * Returns with locks held indicated by @iolock and errors indicated by | 788 | * Returns with locks held indicated by @iolock and errors indicated by |
727 | * negative return values. | 789 | * negative return values. |
@@ -733,6 +795,7 @@ xfs_file_dio_aio_write( | |||
733 | unsigned long nr_segs, | 795 | unsigned long nr_segs, |
734 | loff_t pos, | 796 | loff_t pos, |
735 | size_t ocount, | 797 | size_t ocount, |
798 | xfs_fsize_t *new_size, | ||
736 | int *iolock) | 799 | int *iolock) |
737 | { | 800 | { |
738 | struct file *file = iocb->ki_filp; | 801 | struct file *file = iocb->ki_filp; |
@@ -753,18 +816,35 @@ xfs_file_dio_aio_write( | |||
753 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | 816 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) |
754 | unaligned_io = 1; | 817 | unaligned_io = 1; |
755 | 818 | ||
756 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | 819 | /* |
820 | * We don't need to take an exclusive lock unless the page cache needs | ||
821 | * to be invalidated or unaligned IO is being executed. We don't need to | ||
822 | * consider the EOF extension case here because | ||
823 | * xfs_file_aio_write_checks() will relock the inode as necessary for | ||
824 | * EOF zeroing cases and fill out the new inode size as appropriate. | ||
825 | */ | ||
826 | if (unaligned_io || mapping->nrpages) | ||
757 | *iolock = XFS_IOLOCK_EXCL; | 827 | *iolock = XFS_IOLOCK_EXCL; |
758 | else | 828 | else |
759 | *iolock = XFS_IOLOCK_SHARED; | 829 | *iolock = XFS_IOLOCK_SHARED; |
760 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | 830 | xfs_rw_ilock(ip, *iolock); |
761 | 831 | ||
762 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | 832 | /* |
833 | * Recheck if there are cached pages that need invalidating after we got | ||
834 | * the iolock to protect against other threads adding new pages while | ||
835 | * we were waiting for the iolock. | ||
836 | */ | ||
837 | if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) { | ||
838 | xfs_rw_iunlock(ip, *iolock); | ||
839 | *iolock = XFS_IOLOCK_EXCL; | ||
840 | xfs_rw_ilock(ip, *iolock); | ||
841 | } | ||
842 | |||
843 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); | ||
763 | if (ret) | 844 | if (ret) |
764 | return ret; | 845 | return ret; |
765 | 846 | ||
766 | if (mapping->nrpages) { | 847 | if (mapping->nrpages) { |
767 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
768 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | 848 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, |
769 | FI_REMAPF_LOCKED); | 849 | FI_REMAPF_LOCKED); |
770 | if (ret) | 850 | if (ret) |
@@ -776,7 +856,7 @@ xfs_file_dio_aio_write( | |||
776 | * otherwise demote the lock if we had to flush cached pages | 856 | * otherwise demote the lock if we had to flush cached pages |
777 | */ | 857 | */ |
778 | if (unaligned_io) | 858 | if (unaligned_io) |
779 | xfs_ioend_wait(ip); | 859 | inode_dio_wait(inode); |
780 | else if (*iolock == XFS_IOLOCK_EXCL) { | 860 | else if (*iolock == XFS_IOLOCK_EXCL) { |
781 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | 861 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
782 | *iolock = XFS_IOLOCK_SHARED; | 862 | *iolock = XFS_IOLOCK_SHARED; |
@@ -798,6 +878,7 @@ xfs_file_buffered_aio_write( | |||
798 | unsigned long nr_segs, | 878 | unsigned long nr_segs, |
799 | loff_t pos, | 879 | loff_t pos, |
800 | size_t ocount, | 880 | size_t ocount, |
881 | xfs_fsize_t *new_size, | ||
801 | int *iolock) | 882 | int *iolock) |
802 | { | 883 | { |
803 | struct file *file = iocb->ki_filp; | 884 | struct file *file = iocb->ki_filp; |
@@ -809,9 +890,9 @@ xfs_file_buffered_aio_write( | |||
809 | size_t count = ocount; | 890 | size_t count = ocount; |
810 | 891 | ||
811 | *iolock = XFS_IOLOCK_EXCL; | 892 | *iolock = XFS_IOLOCK_EXCL; |
812 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | 893 | xfs_rw_ilock(ip, *iolock); |
813 | 894 | ||
814 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | 895 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); |
815 | if (ret) | 896 | if (ret) |
816 | return ret; | 897 | return ret; |
817 | 898 | ||
@@ -851,6 +932,7 @@ xfs_file_aio_write( | |||
851 | ssize_t ret; | 932 | ssize_t ret; |
852 | int iolock; | 933 | int iolock; |
853 | size_t ocount = 0; | 934 | size_t ocount = 0; |
935 | xfs_fsize_t new_size = 0; | ||
854 | 936 | ||
855 | XFS_STATS_INC(xs_write_calls); | 937 | XFS_STATS_INC(xs_write_calls); |
856 | 938 | ||
@@ -870,10 +952,10 @@ xfs_file_aio_write( | |||
870 | 952 | ||
871 | if (unlikely(file->f_flags & O_DIRECT)) | 953 | if (unlikely(file->f_flags & O_DIRECT)) |
872 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, | 954 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
873 | ocount, &iolock); | 955 | ocount, &new_size, &iolock); |
874 | else | 956 | else |
875 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | 957 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, |
876 | ocount, &iolock); | 958 | ocount, &new_size, &iolock); |
877 | 959 | ||
878 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); | 960 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
879 | 961 | ||
@@ -894,7 +976,7 @@ xfs_file_aio_write( | |||
894 | } | 976 | } |
895 | 977 | ||
896 | out_unlock: | 978 | out_unlock: |
897 | xfs_aio_write_newsize_update(ip); | 979 | xfs_aio_write_newsize_update(ip, new_size); |
898 | xfs_rw_iunlock(ip, iolock); | 980 | xfs_rw_iunlock(ip, iolock); |
899 | return ret; | 981 | return ret; |
900 | } | 982 | } |
@@ -1087,7 +1169,7 @@ const struct file_operations xfs_dir_file_operations = { | |||
1087 | #ifdef CONFIG_COMPAT | 1169 | #ifdef CONFIG_COMPAT |
1088 | .compat_ioctl = xfs_file_compat_ioctl, | 1170 | .compat_ioctl = xfs_file_compat_ioctl, |
1089 | #endif | 1171 | #endif |
1090 | .fsync = xfs_file_fsync, | 1172 | .fsync = xfs_dir_fsync, |
1091 | }; | 1173 | }; |
1092 | 1174 | ||
1093 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1175 | static const struct vm_operations_struct xfs_file_vm_ops = { |
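Taken together, the xfs_file.c changes leave the direct IO write path with a three-step locking policy: pick the weakest safe iolock mode up front, upgrade if the page cache filled in while waiting for the lock, and serialise only sub-block-aligned IO behind inode_dio_wait(). A condensed sketch:

    /* sub-block alignment forces exclusive locking and an IO drain */
    if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
        unaligned_io = 1;

    *iolock = (unaligned_io || mapping->nrpages) ?
            XFS_IOLOCK_EXCL : XFS_IOLOCK_SHARED;
    xfs_rw_ilock(ip, *iolock);

    if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
        /* pages appeared while we slept on the lock: upgrade */
        xfs_rw_iunlock(ip, *iolock);
        *iolock = XFS_IOLOCK_EXCL;
        xfs_rw_ilock(ip, *iolock);
    }

    if (unaligned_io)
        inode_dio_wait(inode);  /* wait out overlapping direct IO */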
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 3ff3d9e23ded..5170306a1009 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -682,7 +682,7 @@ xfs_filestream_new_ag( | |||
682 | ip = ap->ip; | 682 | ip = ap->ip; |
683 | mp = ip->i_mount; | 683 | mp = ip->i_mount; |
684 | cache = mp->m_filestream; | 684 | cache = mp->m_filestream; |
685 | minlen = ap->alen; | 685 | minlen = ap->length; |
686 | *agp = NULLAGNUMBER; | 686 | *agp = NULLAGNUMBER; |
687 | 687 | ||
688 | /* | 688 | /* |
@@ -761,7 +761,7 @@ xfs_filestream_new_ag( | |||
761 | */ | 761 | */ |
762 | ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; | 762 | ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; |
763 | flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | | 763 | flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | |
764 | (ap->low ? XFS_PICK_LOWSPACE : 0); | 764 | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); |
765 | 765 | ||
766 | err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); | 766 | err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); |
767 | if (err || *agp == NULLAGNUMBER) | 767 | if (err || *agp == NULLAGNUMBER) |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c index ed88ed16811c..ed88ed16811c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/xfs_fs_subr.c | |||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9153d2c77caf..1c6fdeb702ff 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -194,6 +194,10 @@ xfs_growfs_data_private( | |||
194 | bp = xfs_buf_get(mp->m_ddev_targp, | 194 | bp = xfs_buf_get(mp->m_ddev_targp, |
195 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | 195 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), |
196 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 196 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); |
197 | if (!bp) { | ||
198 | error = ENOMEM; | ||
199 | goto error0; | ||
200 | } | ||
197 | agf = XFS_BUF_TO_AGF(bp); | 201 | agf = XFS_BUF_TO_AGF(bp); |
198 | memset(agf, 0, mp->m_sb.sb_sectsize); | 202 | memset(agf, 0, mp->m_sb.sb_sectsize); |
199 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); | 203 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); |
@@ -216,16 +220,21 @@ xfs_growfs_data_private( | |||
216 | tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); | 220 | tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); |
217 | agf->agf_freeblks = cpu_to_be32(tmpsize); | 221 | agf->agf_freeblks = cpu_to_be32(tmpsize); |
218 | agf->agf_longest = cpu_to_be32(tmpsize); | 222 | agf->agf_longest = cpu_to_be32(tmpsize); |
219 | error = xfs_bwrite(mp, bp); | 223 | error = xfs_bwrite(bp); |
220 | if (error) { | 224 | xfs_buf_relse(bp); |
225 | if (error) | ||
221 | goto error0; | 226 | goto error0; |
222 | } | 227 | |
223 | /* | 228 | /* |
224 | * AG inode header block | 229 | * AG inode header block |
225 | */ | 230 | */ |
226 | bp = xfs_buf_get(mp->m_ddev_targp, | 231 | bp = xfs_buf_get(mp->m_ddev_targp, |
227 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 232 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
228 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 233 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); |
234 | if (!bp) { | ||
235 | error = ENOMEM; | ||
236 | goto error0; | ||
237 | } | ||
229 | agi = XFS_BUF_TO_AGI(bp); | 238 | agi = XFS_BUF_TO_AGI(bp); |
230 | memset(agi, 0, mp->m_sb.sb_sectsize); | 239 | memset(agi, 0, mp->m_sb.sb_sectsize); |
231 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); | 240 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); |
@@ -240,10 +249,11 @@ xfs_growfs_data_private( | |||
240 | agi->agi_dirino = cpu_to_be32(NULLAGINO); | 249 | agi->agi_dirino = cpu_to_be32(NULLAGINO); |
241 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) | 250 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) |
242 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 251 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
243 | error = xfs_bwrite(mp, bp); | 252 | error = xfs_bwrite(bp); |
244 | if (error) { | 253 | xfs_buf_relse(bp); |
254 | if (error) | ||
245 | goto error0; | 255 | goto error0; |
246 | } | 256 | |
247 | /* | 257 | /* |
248 | * BNO btree root block | 258 | * BNO btree root block |
249 | */ | 259 | */ |
@@ -251,6 +261,10 @@ xfs_growfs_data_private( | |||
251 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), | 261 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), |
252 | BTOBB(mp->m_sb.sb_blocksize), | 262 | BTOBB(mp->m_sb.sb_blocksize), |
253 | XBF_LOCK | XBF_MAPPED); | 263 | XBF_LOCK | XBF_MAPPED); |
264 | if (!bp) { | ||
265 | error = ENOMEM; | ||
266 | goto error0; | ||
267 | } | ||
254 | block = XFS_BUF_TO_BLOCK(bp); | 268 | block = XFS_BUF_TO_BLOCK(bp); |
255 | memset(block, 0, mp->m_sb.sb_blocksize); | 269 | memset(block, 0, mp->m_sb.sb_blocksize); |
256 | block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); | 270 | block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); |
@@ -262,10 +276,11 @@ xfs_growfs_data_private( | |||
262 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); | 276 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); |
263 | arec->ar_blockcount = cpu_to_be32( | 277 | arec->ar_blockcount = cpu_to_be32( |
264 | agsize - be32_to_cpu(arec->ar_startblock)); | 278 | agsize - be32_to_cpu(arec->ar_startblock)); |
265 | error = xfs_bwrite(mp, bp); | 279 | error = xfs_bwrite(bp); |
266 | if (error) { | 280 | xfs_buf_relse(bp); |
281 | if (error) | ||
267 | goto error0; | 282 | goto error0; |
268 | } | 283 | |
269 | /* | 284 | /* |
270 | * CNT btree root block | 285 | * CNT btree root block |
271 | */ | 286 | */ |
@@ -273,6 +288,10 @@ xfs_growfs_data_private( | |||
273 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), | 288 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), |
274 | BTOBB(mp->m_sb.sb_blocksize), | 289 | BTOBB(mp->m_sb.sb_blocksize), |
275 | XBF_LOCK | XBF_MAPPED); | 290 | XBF_LOCK | XBF_MAPPED); |
291 | if (!bp) { | ||
292 | error = ENOMEM; | ||
293 | goto error0; | ||
294 | } | ||
276 | block = XFS_BUF_TO_BLOCK(bp); | 295 | block = XFS_BUF_TO_BLOCK(bp); |
277 | memset(block, 0, mp->m_sb.sb_blocksize); | 296 | memset(block, 0, mp->m_sb.sb_blocksize); |
278 | block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); | 297 | block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); |
@@ -285,10 +304,11 @@ xfs_growfs_data_private( | |||
285 | arec->ar_blockcount = cpu_to_be32( | 304 | arec->ar_blockcount = cpu_to_be32( |
286 | agsize - be32_to_cpu(arec->ar_startblock)); | 305 | agsize - be32_to_cpu(arec->ar_startblock)); |
287 | nfree += be32_to_cpu(arec->ar_blockcount); | 306 | nfree += be32_to_cpu(arec->ar_blockcount); |
288 | error = xfs_bwrite(mp, bp); | 307 | error = xfs_bwrite(bp); |
289 | if (error) { | 308 | xfs_buf_relse(bp); |
309 | if (error) | ||
290 | goto error0; | 310 | goto error0; |
291 | } | 311 | |
292 | /* | 312 | /* |
293 | * INO btree root block | 313 | * INO btree root block |
294 | */ | 314 | */ |
@@ -296,6 +316,10 @@ xfs_growfs_data_private( | |||
296 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), | 316 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), |
297 | BTOBB(mp->m_sb.sb_blocksize), | 317 | BTOBB(mp->m_sb.sb_blocksize), |
298 | XBF_LOCK | XBF_MAPPED); | 318 | XBF_LOCK | XBF_MAPPED); |
319 | if (!bp) { | ||
320 | error = ENOMEM; | ||
321 | goto error0; | ||
322 | } | ||
299 | block = XFS_BUF_TO_BLOCK(bp); | 323 | block = XFS_BUF_TO_BLOCK(bp); |
300 | memset(block, 0, mp->m_sb.sb_blocksize); | 324 | memset(block, 0, mp->m_sb.sb_blocksize); |
301 | block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); | 325 | block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); |
@@ -303,10 +327,10 @@ xfs_growfs_data_private( | |||
303 | block->bb_numrecs = 0; | 327 | block->bb_numrecs = 0; |
304 | block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | 328 | block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); |
305 | block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | 329 | block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); |
306 | error = xfs_bwrite(mp, bp); | 330 | error = xfs_bwrite(bp); |
307 | if (error) { | 331 | xfs_buf_relse(bp); |
332 | if (error) | ||
308 | goto error0; | 333 | goto error0; |
309 | } | ||
310 | } | 334 | } |
311 | xfs_trans_agblocks_delta(tp, nfree); | 335 | xfs_trans_agblocks_delta(tp, nfree); |
312 | /* | 336 | /* |
@@ -396,9 +420,9 @@ xfs_growfs_data_private( | |||
396 | * just issue a warning and continue. The real work is | 420 | * just issue a warning and continue. The real work is |
397 | * already done and committed. | 421 | * already done and committed. |
398 | */ | 422 | */ |
399 | if (!(error = xfs_bwrite(mp, bp))) { | 423 | error = xfs_bwrite(bp); |
400 | continue; | 424 | xfs_buf_relse(bp); |
401 | } else { | 425 | if (error) { |
402 | xfs_warn(mp, | 426 | xfs_warn(mp, |
403 | "write error %d updating secondary superblock for ag %d", | 427 | "write error %d updating secondary superblock for ag %d", |
404 | error, agno); | 428 | error, agno); |
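Each header block in the growfs loop above now follows the same defensive pattern: xfs_buf_get() can fail and must be checked, and since xfs_bwrite() no longer releases the buffer, the release is explicit and unconditional. A sketch, where daddr stands in for the per-block disk address:

    bp = xfs_buf_get(mp->m_ddev_targp, daddr, XFS_FSS_TO_BB(mp, 1),
             XBF_LOCK | XBF_MAPPED);
    if (!bp) {
        error = ENOMEM;         /* uncached buffer allocation failed */
        goto error0;
    }
    /* ... initialise the on-disk header in bp->b_addr ... */
    error = xfs_bwrite(bp);
    xfs_buf_relse(bp);          /* release whether or not the write failed */
    if (error)
        goto error0;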
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/xfs_globals.c index 76e81cff70b9..76e81cff70b9 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/xfs_globals.c | |||
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index dd5628bd8d0b..169380e66057 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -150,7 +150,7 @@ xfs_check_agi_freecount( | |||
150 | /* | 150 | /* |
151 | * Initialise a new set of inodes. | 151 | * Initialise a new set of inodes. |
152 | */ | 152 | */ |
153 | STATIC void | 153 | STATIC int |
154 | xfs_ialloc_inode_init( | 154 | xfs_ialloc_inode_init( |
155 | struct xfs_mount *mp, | 155 | struct xfs_mount *mp, |
156 | struct xfs_trans *tp, | 156 | struct xfs_trans *tp, |
@@ -202,9 +202,8 @@ xfs_ialloc_inode_init( | |||
202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, |
203 | mp->m_bsize * blks_per_cluster, | 203 | mp->m_bsize * blks_per_cluster, |
204 | XBF_LOCK); | 204 | XBF_LOCK); |
205 | ASSERT(fbuf); | 205 | if (!fbuf) |
206 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | 206 | return ENOMEM; |
207 | |||
208 | /* | 207 | /* |
209 | * Initialize all inodes in this buffer and then log them. | 208 | * Initialize all inodes in this buffer and then log them. |
210 | * | 209 | * |
@@ -226,6 +225,7 @@ xfs_ialloc_inode_init( | |||
226 | } | 225 | } |
227 | xfs_trans_inode_alloc_buf(tp, fbuf); | 226 | xfs_trans_inode_alloc_buf(tp, fbuf); |
228 | } | 227 | } |
228 | return 0; | ||
229 | } | 229 | } |
230 | 230 | ||
231 | /* | 231 | /* |
@@ -370,9 +370,11 @@ xfs_ialloc_ag_alloc( | |||
370 | * rather than a linear progression to prevent the next generation | 370 | * rather than a linear progression to prevent the next generation |
371 | * number from being easily guessable. | 371 | * number from being easily guessable. |
372 | */ | 372 | */ |
373 | xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, | 373 | error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, |
374 | random32()); | 374 | args.len, random32()); |
375 | 375 | ||
376 | if (error) | ||
377 | return error; | ||
376 | /* | 378 | /* |
377 | * Convert the results. | 379 | * Convert the results. |
378 | */ | 380 | */ |
@@ -1486,7 +1488,7 @@ xfs_read_agi( | |||
1486 | if (error) | 1488 | if (error) |
1487 | return error; | 1489 | return error; |
1488 | 1490 | ||
1489 | ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); | 1491 | ASSERT(!xfs_buf_geterror(*bpp)); |
1490 | agi = XFS_BUF_TO_AGI(*bpp); | 1492 | agi = XFS_BUF_TO_AGI(*bpp); |
1491 | 1493 | ||
1492 | /* | 1494 | /* |
@@ -1503,7 +1505,7 @@ xfs_read_agi( | |||
1503 | return XFS_ERROR(EFSCORRUPTED); | 1505 | return XFS_ERROR(EFSCORRUPTED); |
1504 | } | 1506 | } |
1505 | 1507 | ||
1506 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); | 1508 | xfs_buf_set_ref(*bpp, XFS_AGI_REF); |
1507 | 1509 | ||
1508 | xfs_check_agi_unlinked(agi); | 1510 | xfs_check_agi_unlinked(agi); |
1509 | return 0; | 1511 | return 0; |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 7759812c1bbe..0fa98b1c70ea 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -75,7 +75,6 @@ xfs_inode_alloc( | |||
75 | return NULL; | 75 | return NULL; |
76 | } | 76 | } |
77 | 77 | ||
78 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
79 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 78 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
80 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 79 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
81 | ASSERT(completion_done(&ip->i_flush)); | 80 | ASSERT(completion_done(&ip->i_flush)); |
@@ -150,7 +149,6 @@ xfs_inode_free( | |||
150 | } | 149 | } |
151 | 150 | ||
152 | /* asserts to verify all state is correct here */ | 151 | /* asserts to verify all state is correct here */ |
153 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
154 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 152 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
155 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 153 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
156 | ASSERT(completion_done(&ip->i_flush)); | 154 | ASSERT(completion_done(&ip->i_flush)); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2fcca4b03ed3..c0237c602f11 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -190,12 +190,6 @@ xfs_imap_to_bp( | |||
190 | } | 190 | } |
191 | 191 | ||
192 | xfs_inobp_check(mp, bp); | 192 | xfs_inobp_check(mp, bp); |
193 | |||
194 | /* | ||
195 | * Mark the buffer as an inode buffer now that it looks good | ||
196 | */ | ||
197 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
198 | |||
199 | *bpp = bp; | 193 | *bpp = bp; |
200 | return 0; | 194 | return 0; |
201 | } | 195 | } |
@@ -1152,7 +1146,7 @@ xfs_ialloc( | |||
1152 | /* | 1146 | /* |
1153 | * Log the new values stuffed into the inode. | 1147 | * Log the new values stuffed into the inode. |
1154 | */ | 1148 | */ |
1155 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | 1149 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1156 | xfs_trans_log_inode(tp, ip, flags); | 1150 | xfs_trans_log_inode(tp, ip, flags); |
1157 | 1151 | ||
1158 | /* now that we have an i_mode we can setup inode ops and unlock */ | 1152 | /* now that we have an i_mode we can setup inode ops and unlock */ |
@@ -1187,6 +1181,7 @@ xfs_isize_check( | |||
1187 | xfs_fileoff_t map_first; | 1181 | xfs_fileoff_t map_first; |
1188 | int nimaps; | 1182 | int nimaps; |
1189 | xfs_bmbt_irec_t imaps[2]; | 1183 | xfs_bmbt_irec_t imaps[2]; |
1184 | int error; | ||
1190 | 1185 | ||
1191 | if (!S_ISREG(ip->i_d.di_mode)) | 1186 | if (!S_ISREG(ip->i_d.di_mode)) |
1192 | return; | 1187 | return; |
@@ -1203,13 +1198,12 @@ xfs_isize_check( | |||
1203 | * The filesystem could be shutting down, so bmapi may return | 1198 | * The filesystem could be shutting down, so bmapi may return |
1204 | * an error. | 1199 | * an error. |
1205 | */ | 1200 | */ |
1206 | if (xfs_bmapi(NULL, ip, map_first, | 1201 | error = xfs_bmapi_read(ip, map_first, |
1207 | (XFS_B_TO_FSB(mp, | 1202 | (XFS_B_TO_FSB(mp, |
1208 | (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - | 1203 | (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), |
1209 | map_first), | 1204 | imaps, &nimaps, XFS_BMAPI_ENTIRE); |
1210 | XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, | 1205 | if (error) |
1211 | NULL)) | 1206 | return; |
1212 | return; | ||
1213 | ASSERT(nimaps == 1); | 1207 | ASSERT(nimaps == 1); |
1214 | ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); | 1208 | ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); |
1215 | } | 1209 | } |
@@ -1297,7 +1291,7 @@ xfs_itruncate_extents( | |||
1297 | */ | 1291 | */ |
1298 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1292 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1299 | if (committed) | 1293 | if (committed) |
1300 | xfs_trans_ijoin(tp, ip); | 1294 | xfs_trans_ijoin(tp, ip, 0); |
1301 | if (error) | 1295 | if (error) |
1302 | goto out_bmap_cancel; | 1296 | goto out_bmap_cancel; |
1303 | 1297 | ||
@@ -1313,7 +1307,7 @@ xfs_itruncate_extents( | |||
1313 | error = xfs_trans_commit(tp, 0); | 1307 | error = xfs_trans_commit(tp, 0); |
1314 | tp = ntp; | 1308 | tp = ntp; |
1315 | 1309 | ||
1316 | xfs_trans_ijoin(tp, ip); | 1310 | xfs_trans_ijoin(tp, ip, 0); |
1317 | 1311 | ||
1318 | if (error) | 1312 | if (error) |
1319 | goto out; | 1313 | goto out; |
@@ -1644,7 +1638,7 @@ xfs_iunlink_remove( | |||
1644 | * inodes that are in memory - they all must be marked stale and attached to | 1638 | * inodes that are in memory - they all must be marked stale and attached to |
1645 | * the cluster buffer. | 1639 | * the cluster buffer. |
1646 | */ | 1640 | */ |
1647 | STATIC void | 1641 | STATIC int |
1648 | xfs_ifree_cluster( | 1642 | xfs_ifree_cluster( |
1649 | xfs_inode_t *free_ip, | 1643 | xfs_inode_t *free_ip, |
1650 | xfs_trans_t *tp, | 1644 | xfs_trans_t *tp, |
@@ -1690,6 +1684,8 @@ xfs_ifree_cluster( | |||
1690 | mp->m_bsize * blks_per_cluster, | 1684 | mp->m_bsize * blks_per_cluster, |
1691 | XBF_LOCK); | 1685 | XBF_LOCK); |
1692 | 1686 | ||
1687 | if (!bp) | ||
1688 | return ENOMEM; | ||
1693 | /* | 1689 | /* |
1694 | * Walk the inodes already attached to the buffer and mark them | 1690 | * Walk the inodes already attached to the buffer and mark them |
1695 | * stale. These will all have the flush locks held, so an | 1691 | * stale. These will all have the flush locks held, so an |
@@ -1799,6 +1795,7 @@ retry: | |||
1799 | } | 1795 | } |
1800 | 1796 | ||
1801 | xfs_perag_put(pag); | 1797 | xfs_perag_put(pag); |
1798 | return 0; | ||
1802 | } | 1799 | } |
1803 | 1800 | ||
1804 | /* | 1801 | /* |
@@ -1878,10 +1875,10 @@ xfs_ifree( | |||
1878 | dip->di_mode = 0; | 1875 | dip->di_mode = 0; |
1879 | 1876 | ||
1880 | if (delete) { | 1877 | if (delete) { |
1881 | xfs_ifree_cluster(ip, tp, first_ino); | 1878 | error = xfs_ifree_cluster(ip, tp, first_ino); |
1882 | } | 1879 | } |
1883 | 1880 | ||
1884 | return 0; | 1881 | return error; |
1885 | } | 1882 | } |
1886 | 1883 | ||
1887 | /* | 1884 | /* |
@@ -2472,11 +2469,11 @@ cluster_corrupt_out: | |||
2472 | */ | 2469 | */ |
2473 | if (bp->b_iodone) { | 2470 | if (bp->b_iodone) { |
2474 | XFS_BUF_UNDONE(bp); | 2471 | XFS_BUF_UNDONE(bp); |
2475 | XFS_BUF_STALE(bp); | 2472 | xfs_buf_stale(bp); |
2476 | XFS_BUF_ERROR(bp,EIO); | 2473 | xfs_buf_ioerror(bp, EIO); |
2477 | xfs_buf_ioend(bp, 0); | 2474 | xfs_buf_ioend(bp, 0); |
2478 | } else { | 2475 | } else { |
2479 | XFS_BUF_STALE(bp); | 2476 | xfs_buf_stale(bp); |
2480 | xfs_buf_relse(bp); | 2477 | xfs_buf_relse(bp); |
2481 | } | 2478 | } |
2482 | } | 2479 | } |
@@ -2585,7 +2582,7 @@ xfs_iflush( | |||
2585 | * If the buffer is pinned then push on the log now so we won't | 2582 | * If the buffer is pinned then push on the log now so we won't |
2586 | * get stuck waiting in the write for too long. | 2583 | * get stuck waiting in the write for too long. |
2587 | */ | 2584 | */ |
2588 | if (XFS_BUF_ISPINNED(bp)) | 2585 | if (xfs_buf_ispinned(bp)) |
2589 | xfs_log_force(mp, 0); | 2586 | xfs_log_force(mp, 0); |
2590 | 2587 | ||
2591 | /* | 2588 | /* |
@@ -2597,9 +2594,11 @@ xfs_iflush( | |||
2597 | goto cluster_corrupt_out; | 2594 | goto cluster_corrupt_out; |
2598 | 2595 | ||
2599 | if (flags & SYNC_WAIT) | 2596 | if (flags & SYNC_WAIT) |
2600 | error = xfs_bwrite(mp, bp); | 2597 | error = xfs_bwrite(bp); |
2601 | else | 2598 | else |
2602 | xfs_bdwrite(mp, bp); | 2599 | xfs_buf_delwri_queue(bp); |
2600 | |||
2601 | xfs_buf_relse(bp); | ||
2603 | return error; | 2602 | return error; |
2604 | 2603 | ||
2605 | corrupt_out: | 2604 | corrupt_out: |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2380a4bcbece..760140d1dd66 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -257,7 +257,6 @@ typedef struct xfs_inode { | |||
257 | 257 | ||
258 | xfs_fsize_t i_size; /* in-memory size */ | 258 | xfs_fsize_t i_size; /* in-memory size */ |
259 | xfs_fsize_t i_new_size; /* size when write completes */ | 259 | xfs_fsize_t i_new_size; /* size when write completes */ |
260 | atomic_t i_iocount; /* outstanding I/O count */ | ||
261 | 260 | ||
262 | /* VFS inode */ | 261 | /* VFS inode */ |
263 | struct inode i_vnode; /* embedded VFS inode */ | 262 | struct inode i_vnode; /* embedded VFS inode */ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 588406dc6a35..b7cf21ba240f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -658,10 +658,8 @@ xfs_inode_item_unlock( | |||
658 | 658 | ||
659 | lock_flags = iip->ili_lock_flags; | 659 | lock_flags = iip->ili_lock_flags; |
660 | iip->ili_lock_flags = 0; | 660 | iip->ili_lock_flags = 0; |
661 | if (lock_flags) { | 661 | if (lock_flags) |
662 | xfs_iunlock(ip, lock_flags); | 662 | xfs_iunlock(ip, lock_flags); |
663 | IRELE(ip); | ||
664 | } | ||
665 | } | 663 | } |
666 | 664 | ||
667 | /* | 665 | /* |
@@ -708,13 +706,14 @@ xfs_inode_item_committed( | |||
708 | * marked delayed write. If that's the case, we'll promote it and that will | 706 | * marked delayed write. If that's the case, we'll promote it and that will |
709 | * allow the caller to write the buffer by triggering the xfsbufd to run. | 707 | * allow the caller to write the buffer by triggering the xfsbufd to run. |
710 | */ | 708 | */ |
711 | STATIC void | 709 | STATIC bool |
712 | xfs_inode_item_pushbuf( | 710 | xfs_inode_item_pushbuf( |
713 | struct xfs_log_item *lip) | 711 | struct xfs_log_item *lip) |
714 | { | 712 | { |
715 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 713 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
716 | struct xfs_inode *ip = iip->ili_inode; | 714 | struct xfs_inode *ip = iip->ili_inode; |
717 | struct xfs_buf *bp; | 715 | struct xfs_buf *bp; |
716 | bool ret = true; | ||
718 | 717 | ||
719 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 718 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
720 | 719 | ||
@@ -725,7 +724,7 @@ xfs_inode_item_pushbuf( | |||
725 | if (completion_done(&ip->i_flush) || | 724 | if (completion_done(&ip->i_flush) || |
726 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 725 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
727 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 726 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
728 | return; | 727 | return true; |
729 | } | 728 | } |
730 | 729 | ||
731 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, | 730 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, |
@@ -733,10 +732,13 @@ xfs_inode_item_pushbuf( | |||
733 | 732 | ||
734 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 733 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
735 | if (!bp) | 734 | if (!bp) |
736 | return; | 735 | return true; |
737 | if (XFS_BUF_ISDELAYWRITE(bp)) | 736 | if (XFS_BUF_ISDELAYWRITE(bp)) |
738 | xfs_buf_delwri_promote(bp); | 737 | xfs_buf_delwri_promote(bp); |
738 | if (xfs_buf_ispinned(bp)) | ||
739 | ret = false; | ||
739 | xfs_buf_relse(bp); | 740 | xfs_buf_relse(bp); |
741 | return ret; | ||
740 | } | 742 | } |
741 | 743 | ||
742 | /* | 744 | /* |
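xfs_inode_item_pushbuf() now tells its caller whether pushing can make progress: it returns true on every path except when the backing buffer turns out to be pinned, in which case false signals that a log force is needed before the buffer can be written. A sketch of how the AIL side would consume this; the caller below is an illustration inferred from the return values, not part of this diff:

    /* hypothetical consumer sketch, AIL push loop */
    if (!IOP_PUSHBUF(lip))          /* false: buffer pinned in the log */
        xfs_log_force(mp, 0);       /* unpin so the next push can flush */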
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index f7ce7debe14c..d99a90518909 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -1069,7 +1069,7 @@ xfs_ioctl_setattr( | |||
1069 | } | 1069 | } |
1070 | } | 1070 | } |
1071 | 1071 | ||
1072 | xfs_trans_ijoin(tp, ip); | 1072 | xfs_trans_ijoin(tp, ip, 0); |
1073 | 1073 | ||
1074 | /* | 1074 | /* |
1075 | * Change file ownership. Must be the owner or privileged. | 1075 | * Change file ownership. Must be the owner or privileged. |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index d56173b34a2a..d56173b34a2a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 54e623bfbb85..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 80f4060e8970..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h | |||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 091d82b94c4d..9afa282aa937 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -208,22 +208,20 @@ xfs_iomap_write_direct( | |||
208 | if (error) | 208 | if (error) |
209 | goto error1; | 209 | goto error1; |
210 | 210 | ||
211 | xfs_trans_ijoin(tp, ip); | 211 | xfs_trans_ijoin(tp, ip, 0); |
212 | 212 | ||
213 | bmapi_flag = XFS_BMAPI_WRITE; | 213 | bmapi_flag = 0; |
214 | if (offset < ip->i_size || extsz) | 214 | if (offset < ip->i_size || extsz) |
215 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * Issue the xfs_bmapi() call to allocate the blocks. | ||
219 | * | ||
220 | * From this point onwards we overwrite the imap pointer that the | 218 | * From this point onwards we overwrite the imap pointer that the |
221 | * caller gave to us. | 219 | * caller gave to us. |
222 | */ | 220 | */ |
223 | xfs_bmap_init(&free_list, &firstfsb); | 221 | xfs_bmap_init(&free_list, &firstfsb); |
224 | nimaps = 1; | 222 | nimaps = 1; |
225 | error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, | 223 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, |
226 | &firstfsb, 0, imap, &nimaps, &free_list); | 224 | &firstfsb, 0, imap, &nimaps, &free_list); |
227 | if (error) | 225 | if (error) |
228 | goto error0; | 226 | goto error0; |
229 | 227 | ||
@@ -300,8 +298,8 @@ xfs_iomap_eof_want_preallocate( | |||
300 | while (count_fsb > 0) { | 298 | while (count_fsb > 0) { |
301 | imaps = nimaps; | 299 | imaps = nimaps; |
302 | firstblock = NULLFSBLOCK; | 300 | firstblock = NULLFSBLOCK; |
303 | error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, | 301 | error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, |
304 | &firstblock, 0, imap, &imaps, NULL); | 302 | 0); |
305 | if (error) | 303 | if (error) |
306 | return error; | 304 | return error; |
307 | for (n = 0; n < imaps; n++) { | 305 | for (n = 0; n < imaps; n++) { |
@@ -381,7 +379,6 @@ xfs_iomap_write_delay( | |||
381 | xfs_fileoff_t last_fsb; | 379 | xfs_fileoff_t last_fsb; |
382 | xfs_off_t aligned_offset; | 380 | xfs_off_t aligned_offset; |
383 | xfs_fileoff_t ioalign; | 381 | xfs_fileoff_t ioalign; |
384 | xfs_fsblock_t firstblock; | ||
385 | xfs_extlen_t extsz; | 382 | xfs_extlen_t extsz; |
386 | int nimaps; | 383 | int nimaps; |
387 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; | 384 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; |
@@ -425,12 +422,8 @@ retry: | |||
425 | } | 422 | } |
426 | 423 | ||
427 | nimaps = XFS_WRITE_IMAPS; | 424 | nimaps = XFS_WRITE_IMAPS; |
428 | firstblock = NULLFSBLOCK; | 425 | error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, |
429 | error = xfs_bmapi(NULL, ip, offset_fsb, | 426 | imap, &nimaps, XFS_BMAPI_ENTIRE); |
430 | (xfs_filblks_t)(last_fsb - offset_fsb), | ||
431 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | | ||
432 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, | ||
433 | &nimaps, NULL); | ||
434 | switch (error) { | 427 | switch (error) { |
435 | case 0: | 428 | case 0: |
436 | case ENOSPC: | 429 | case ENOSPC: |
@@ -535,7 +528,7 @@ xfs_iomap_write_allocate( | |||
535 | return XFS_ERROR(error); | 528 | return XFS_ERROR(error); |
536 | } | 529 | } |
537 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 530 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
538 | xfs_trans_ijoin(tp, ip); | 531 | xfs_trans_ijoin(tp, ip, 0); |
539 | 532 | ||
540 | xfs_bmap_init(&free_list, &first_block); | 533 | xfs_bmap_init(&free_list, &first_block); |
541 | 534 | ||
@@ -587,14 +580,12 @@ xfs_iomap_write_allocate( | |||
587 | } | 580 | } |
588 | 581 | ||
589 | /* | 582 | /* |
590 | * Go get the actual blocks. | ||
591 | * | ||
592 | * From this point onwards we overwrite the imap | 583 | * From this point onwards we overwrite the imap |
593 | * pointer that the caller gave to us. | 584 | * pointer that the caller gave to us. |
594 | */ | 585 | */ |
595 | error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, | 586 | error = xfs_bmapi_write(tp, ip, map_start_fsb, |
596 | XFS_BMAPI_WRITE, &first_block, 1, | 587 | count_fsb, 0, &first_block, 1, |
597 | imap, &nimaps, &free_list); | 588 | imap, &nimaps, &free_list); |
598 | if (error) | 589 | if (error) |
599 | goto trans_cancel; | 590 | goto trans_cancel; |
600 | 591 | ||
@@ -701,15 +692,15 @@ xfs_iomap_write_unwritten( | |||
701 | } | 692 | } |
702 | 693 | ||
703 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 694 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
704 | xfs_trans_ijoin(tp, ip); | 695 | xfs_trans_ijoin(tp, ip, 0); |
705 | 696 | ||
706 | /* | 697 | /* |
707 | * Modify the unwritten extent state of the buffer. | 698 | * Modify the unwritten extent state of the buffer. |
708 | */ | 699 | */ |
709 | xfs_bmap_init(&free_list, &firstfsb); | 700 | xfs_bmap_init(&free_list, &firstfsb); |
710 | nimaps = 1; | 701 | nimaps = 1; |
711 | error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, | 702 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, |
712 | XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, | 703 | XFS_BMAPI_CONVERT, &firstfsb, |
713 | 1, &imap, &nimaps, &free_list); | 704 | 1, &imap, &nimaps, &free_list); |
714 | if (error) | 705 | if (error) |
715 | goto error_on_bmapi_transaction; | 706 | goto error_on_bmapi_transaction; |
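These hunks replace the flag-multiplexed xfs_bmapi() with purpose-built variants: xfs_bmapi_read() for transaction-free lookups, xfs_bmapi_write() for allocating writes (the XFS_BMAPI_WRITE flag disappears, being implied), and xfs_bmapi_delay() for delayed allocation. The three call shapes as they appear above:

    /* lookup only: no transaction, no firstblock, no free list */
    error = xfs_bmapi_read(ip, offset_fsb, count_fsb, imap, &nimaps, 0);

    /* allocating write inside a transaction */
    xfs_bmap_init(&free_list, &firstfsb);
    error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
                            &firstfsb, 0, imap, &nimaps, &free_list);

    /* delayed allocation; XFS_BMAPI_DELAY|XFS_BMAPI_WRITE is now implied */
    error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
                            imap, &nimaps, XFS_BMAPI_ENTIRE);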
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/xfs_iops.c index b9c172b3fbbe..9ba2a07b7343 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -70,9 +70,8 @@ xfs_synchronize_times( | |||
70 | } | 70 | } |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * If the linux inode is valid, mark it dirty. | 73 | * If the linux inode is valid, mark it dirty, else mark the dirty state |
74 | * Used when committing a dirty inode into a transaction so that | 74 | * in the XFS inode to make sure we pick it up when reclaiming the inode. |
75 | * the inode will get written back by the linux code | ||
76 | */ | 75 | */ |
77 | void | 76 | void |
78 | xfs_mark_inode_dirty_sync( | 77 | xfs_mark_inode_dirty_sync( |
@@ -82,6 +81,10 @@ xfs_mark_inode_dirty_sync( | |||
82 | 81 | ||
83 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | 82 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
84 | mark_inode_dirty_sync(inode); | 83 | mark_inode_dirty_sync(inode); |
84 | else { | ||
85 | barrier(); | ||
86 | ip->i_update_core = 1; | ||
87 | } | ||
85 | } | 88 | } |
86 | 89 | ||
87 | void | 90 | void |
@@ -92,6 +95,28 @@ xfs_mark_inode_dirty( | |||
92 | 95 | ||
93 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | 96 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
94 | mark_inode_dirty(inode); | 97 | mark_inode_dirty(inode); |
98 | else { | ||
99 | barrier(); | ||
100 | ip->i_update_core = 1; | ||
101 | } | ||
102 | |||
103 | } | ||
104 | |||
105 | |||
106 | int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | ||
107 | void *fs_info) | ||
108 | { | ||
109 | const struct xattr *xattr; | ||
110 | struct xfs_inode *ip = XFS_I(inode); | ||
111 | int error = 0; | ||
112 | |||
113 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | ||
114 | error = xfs_attr_set(ip, xattr->name, xattr->value, | ||
115 | xattr->value_len, ATTR_SECURE); | ||
116 | if (error < 0) | ||
117 | break; | ||
118 | } | ||
119 | return error; | ||
95 | } | 120 | } |
96 | 121 | ||
97 | /* | 122 | /* |
@@ -100,31 +125,15 @@ xfs_mark_inode_dirty( | |||
100 | * these attrs can be journalled at inode creation time (along with the | 125 | * these attrs can be journalled at inode creation time (along with the |
101 | * inode, of course, such that log replay can't cause these to be lost). | 126 | * inode, of course, such that log replay can't cause these to be lost). |
102 | */ | 127 | */ |
128 | |||
103 | STATIC int | 129 | STATIC int |
104 | xfs_init_security( | 130 | xfs_init_security( |
105 | struct inode *inode, | 131 | struct inode *inode, |
106 | struct inode *dir, | 132 | struct inode *dir, |
107 | const struct qstr *qstr) | 133 | const struct qstr *qstr) |
108 | { | 134 | { |
109 | struct xfs_inode *ip = XFS_I(inode); | 135 | return security_inode_init_security(inode, dir, qstr, |
110 | size_t length; | 136 | &xfs_initxattrs, NULL); |
111 | void *value; | ||
112 | unsigned char *name; | ||
113 | int error; | ||
114 | |||
115 | error = security_inode_init_security(inode, dir, qstr, (char **)&name, | ||
116 | &value, &length); | ||
117 | if (error) { | ||
118 | if (error == -EOPNOTSUPP) | ||
119 | return 0; | ||
120 | return -error; | ||
121 | } | ||
122 | |||
123 | error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); | ||
124 | |||
125 | kfree(name); | ||
126 | kfree(value); | ||
127 | return error; | ||
128 | } | 137 | } |
129 | 138 | ||
130 | static void | 139 | static void |
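xfs_init_security() switches from the old security_inode_init_security() variant, which returned a single name/value pair the caller had to set and free, to the callback form: the LSM builds a name-terminated xattr array and invokes xfs_initxattrs() to store each entry with ATTR_SECURE, so the attrs can be journalled with inode creation as the comment above notes. The essential shape of the new contract, condensed from the hunks:

    int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
                       void *fs_info)
    {
        const struct xattr *xattr;
        int error = 0;

        /* array is terminated by an entry with a NULL name */
        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
            error = xfs_attr_set(XFS_I(inode), xattr->name, xattr->value,
                                 xattr->value_len, ATTR_SECURE);
            if (error < 0)
                break;
        }
        return error;
    }

    /* fs_info is unused by XFS, hence the NULL */
    return security_inode_init_security(inode, dir, qstr,
                                        &xfs_initxattrs, NULL);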
@@ -457,7 +466,7 @@ xfs_vn_getattr( | |||
457 | trace_xfs_getattr(ip); | 466 | trace_xfs_getattr(ip); |
458 | 467 | ||
459 | if (XFS_FORCED_SHUTDOWN(mp)) | 468 | if (XFS_FORCED_SHUTDOWN(mp)) |
460 | return XFS_ERROR(EIO); | 469 | return -XFS_ERROR(EIO); |
461 | 470 | ||
462 | stat->size = XFS_ISIZE(ip); | 471 | stat->size = XFS_ISIZE(ip); |
463 | stat->dev = inode->i_sb->s_dev; | 472 | stat->dev = inode->i_sb->s_dev; |
@@ -603,7 +612,7 @@ xfs_setattr_nonsize( | |||
603 | } | 612 | } |
604 | } | 613 | } |
605 | 614 | ||
606 | xfs_trans_ijoin(tp, ip); | 615 | xfs_trans_ijoin(tp, ip, 0); |
607 | 616 | ||
608 | /* | 617 | /* |
609 | * Change file ownership. Must be the owner or privileged. | 618 | * Change file ownership. Must be the owner or privileged. |
@@ -825,16 +834,16 @@ xfs_setattr_size( | |||
825 | * care about here. | 834 | * care about here. |
826 | */ | 835 | */ |
827 | if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { | 836 | if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { |
828 | error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, | 837 | error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, |
829 | XBF_ASYNC, FI_NONE); | 838 | FI_NONE); |
830 | if (error) | 839 | if (error) |
831 | goto out_unlock; | 840 | goto out_unlock; |
832 | } | 841 | } |
833 | 842 | ||
834 | /* | 843 | /* |
835 | * Wait for all I/O to complete. | 844 | * Wait for all direct I/O to complete. |
836 | */ | 845 | */ |
837 | xfs_ioend_wait(ip); | 846 | inode_dio_wait(inode); |
838 | 847 | ||
839 | error = -block_truncate_page(inode->i_mapping, iattr->ia_size, | 848 | error = -block_truncate_page(inode->i_mapping, iattr->ia_size, |
840 | xfs_get_blocks); | 849 | xfs_get_blocks); |
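With i_iocount removed from the XFS inode (see the xfs_inode.h hunk earlier), the truncate path waits on the generic VFS direct-I/O counter instead of the old xfs_ioend_wait(). The resulting ordering in xfs_setattr_size(), condensed from this hunk with surrounding steps elided:

    /* write back dirty pages between the on-disk and in-memory sizes */
    error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, FI_NONE);
    if (error)
        goto out_unlock;

    /* drain in-flight direct I/O via the generic inode->i_dio_count */
    inode_dio_wait(inode);

    /* only then zero the partial block at the new EOF */
    error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
                                 xfs_get_blocks);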
@@ -855,7 +864,7 @@ xfs_setattr_size( | |||
855 | 864 | ||
856 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 865 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
857 | 866 | ||
858 | xfs_trans_ijoin(tp, ip); | 867 | xfs_trans_ijoin(tp, ip, 0); |
859 | 868 | ||
860 | /* | 869 | /* |
861 | * Only change the c/mtime if we are changing the size or we are | 870 | * Only change the c/mtime if we are changing the size or we are |
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/xfs_iops.h index ef41c92ce66e..ef41c92ce66e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/xfs_iops.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/xfs_linux.h index d42f814e4d35..828662f70d64 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -32,13 +32,12 @@ | |||
32 | # define XFS_BIG_INUMS 0 | 32 | # define XFS_BIG_INUMS 0 |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #include <xfs_types.h> | 35 | #include "xfs_types.h" |
36 | 36 | ||
37 | #include <kmem.h> | 37 | #include "kmem.h" |
38 | #include <mrlock.h> | 38 | #include "mrlock.h" |
39 | #include <time.h> | 39 | #include "time.h" |
40 | 40 | #include "uuid.h" | |
41 | #include <support/uuid.h> | ||
42 | 41 | ||
43 | #include <linux/semaphore.h> | 42 | #include <linux/semaphore.h> |
44 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
@@ -69,6 +68,8 @@ | |||
69 | #include <linux/ctype.h> | 68 | #include <linux/ctype.h> |
70 | #include <linux/writeback.h> | 69 | #include <linux/writeback.h> |
71 | #include <linux/capability.h> | 70 | #include <linux/capability.h> |
71 | #include <linux/kthread.h> | ||
72 | #include <linux/freezer.h> | ||
72 | #include <linux/list_sort.h> | 73 | #include <linux/list_sort.h> |
73 | 74 | ||
74 | #include <asm/page.h> | 75 | #include <asm/page.h> |
@@ -78,14 +79,14 @@ | |||
78 | #include <asm/byteorder.h> | 79 | #include <asm/byteorder.h> |
79 | #include <asm/unaligned.h> | 80 | #include <asm/unaligned.h> |
80 | 81 | ||
81 | #include <xfs_vnode.h> | 82 | #include "xfs_vnode.h" |
82 | #include <xfs_stats.h> | 83 | #include "xfs_stats.h" |
83 | #include <xfs_sysctl.h> | 84 | #include "xfs_sysctl.h" |
84 | #include <xfs_iops.h> | 85 | #include "xfs_iops.h" |
85 | #include <xfs_aops.h> | 86 | #include "xfs_aops.h" |
86 | #include <xfs_super.h> | 87 | #include "xfs_super.h" |
87 | #include <xfs_buf.h> | 88 | #include "xfs_buf.h" |
88 | #include <xfs_message.h> | 89 | #include "xfs_message.h" |
89 | 90 | ||
90 | #ifdef __BIG_ENDIAN | 91 | #ifdef __BIG_ENDIAN |
91 | #define XFS_NATIVE_HOST 1 | 92 | #define XFS_NATIVE_HOST 1 |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 06ff8437ed8e..2758a6277c52 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -878,10 +878,10 @@ xlog_iodone(xfs_buf_t *bp) | |||
878 | /* | 878 | /* |
879 | * Race to shutdown the filesystem if we see an error. | 879 | * Race to shutdown the filesystem if we see an error. |
880 | */ | 880 | */ |
881 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, | 881 | if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, |
882 | XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { | 882 | XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { |
883 | xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); | 883 | xfs_buf_ioerror_alert(bp, __func__); |
884 | XFS_BUF_STALE(bp); | 884 | xfs_buf_stale(bp); |
885 | xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); | 885 | xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); |
886 | /* | 886 | /* |
887 | * This flag will be propagated to the trans-committed | 887 | * This flag will be propagated to the trans-committed |
@@ -1047,11 +1047,10 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1047 | xlog_get_iclog_buffer_size(mp, log); | 1047 | xlog_get_iclog_buffer_size(mp, log); |
1048 | 1048 | ||
1049 | error = ENOMEM; | 1049 | error = ENOMEM; |
1050 | bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); | 1050 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0); |
1051 | if (!bp) | 1051 | if (!bp) |
1052 | goto out_free_log; | 1052 | goto out_free_log; |
1053 | bp->b_iodone = xlog_iodone; | 1053 | bp->b_iodone = xlog_iodone; |
1054 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
1055 | ASSERT(xfs_buf_islocked(bp)); | 1054 | ASSERT(xfs_buf_islocked(bp)); |
1056 | log->l_xbuf = bp; | 1055 | log->l_xbuf = bp; |
1057 | 1056 | ||
@@ -1108,7 +1107,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1108 | iclog->ic_callback_tail = &(iclog->ic_callback); | 1107 | iclog->ic_callback_tail = &(iclog->ic_callback); |
1109 | iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; | 1108 | iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; |
1110 | 1109 | ||
1111 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | ||
1112 | ASSERT(xfs_buf_islocked(iclog->ic_bp)); | 1110 | ASSERT(xfs_buf_islocked(iclog->ic_bp)); |
1113 | init_waitqueue_head(&iclog->ic_force_wait); | 1111 | init_waitqueue_head(&iclog->ic_force_wait); |
1114 | init_waitqueue_head(&iclog->ic_write_wait); | 1112 | init_waitqueue_head(&iclog->ic_write_wait); |
@@ -1248,8 +1246,8 @@ xlog_bdstrat( | |||
1248 | struct xlog_in_core *iclog = bp->b_fspriv; | 1246 | struct xlog_in_core *iclog = bp->b_fspriv; |
1249 | 1247 | ||
1250 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 1248 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
1251 | XFS_BUF_ERROR(bp, EIO); | 1249 | xfs_buf_ioerror(bp, EIO); |
1252 | XFS_BUF_STALE(bp); | 1250 | xfs_buf_stale(bp); |
1253 | xfs_buf_ioend(bp, 0); | 1251 | xfs_buf_ioend(bp, 0); |
1254 | /* | 1252 | /* |
1255 | * It would seem logical to return EIO here, but we rely on | 1253 | * It would seem logical to return EIO here, but we rely on |
@@ -1355,7 +1353,6 @@ xlog_sync(xlog_t *log, | |||
1355 | XFS_BUF_SET_COUNT(bp, count); | 1353 | XFS_BUF_SET_COUNT(bp, count); |
1356 | bp->b_fspriv = iclog; | 1354 | bp->b_fspriv = iclog; |
1357 | XFS_BUF_ZEROFLAGS(bp); | 1355 | XFS_BUF_ZEROFLAGS(bp); |
1358 | XFS_BUF_BUSY(bp); | ||
1359 | XFS_BUF_ASYNC(bp); | 1356 | XFS_BUF_ASYNC(bp); |
1360 | bp->b_flags |= XBF_SYNCIO; | 1357 | bp->b_flags |= XBF_SYNCIO; |
1361 | 1358 | ||
@@ -1390,24 +1387,23 @@ xlog_sync(xlog_t *log, | |||
1390 | */ | 1387 | */ |
1391 | XFS_BUF_WRITE(bp); | 1388 | XFS_BUF_WRITE(bp); |
1392 | 1389 | ||
1393 | if ((error = xlog_bdstrat(bp))) { | 1390 | error = xlog_bdstrat(bp); |
1394 | xfs_ioerror_alert("xlog_sync", log->l_mp, bp, | 1391 | if (error) { |
1395 | XFS_BUF_ADDR(bp)); | 1392 | xfs_buf_ioerror_alert(bp, "xlog_sync"); |
1396 | return error; | 1393 | return error; |
1397 | } | 1394 | } |
1398 | if (split) { | 1395 | if (split) { |
1399 | bp = iclog->ic_log->l_xbuf; | 1396 | bp = iclog->ic_log->l_xbuf; |
1400 | XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ | 1397 | XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ |
1401 | XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ | 1398 | xfs_buf_associate_memory(bp, |
1402 | (__psint_t)count), split); | 1399 | (char *)&iclog->ic_header + count, split); |
1403 | bp->b_fspriv = iclog; | 1400 | bp->b_fspriv = iclog; |
1404 | XFS_BUF_ZEROFLAGS(bp); | 1401 | XFS_BUF_ZEROFLAGS(bp); |
1405 | XFS_BUF_BUSY(bp); | ||
1406 | XFS_BUF_ASYNC(bp); | 1402 | XFS_BUF_ASYNC(bp); |
1407 | bp->b_flags |= XBF_SYNCIO; | 1403 | bp->b_flags |= XBF_SYNCIO; |
1408 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1404 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) |
1409 | bp->b_flags |= XBF_FUA; | 1405 | bp->b_flags |= XBF_FUA; |
1410 | dptr = XFS_BUF_PTR(bp); | 1406 | dptr = bp->b_addr; |
1411 | /* | 1407 | /* |
1412 | * Bump the cycle numbers at the start of each block | 1408 | * Bump the cycle numbers at the start of each block |
1413 | * since this part of the buffer is at the start of | 1409 | * since this part of the buffer is at the start of |
@@ -1427,9 +1423,9 @@ xlog_sync(xlog_t *log, | |||
1427 | /* account for internal log which doesn't start at block #0 */ | 1423 | /* account for internal log which doesn't start at block #0 */ |
1428 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); | 1424 | XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); |
1429 | XFS_BUF_WRITE(bp); | 1425 | XFS_BUF_WRITE(bp); |
1430 | if ((error = xlog_bdstrat(bp))) { | 1426 | error = xlog_bdstrat(bp); |
1431 | xfs_ioerror_alert("xlog_sync (split)", log->l_mp, | 1427 | if (error) { |
1432 | bp, XFS_BUF_ADDR(bp)); | 1428 | xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); |
1433 | return error; | 1429 | return error; |
1434 | } | 1430 | } |
1435 | } | 1431 | } |
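In the xlog_alloc_log() hunk, xfs_buf_get_empty() gives way to xfs_buf_alloc(), which takes the buffer target, block address, size and flags in one call; the log allocates its iclog buffers up front and fills in the disk address per write. A sketch of the allocation from that hunk; the meaning of each argument is inferred from this one call site, so treat the annotations as assumptions:

    error = ENOMEM;
    bp = xfs_buf_alloc(mp->m_logdev_targp,  /* target: the log device      */
                       0,                   /* daddr, set later per iclog  */
                       log->l_iclog_size,   /* buffer size                 */
                       0);                  /* flags                       */
    if (!bp)
        goto out_free_log;
    bp->b_iodone = xlog_iodone;
    ASSERT(xfs_buf_islocked(bp));   /* the XFS_BUF_ISBUSY assert is gone */
    log->l_xbuf = bp;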
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 052a2c0ec5fb..541a508adea1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -147,7 +147,7 @@ xlog_align( | |||
147 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); | 147 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
148 | 148 | ||
149 | ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); | 149 | ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); |
150 | return XFS_BUF_PTR(bp) + BBTOB(offset); | 150 | return bp->b_addr + BBTOB(offset); |
151 | } | 151 | } |
152 | 152 | ||
153 | 153 | ||
@@ -178,15 +178,12 @@ xlog_bread_noalign( | |||
178 | 178 | ||
179 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 179 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
180 | XFS_BUF_READ(bp); | 180 | XFS_BUF_READ(bp); |
181 | XFS_BUF_BUSY(bp); | ||
182 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 181 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
183 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | ||
184 | 182 | ||
185 | xfsbdstrat(log->l_mp, bp); | 183 | xfsbdstrat(log->l_mp, bp); |
186 | error = xfs_buf_iowait(bp); | 184 | error = xfs_buf_iowait(bp); |
187 | if (error) | 185 | if (error) |
188 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 186 | xfs_buf_ioerror_alert(bp, __func__); |
189 | bp, XFS_BUF_ADDR(bp)); | ||
190 | return error; | 187 | return error; |
191 | } | 188 | } |
192 | 189 | ||
@@ -220,18 +217,18 @@ xlog_bread_offset( | |||
220 | xfs_buf_t *bp, | 217 | xfs_buf_t *bp, |
221 | xfs_caddr_t offset) | 218 | xfs_caddr_t offset) |
222 | { | 219 | { |
223 | xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); | 220 | xfs_caddr_t orig_offset = bp->b_addr; |
224 | int orig_len = bp->b_buffer_length; | 221 | int orig_len = bp->b_buffer_length; |
225 | int error, error2; | 222 | int error, error2; |
226 | 223 | ||
227 | error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); | 224 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
228 | if (error) | 225 | if (error) |
229 | return error; | 226 | return error; |
230 | 227 | ||
231 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | 228 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
232 | 229 | ||
233 | /* must reset buffer pointer even on error */ | 230 | /* must reset buffer pointer even on error */ |
234 | error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); | 231 | error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); |
235 | if (error) | 232 | if (error) |
236 | return error; | 233 | return error; |
237 | return error2; | 234 | return error2; |
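XFS_BUF_SET_PTR() becomes the plain function xfs_buf_associate_memory(bp, addr, len), which repoints a buffer at caller-supplied memory. xlog_bread_offset() uses it to read log blocks into the interior of an existing buffer, and must restore the original mapping even when the read fails. The save/redirect/restore pattern, reassembled from the hunk above:

    xfs_caddr_t orig_offset = bp->b_addr;           /* save current mapping */
    int         orig_len    = bp->b_buffer_length;
    int         error, error2;

    error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
    if (error)
        return error;

    error = xlog_bread_noalign(log, blk_no, nbblks, bp);

    /* must reset buffer pointer even on error */
    error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
    if (error)
        return error;                               /* read error wins */
    return error2;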
@@ -266,15 +263,14 @@ xlog_bwrite( | |||
266 | 263 | ||
267 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 264 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
268 | XFS_BUF_ZEROFLAGS(bp); | 265 | XFS_BUF_ZEROFLAGS(bp); |
269 | XFS_BUF_BUSY(bp); | 266 | xfs_buf_hold(bp); |
270 | XFS_BUF_HOLD(bp); | ||
271 | xfs_buf_lock(bp); | 267 | xfs_buf_lock(bp); |
272 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 268 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
273 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | ||
274 | 269 | ||
275 | if ((error = xfs_bwrite(log->l_mp, bp))) | 270 | error = xfs_bwrite(bp); |
276 | xfs_ioerror_alert("xlog_bwrite", log->l_mp, | 271 | if (error) |
277 | bp, XFS_BUF_ADDR(bp)); | 272 | xfs_buf_ioerror_alert(bp, __func__); |
273 | xfs_buf_relse(bp); | ||
278 | return error; | 274 | return error; |
279 | } | 275 | } |
280 | 276 | ||
@@ -360,14 +356,12 @@ STATIC void | |||
360 | xlog_recover_iodone( | 356 | xlog_recover_iodone( |
361 | struct xfs_buf *bp) | 357 | struct xfs_buf *bp) |
362 | { | 358 | { |
363 | if (XFS_BUF_GETERROR(bp)) { | 359 | if (bp->b_error) { |
364 | /* | 360 | /* |
365 | * We're not going to bother about retrying | 361 | * We're not going to bother about retrying |
366 | * this during recovery. One strike! | 362 | * this during recovery. One strike! |
367 | */ | 363 | */ |
368 | xfs_ioerror_alert("xlog_recover_iodone", | 364 | xfs_buf_ioerror_alert(bp, __func__); |
369 | bp->b_target->bt_mount, bp, | ||
370 | XFS_BUF_ADDR(bp)); | ||
371 | xfs_force_shutdown(bp->b_target->bt_mount, | 365 | xfs_force_shutdown(bp->b_target->bt_mount, |
372 | SHUTDOWN_META_IO_ERROR); | 366 | SHUTDOWN_META_IO_ERROR); |
373 | } | 367 | } |
@@ -1262,7 +1256,7 @@ xlog_write_log_records( | |||
1262 | */ | 1256 | */ |
1263 | ealign = round_down(end_block, sectbb); | 1257 | ealign = round_down(end_block, sectbb); |
1264 | if (j == 0 && (start_block + endcount > ealign)) { | 1258 | if (j == 0 && (start_block + endcount > ealign)) { |
1265 | offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); | 1259 | offset = bp->b_addr + BBTOB(ealign - start_block); |
1266 | error = xlog_bread_offset(log, ealign, sectbb, | 1260 | error = xlog_bread_offset(log, ealign, sectbb, |
1267 | bp, offset); | 1261 | bp, offset); |
1268 | if (error) | 1262 | if (error) |
@@ -2135,15 +2129,15 @@ xlog_recover_buffer_pass2( | |||
2135 | 2129 | ||
2136 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, | 2130 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2137 | buf_flags); | 2131 | buf_flags); |
2138 | if (XFS_BUF_ISERROR(bp)) { | 2132 | if (!bp) |
2139 | xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, | 2133 | return XFS_ERROR(ENOMEM); |
2140 | bp, buf_f->blf_blkno); | 2134 | error = bp->b_error; |
2141 | error = XFS_BUF_GETERROR(bp); | 2135 | if (error) { |
2136 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); | ||
2142 | xfs_buf_relse(bp); | 2137 | xfs_buf_relse(bp); |
2143 | return error; | 2138 | return error; |
2144 | } | 2139 | } |
2145 | 2140 | ||
2146 | error = 0; | ||
2147 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { | 2141 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2148 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2142 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2149 | } else if (buf_f->blf_flags & | 2143 | } else if (buf_f->blf_flags & |
@@ -2174,15 +2168,16 @@ xlog_recover_buffer_pass2( | |||
2174 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2168 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
2175 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, | 2169 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, |
2176 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2170 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { |
2177 | XFS_BUF_STALE(bp); | 2171 | xfs_buf_stale(bp); |
2178 | error = xfs_bwrite(mp, bp); | 2172 | error = xfs_bwrite(bp); |
2179 | } else { | 2173 | } else { |
2180 | ASSERT(bp->b_target->bt_mount == mp); | 2174 | ASSERT(bp->b_target->bt_mount == mp); |
2181 | bp->b_iodone = xlog_recover_iodone; | 2175 | bp->b_iodone = xlog_recover_iodone; |
2182 | xfs_bdwrite(mp, bp); | 2176 | xfs_buf_delwri_queue(bp); |
2183 | } | 2177 | } |
2184 | 2178 | ||
2185 | return (error); | 2179 | xfs_buf_relse(bp); |
2180 | return error; | ||
2186 | } | 2181 | } |
2187 | 2182 | ||
2188 | STATIC int | 2183 | STATIC int |
@@ -2227,14 +2222,16 @@ xlog_recover_inode_pass2( | |||
2227 | 2222 | ||
2228 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, | 2223 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, |
2229 | XBF_LOCK); | 2224 | XBF_LOCK); |
2230 | if (XFS_BUF_ISERROR(bp)) { | 2225 | if (!bp) { |
2231 | xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, | 2226 | error = ENOMEM; |
2232 | bp, in_f->ilf_blkno); | 2227 | goto error; |
2233 | error = XFS_BUF_GETERROR(bp); | 2228 | } |
2229 | error = bp->b_error; | ||
2230 | if (error) { | ||
2231 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); | ||
2234 | xfs_buf_relse(bp); | 2232 | xfs_buf_relse(bp); |
2235 | goto error; | 2233 | goto error; |
2236 | } | 2234 | } |
2237 | error = 0; | ||
2238 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); | 2235 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
2239 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); | 2236 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); |
2240 | 2237 | ||
@@ -2439,7 +2436,8 @@ xlog_recover_inode_pass2( | |||
2439 | write_inode_buffer: | 2436 | write_inode_buffer: |
2440 | ASSERT(bp->b_target->bt_mount == mp); | 2437 | ASSERT(bp->b_target->bt_mount == mp); |
2441 | bp->b_iodone = xlog_recover_iodone; | 2438 | bp->b_iodone = xlog_recover_iodone; |
2442 | xfs_bdwrite(mp, bp); | 2439 | xfs_buf_delwri_queue(bp); |
2440 | xfs_buf_relse(bp); | ||
2443 | error: | 2441 | error: |
2444 | if (need_free) | 2442 | if (need_free) |
2445 | kmem_free(in_f); | 2443 | kmem_free(in_f); |
@@ -2537,8 +2535,7 @@ xlog_recover_dquot_pass2( | |||
2537 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), | 2535 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), |
2538 | 0, &bp); | 2536 | 0, &bp); |
2539 | if (error) { | 2537 | if (error) { |
2540 | xfs_ioerror_alert("xlog_recover_do..(read#3)", mp, | 2538 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#3)"); |
2541 | bp, dq_f->qlf_blkno); | ||
2542 | return error; | 2539 | return error; |
2543 | } | 2540 | } |
2544 | ASSERT(bp); | 2541 | ASSERT(bp); |
@@ -2561,7 +2558,8 @@ xlog_recover_dquot_pass2( | |||
2561 | ASSERT(dq_f->qlf_size == 2); | 2558 | ASSERT(dq_f->qlf_size == 2); |
2562 | ASSERT(bp->b_target->bt_mount == mp); | 2559 | ASSERT(bp->b_target->bt_mount == mp); |
2563 | bp->b_iodone = xlog_recover_iodone; | 2560 | bp->b_iodone = xlog_recover_iodone; |
2564 | xfs_bdwrite(mp, bp); | 2561 | xfs_buf_delwri_queue(bp); |
2562 | xfs_buf_relse(bp); | ||
2565 | 2563 | ||
2566 | return (0); | 2564 | return (0); |
2567 | } | 2565 | } |
@@ -3437,7 +3435,7 @@ xlog_do_recovery_pass( | |||
3437 | /* | 3435 | /* |
3438 | * Check for header wrapping around physical end-of-log | 3436 | * Check for header wrapping around physical end-of-log |
3439 | */ | 3437 | */ |
3440 | offset = XFS_BUF_PTR(hbp); | 3438 | offset = hbp->b_addr; |
3441 | split_hblks = 0; | 3439 | split_hblks = 0; |
3442 | wrapped_hblks = 0; | 3440 | wrapped_hblks = 0; |
3443 | if (blk_no + hblks <= log->l_logBBsize) { | 3441 | if (blk_no + hblks <= log->l_logBBsize) { |
@@ -3497,7 +3495,7 @@ xlog_do_recovery_pass( | |||
3497 | } else { | 3495 | } else { |
3498 | /* This log record is split across the | 3496 | /* This log record is split across the |
3499 | * physical end of log */ | 3497 | * physical end of log */ |
3500 | offset = XFS_BUF_PTR(dbp); | 3498 | offset = dbp->b_addr; |
3501 | split_bblks = 0; | 3499 | split_bblks = 0; |
3502 | if (blk_no != log->l_logBBsize) { | 3500 | if (blk_no != log->l_logBBsize) { |
3503 | /* some data is before the physical | 3501 | /* some data is before the physical |
@@ -3656,7 +3654,7 @@ xlog_do_recover( | |||
3656 | return error; | 3654 | return error; |
3657 | } | 3655 | } |
3658 | 3656 | ||
3659 | XFS_bflush(log->l_mp->m_ddev_targp); | 3657 | xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1); |
3660 | 3658 | ||
3661 | /* | 3659 | /* |
3662 | * If IO errors happened during recovery, bail out. | 3660 | * If IO errors happened during recovery, bail out. |
@@ -3689,8 +3687,7 @@ xlog_do_recover( | |||
3689 | xfsbdstrat(log->l_mp, bp); | 3687 | xfsbdstrat(log->l_mp, bp); |
3690 | error = xfs_buf_iowait(bp); | 3688 | error = xfs_buf_iowait(bp); |
3691 | if (error) { | 3689 | if (error) { |
3692 | xfs_ioerror_alert("xlog_do_recover", | 3690 | xfs_buf_ioerror_alert(bp, __func__); |
3693 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | ||
3694 | ASSERT(0); | 3691 | ASSERT(0); |
3695 | xfs_buf_relse(bp); | 3692 | xfs_buf_relse(bp); |
3696 | return error; | 3693 | return error; |
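Both recovery read sites above stop using XFS_BUF_ISERROR()/XFS_BUF_GETERROR(): xfs_buf_read() may now return NULL outright (mapped to ENOMEM), and a returned buffer reports I/O failure through its b_error field. The two-step check that the buffer and inode pass-2 hunks converge on:

    bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, flags);
    if (!bp)
        return XFS_ERROR(ENOMEM);       /* no buffer at all */
    error = bp->b_error;
    if (error) {                        /* buffer exists, read failed */
        xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
        xfs_buf_relse(bp);
        return error;
    }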
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/xfs_message.c index bd672def95ac..bd672def95ac 100644 --- a/fs/xfs/linux-2.6/xfs_message.c +++ b/fs/xfs/xfs_message.c | |||
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/xfs_message.h index 7fb7ea007672..7fb7ea007672 100644 --- a/fs/xfs/linux-2.6/xfs_message.h +++ b/fs/xfs/xfs_message.h | |||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 092e16ae4d9d..d06afbc3540d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -44,9 +44,6 @@ | |||
44 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
45 | 45 | ||
46 | 46 | ||
47 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); | ||
48 | |||
49 | |||
50 | #ifdef HAVE_PERCPU_SB | 47 | #ifdef HAVE_PERCPU_SB |
51 | STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | 48 | STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, |
52 | int); | 49 | int); |
@@ -1484,7 +1481,7 @@ xfs_unmountfs( | |||
1484 | * state as much as possible. | 1481 | * state as much as possible. |
1485 | */ | 1482 | */ |
1486 | xfs_reclaim_inodes(mp, 0); | 1483 | xfs_reclaim_inodes(mp, 0); |
1487 | XFS_bflush(mp->m_ddev_targp); | 1484 | xfs_flush_buftarg(mp->m_ddev_targp, 1); |
1488 | xfs_reclaim_inodes(mp, SYNC_WAIT); | 1485 | xfs_reclaim_inodes(mp, SYNC_WAIT); |
1489 | 1486 | ||
1490 | xfs_qm_unmount(mp); | 1487 | xfs_qm_unmount(mp); |
@@ -1496,11 +1493,6 @@ xfs_unmountfs( | |||
1496 | */ | 1493 | */ |
1497 | xfs_log_force(mp, XFS_LOG_SYNC); | 1494 | xfs_log_force(mp, XFS_LOG_SYNC); |
1498 | 1495 | ||
1499 | xfs_binval(mp->m_ddev_targp); | ||
1500 | if (mp->m_rtdev_targp) { | ||
1501 | xfs_binval(mp->m_rtdev_targp); | ||
1502 | } | ||
1503 | |||
1504 | /* | 1496 | /* |
1505 | * Unreserve any blocks we have so that when we unmount we don't account | 1497 | * Unreserve any blocks we have so that when we unmount we don't account |
1506 | * the reserved free space as used. This is really only necessary for | 1498 | * the reserved free space as used. This is really only necessary for |
@@ -1526,7 +1518,16 @@ xfs_unmountfs( | |||
1526 | xfs_warn(mp, "Unable to update superblock counters. " | 1518 | xfs_warn(mp, "Unable to update superblock counters. " |
1527 | "Freespace may not be correct on next mount."); | 1519 | "Freespace may not be correct on next mount."); |
1528 | xfs_unmountfs_writesb(mp); | 1520 | xfs_unmountfs_writesb(mp); |
1529 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1521 | |
1522 | /* | ||
1523 | * Make sure all buffers have been flushed and completed before | ||
1524 | * unmounting the log. | ||
1525 | */ | ||
1526 | error = xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1527 | if (error) | ||
1528 | xfs_warn(mp, "%d busy buffers during unmount.", error); | ||
1529 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
1530 | |||
1530 | xfs_log_unmount_write(mp); | 1531 | xfs_log_unmount_write(mp); |
1531 | xfs_log_unmount(mp); | 1532 | xfs_log_unmount(mp); |
1532 | xfs_uuid_unmount(mp); | 1533 | xfs_uuid_unmount(mp); |
@@ -1537,16 +1538,6 @@ xfs_unmountfs( | |||
1537 | xfs_free_perag(mp); | 1538 | xfs_free_perag(mp); |
1538 | } | 1539 | } |
1539 | 1540 | ||
1540 | STATIC void | ||
1541 | xfs_unmountfs_wait(xfs_mount_t *mp) | ||
1542 | { | ||
1543 | if (mp->m_logdev_targp != mp->m_ddev_targp) | ||
1544 | xfs_wait_buftarg(mp->m_logdev_targp); | ||
1545 | if (mp->m_rtdev_targp) | ||
1546 | xfs_wait_buftarg(mp->m_rtdev_targp); | ||
1547 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
1548 | } | ||
1549 | |||
1550 | int | 1541 | int |
1551 | xfs_fs_writable(xfs_mount_t *mp) | 1542 | xfs_fs_writable(xfs_mount_t *mp) |
1552 | { | 1543 | { |
@@ -1612,15 +1603,14 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) | |||
1612 | 1603 | ||
1613 | XFS_BUF_UNDONE(sbp); | 1604 | XFS_BUF_UNDONE(sbp); |
1614 | XFS_BUF_UNREAD(sbp); | 1605 | XFS_BUF_UNREAD(sbp); |
1615 | XFS_BUF_UNDELAYWRITE(sbp); | 1606 | xfs_buf_delwri_dequeue(sbp); |
1616 | XFS_BUF_WRITE(sbp); | 1607 | XFS_BUF_WRITE(sbp); |
1617 | XFS_BUF_UNASYNC(sbp); | 1608 | XFS_BUF_UNASYNC(sbp); |
1618 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); | 1609 | ASSERT(sbp->b_target == mp->m_ddev_targp); |
1619 | xfsbdstrat(mp, sbp); | 1610 | xfsbdstrat(mp, sbp); |
1620 | error = xfs_buf_iowait(sbp); | 1611 | error = xfs_buf_iowait(sbp); |
1621 | if (error) | 1612 | if (error) |
1622 | xfs_ioerror_alert("xfs_unmountfs_writesb", | 1613 | xfs_buf_ioerror_alert(sbp, __func__); |
1623 | mp, sbp, XFS_BUF_ADDR(sbp)); | ||
1624 | xfs_buf_relse(sbp); | 1614 | xfs_buf_relse(sbp); |
1625 | } | 1615 | } |
1626 | return error; | 1616 | return error; |
@@ -1938,7 +1928,7 @@ xfs_getsb( | |||
1938 | xfs_buf_lock(bp); | 1928 | xfs_buf_lock(bp); |
1939 | } | 1929 | } |
1940 | 1930 | ||
1941 | XFS_BUF_HOLD(bp); | 1931 | xfs_buf_hold(bp); |
1942 | ASSERT(XFS_BUF_ISDONE(bp)); | 1932 | ASSERT(XFS_BUF_ISDONE(bp)); |
1943 | return bp; | 1933 | return bp; |
1944 | } | 1934 | } |
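xfs_unmountfs_wait() and the xfs_binval() calls are folded into an explicit flush-then-wait sequence on the data device, with any still-busy buffers reported before the unmount record is written. The ordering these hunks establish, condensed with unrelated unmount steps elided:

    xfs_reclaim_inodes(mp, 0);
    xfs_flush_buftarg(mp->m_ddev_targp, 1);   /* push delwri metadata */
    xfs_reclaim_inodes(mp, SYNC_WAIT);
    /* ... quota teardown, log force, superblock write ... */
    error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
    if (error)
        xfs_warn(mp, "%d busy buffers during unmount.", error);
    xfs_wait_buftarg(mp->m_ddev_targp);       /* drain outstanding I/O */
    xfs_log_unmount_write(mp);                /* now the log can go quiet */
    xfs_log_unmount(mp);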
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/xfs_qm.c index 46e54ad9a2dc..5cff443f6cdb 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -1240,7 +1240,7 @@ xfs_qm_reset_dqcounts( | |||
1240 | do_div(j, sizeof(xfs_dqblk_t)); | 1240 | do_div(j, sizeof(xfs_dqblk_t)); |
1241 | ASSERT(mp->m_quotainfo->qi_dqperchunk == j); | 1241 | ASSERT(mp->m_quotainfo->qi_dqperchunk == j); |
1242 | #endif | 1242 | #endif |
1243 | ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); | 1243 | ddq = bp->b_addr; |
1244 | for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { | 1244 | for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { |
1245 | /* | 1245 | /* |
1246 | * Do a sanity check, and if needed, repair the dqblk. Don't | 1246 | * Do a sanity check, and if needed, repair the dqblk. Don't |
@@ -1296,7 +1296,8 @@ xfs_qm_dqiter_bufs( | |||
1296 | break; | 1296 | break; |
1297 | 1297 | ||
1298 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 1298 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
1299 | xfs_bdwrite(mp, bp); | 1299 | xfs_buf_delwri_queue(bp); |
1300 | xfs_buf_relse(bp); | ||
1300 | /* | 1301 | /* |
1301 | * goto the next block. | 1302 | * goto the next block. |
1302 | */ | 1303 | */ |
@@ -1346,11 +1347,8 @@ xfs_qm_dqiterate( | |||
1346 | * the inode is never added to the transaction. | 1347 | * the inode is never added to the transaction. |
1347 | */ | 1348 | */ |
1348 | xfs_ilock(qip, XFS_ILOCK_SHARED); | 1349 | xfs_ilock(qip, XFS_ILOCK_SHARED); |
1349 | error = xfs_bmapi(NULL, qip, lblkno, | 1350 | error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, |
1350 | maxlblkcnt - lblkno, | 1351 | map, &nmaps, 0); |
1351 | XFS_BMAPI_METADATA, | ||
1352 | NULL, | ||
1353 | 0, map, &nmaps, NULL); | ||
1354 | xfs_iunlock(qip, XFS_ILOCK_SHARED); | 1352 | xfs_iunlock(qip, XFS_ILOCK_SHARED); |
1355 | if (error) | 1353 | if (error) |
1356 | break; | 1354 | break; |
@@ -1683,7 +1681,7 @@ xfs_qm_quotacheck( | |||
1683 | * quotacheck'd stamp on the superblock. So, here we do a synchronous | 1681 | * quotacheck'd stamp on the superblock. So, here we do a synchronous |
1684 | * flush. | 1682 | * flush. |
1685 | */ | 1683 | */ |
1686 | XFS_bflush(mp->m_ddev_targp); | 1684 | xfs_flush_buftarg(mp->m_ddev_targp, 1); |
1687 | 1685 | ||
1688 | /* | 1686 | /* |
1689 | * If one type of quotas is off, then it will lose its | 1687 | * If one type of quotas is off, then it will lose its |
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/xfs_qm.h index 43b9abe1052c..43b9abe1052c 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index a0a829addca9..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c index 8671a0b32644..8671a0b32644 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/xfs_qm_stats.c | |||
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h index 5b964fc0dc09..5b964fc0dc09 100644 --- a/fs/xfs/quota/xfs_qm_stats.h +++ b/fs/xfs/xfs_qm_stats.h | |||
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 609246f42e6c..5cc3dde1bc90 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -261,7 +261,7 @@ xfs_qm_scall_trunc_qfile( | |||
261 | } | 261 | } |
262 | 262 | ||
263 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 263 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
264 | xfs_trans_ijoin(tp, ip); | 264 | xfs_trans_ijoin(tp, ip, 0); |
265 | 265 | ||
266 | error = xfs_itruncate_data(&tp, ip, 0); | 266 | error = xfs_itruncate_data(&tp, ip, 0); |
267 | if (error) { | 267 | if (error) { |
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h index 94a3d927d716..94a3d927d716 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/xfs_quota_priv.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 29b9d642e93d..7e76f537abb7 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
26 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
27 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
28 | #include "quota/xfs_qm.h" | 28 | #include "xfs_qm.h" |
29 | #include <linux/quota.h> | 29 | #include <linux/quota.h> |
30 | 30 | ||
31 | 31 | ||
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index df78c297d1a1..866de277079a 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -170,12 +170,12 @@ xfs_rename( | |||
170 | * we can rely on either trans_commit or trans_cancel to unlock | 170 | * we can rely on either trans_commit or trans_cancel to unlock |
171 | * them. | 171 | * them. |
172 | */ | 172 | */ |
173 | xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL); | 173 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); |
174 | if (new_parent) | 174 | if (new_parent) |
175 | xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL); | 175 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); |
176 | xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL); | 176 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); |
177 | if (target_ip) | 177 | if (target_ip) |
178 | xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL); | 178 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * If we are using project inheritance, we only allow renames | 181 | * If we are using project inheritance, we only allow renames |
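xfs_trans_ijoin_ref() is merged into xfs_trans_ijoin(), which grows a lock-flags argument: 0 joins the inode while the caller keeps responsibility for its ilock, whereas passing the held lock flags transfers unlock duty to the transaction, the old _ref semantics relied on by the comment above. The two shapes as used across this series:

    /* caller keeps the lock and drops it itself later */
    xfs_trans_ijoin(tp, ip, 0);

    /* transaction will unlock at commit or cancel (formerly ijoin_ref) */
    xfs_ilock(ip, XFS_ILOCK_EXCL);
    xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);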
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 8f76fdff4f46..87323f1ded64 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -112,7 +112,7 @@ xfs_growfs_rt_alloc( | |||
112 | * Lock the inode. | 112 | * Lock the inode. |
113 | */ | 113 | */ |
114 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 114 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
115 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | 115 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
116 | 116 | ||
117 | xfs_bmap_init(&flist, &firstblock); | 117 | xfs_bmap_init(&flist, &firstblock); |
118 | /* | 118 | /* |
@@ -120,9 +120,9 @@ xfs_growfs_rt_alloc( | |||
120 | */ | 120 | */ |
121 | nmap = 1; | 121 | nmap = 1; |
122 | cancelflags |= XFS_TRANS_ABORT; | 122 | cancelflags |= XFS_TRANS_ABORT; |
123 | error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks, | 123 | error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, |
124 | XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock, | 124 | XFS_BMAPI_METADATA, &firstblock, |
125 | resblks, &map, &nmap, &flist); | 125 | resblks, &map, &nmap, &flist); |
126 | if (!error && nmap < 1) | 126 | if (!error && nmap < 1) |
127 | error = XFS_ERROR(ENOSPC); | 127 | error = XFS_ERROR(ENOSPC); |
128 | if (error) | 128 | if (error) |
@@ -155,7 +155,7 @@ xfs_growfs_rt_alloc( | |||
155 | * Lock the bitmap inode. | 155 | * Lock the bitmap inode. |
156 | */ | 156 | */ |
157 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 157 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
158 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | 158 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
159 | /* | 159 | /* |
160 | * Get a buffer for the block. | 160 | * Get a buffer for the block. |
161 | */ | 161 | */ |
@@ -168,7 +168,7 @@ error_cancel: | |||
168 | xfs_trans_cancel(tp, cancelflags); | 168 | xfs_trans_cancel(tp, cancelflags); |
169 | goto error; | 169 | goto error; |
170 | } | 170 | } |
171 | memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); | 171 | memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); |
172 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); | 172 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); |
173 | /* | 173 | /* |
174 | * Commit the transaction. | 174 | * Commit the transaction. |
@@ -856,34 +856,24 @@ xfs_rtbuf_get( | |||
856 | xfs_buf_t **bpp) /* output: buffer for the block */ | 856 | xfs_buf_t **bpp) /* output: buffer for the block */ |
857 | { | 857 | { |
858 | xfs_buf_t *bp; /* block buffer, result */ | 858 | xfs_buf_t *bp; /* block buffer, result */ |
859 | xfs_daddr_t d; /* disk addr of block */ | ||
860 | int error; /* error value */ | ||
861 | xfs_fsblock_t fsb; /* fs block number for block */ | ||
862 | xfs_inode_t *ip; /* bitmap or summary inode */ | 859 | xfs_inode_t *ip; /* bitmap or summary inode */ |
860 | xfs_bmbt_irec_t map; | ||
861 | int nmap; | ||
862 | int error; /* error value */ | ||
863 | 863 | ||
864 | ip = issum ? mp->m_rsumip : mp->m_rbmip; | 864 | ip = issum ? mp->m_rsumip : mp->m_rbmip; |
865 | /* | 865 | |
866 | * Map from the file offset (block) and inode number to the | 866 | error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); |
867 | * file system block. | 867 | if (error) |
868 | */ | ||
869 | error = xfs_bmapi_single(tp, ip, XFS_DATA_FORK, &fsb, block); | ||
870 | if (error) { | ||
871 | return error; | 868 | return error; |
872 | } | 869 | |
873 | ASSERT(fsb != NULLFSBLOCK); | 870 | ASSERT(map.br_startblock != NULLFSBLOCK); |
874 | /* | 871 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
875 | * Convert to disk address for buffer cache. | 872 | XFS_FSB_TO_DADDR(mp, map.br_startblock), |
876 | */ | ||
877 | d = XFS_FSB_TO_DADDR(mp, fsb); | ||
878 | /* | ||
879 | * Read the buffer. | ||
880 | */ | ||
881 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | ||
882 | mp->m_bsize, 0, &bp); | 873 | mp->m_bsize, 0, &bp); |
883 | if (error) { | 874 | if (error) |
884 | return error; | 875 | return error; |
885 | } | 876 | ASSERT(!xfs_buf_geterror(bp)); |
886 | ASSERT(bp && !XFS_BUF_GETERROR(bp)); | ||
887 | *bpp = bp; | 877 | *bpp = bp; |
888 | return 0; | 878 | return 0; |
889 | } | 879 | } |
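With xfs_bmapi_single() gone, xfs_rtbuf_get() does a one-extent xfs_bmapi_read() lookup and hands the mapped start block to xfs_trans_read_buf(). A condensed sketch of the new lookup; nmap is initialised to 1 here for completeness since the call needs a mapping count going in, and XFS_DATA_FORK in the flags slot evaluates to 0, i.e. no special bmapi flags:

    xfs_bmbt_irec_t map;
    int             nmap = 1;   /* ask for a single mapping */

    error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
    if (error)
        return error;

    ASSERT(map.br_startblock != NULLFSBLOCK);
    error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                               XFS_FSB_TO_DADDR(mp, map.br_startblock),
                               mp->m_bsize, 0, &bp);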
@@ -943,7 +933,7 @@ xfs_rtcheck_range( | |||
943 | if (error) { | 933 | if (error) { |
944 | return error; | 934 | return error; |
945 | } | 935 | } |
946 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 936 | bufp = bp->b_addr; |
947 | /* | 937 | /* |
948 | * Compute the starting word's address, and starting bit. | 938 | * Compute the starting word's address, and starting bit. |
949 | */ | 939 | */ |
@@ -994,7 +984,7 @@ xfs_rtcheck_range( | |||
994 | if (error) { | 984 | if (error) { |
995 | return error; | 985 | return error; |
996 | } | 986 | } |
997 | b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 987 | b = bufp = bp->b_addr; |
998 | word = 0; | 988 | word = 0; |
999 | } else { | 989 | } else { |
1000 | /* | 990 | /* |
@@ -1040,7 +1030,7 @@ xfs_rtcheck_range( | |||
1040 | if (error) { | 1030 | if (error) { |
1041 | return error; | 1031 | return error; |
1042 | } | 1032 | } |
1043 | b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1033 | b = bufp = bp->b_addr; |
1044 | word = 0; | 1034 | word = 0; |
1045 | } else { | 1035 | } else { |
1046 | /* | 1036 | /* |
@@ -1158,7 +1148,7 @@ xfs_rtfind_back( | |||
1158 | if (error) { | 1148 | if (error) { |
1159 | return error; | 1149 | return error; |
1160 | } | 1150 | } |
1161 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1151 | bufp = bp->b_addr; |
1162 | /* | 1152 | /* |
1163 | * Get the first word's index & point to it. | 1153 | * Get the first word's index & point to it. |
1164 | */ | 1154 | */ |
@@ -1210,7 +1200,7 @@ xfs_rtfind_back( | |||
1210 | if (error) { | 1200 | if (error) { |
1211 | return error; | 1201 | return error; |
1212 | } | 1202 | } |
1213 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1203 | bufp = bp->b_addr; |
1214 | word = XFS_BLOCKWMASK(mp); | 1204 | word = XFS_BLOCKWMASK(mp); |
1215 | b = &bufp[word]; | 1205 | b = &bufp[word]; |
1216 | } else { | 1206 | } else { |
@@ -1256,7 +1246,7 @@ xfs_rtfind_back( | |||
1256 | if (error) { | 1246 | if (error) { |
1257 | return error; | 1247 | return error; |
1258 | } | 1248 | } |
1259 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1249 | bufp = bp->b_addr; |
1260 | word = XFS_BLOCKWMASK(mp); | 1250 | word = XFS_BLOCKWMASK(mp); |
1261 | b = &bufp[word]; | 1251 | b = &bufp[word]; |
1262 | } else { | 1252 | } else { |
@@ -1333,7 +1323,7 @@ xfs_rtfind_forw( | |||
1333 | if (error) { | 1323 | if (error) { |
1334 | return error; | 1324 | return error; |
1335 | } | 1325 | } |
1336 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1326 | bufp = bp->b_addr; |
1337 | /* | 1327 | /* |
1338 | * Get the first word's index & point to it. | 1328 | * Get the first word's index & point to it. |
1339 | */ | 1329 | */ |
@@ -1384,7 +1374,7 @@ xfs_rtfind_forw( | |||
1384 | if (error) { | 1374 | if (error) { |
1385 | return error; | 1375 | return error; |
1386 | } | 1376 | } |
1387 | b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1377 | b = bufp = bp->b_addr; |
1388 | word = 0; | 1378 | word = 0; |
1389 | } else { | 1379 | } else { |
1390 | /* | 1380 | /* |
@@ -1429,7 +1419,7 @@ xfs_rtfind_forw( | |||
1429 | if (error) { | 1419 | if (error) { |
1430 | return error; | 1420 | return error; |
1431 | } | 1421 | } |
1432 | b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1422 | b = bufp = bp->b_addr; |
1433 | word = 0; | 1423 | word = 0; |
1434 | } else { | 1424 | } else { |
1435 | /* | 1425 | /* |
@@ -1649,7 +1639,7 @@ xfs_rtmodify_range( | |||
1649 | if (error) { | 1639 | if (error) { |
1650 | return error; | 1640 | return error; |
1651 | } | 1641 | } |
1652 | bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1642 | bufp = bp->b_addr; |
1653 | /* | 1643 | /* |
1654 | * Compute the starting word's address, and starting bit. | 1644 | * Compute the starting word's address, and starting bit. |
1655 | */ | 1645 | */ |
@@ -1694,7 +1684,7 @@ xfs_rtmodify_range( | |||
1694 | if (error) { | 1684 | if (error) { |
1695 | return error; | 1685 | return error; |
1696 | } | 1686 | } |
1697 | first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1687 | first = b = bufp = bp->b_addr; |
1698 | word = 0; | 1688 | word = 0; |
1699 | } else { | 1689 | } else { |
1700 | /* | 1690 | /* |
@@ -1734,7 +1724,7 @@ xfs_rtmodify_range( | |||
1734 | if (error) { | 1724 | if (error) { |
1735 | return error; | 1725 | return error; |
1736 | } | 1726 | } |
1737 | first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); | 1727 | first = b = bufp = bp->b_addr; |
1738 | word = 0; | 1728 | word = 0; |
1739 | } else { | 1729 | } else { |
1740 | /* | 1730 | /* |
@@ -1832,8 +1822,8 @@ xfs_rtmodify_summary( | |||
1832 | */ | 1822 | */ |
1833 | sp = XFS_SUMPTR(mp, bp, so); | 1823 | sp = XFS_SUMPTR(mp, bp, so); |
1834 | *sp += delta; | 1824 | *sp += delta; |
1835 | xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)), | 1825 | xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), |
1836 | (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1)); | 1826 | (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); |
1837 | return 0; | 1827 | return 0; |
1838 | } | 1828 | } |
1839 | 1829 | ||
@@ -1970,7 +1960,7 @@ xfs_growfs_rt( | |||
1970 | * Lock out other callers by grabbing the bitmap inode lock. | 1960 | * Lock out other callers by grabbing the bitmap inode lock. |
1971 | */ | 1961 | */ |
1972 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | 1962 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
1973 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); | 1963 | xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
1974 | /* | 1964 | /* |
1975 | * Update the bitmap inode's size. | 1965 | * Update the bitmap inode's size. |
1976 | */ | 1966 | */ |
@@ -1982,7 +1972,7 @@ xfs_growfs_rt( | |||
1982 | * Get the summary inode into the transaction. | 1972 | * Get the summary inode into the transaction. |
1983 | */ | 1973 | */ |
1984 | xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); | 1974 | xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); |
1985 | xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL); | 1975 | xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); |
1986 | /* | 1976 | /* |
1987 | * Update the summary inode's size. | 1977 | * Update the summary inode's size. |
1988 | */ | 1978 | */ |
@@ -2153,7 +2143,7 @@ xfs_rtfree_extent( | |||
2153 | * Synchronize by locking the bitmap inode. | 2143 | * Synchronize by locking the bitmap inode. |
2154 | */ | 2144 | */ |
2155 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | 2145 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
2156 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); | 2146 | xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
2157 | 2147 | ||
2158 | #if defined(__KERNEL__) && defined(DEBUG) | 2148 | #if defined(__KERNEL__) && defined(DEBUG) |
2159 | /* | 2149 | /* |
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 09e1f4f35e97..f7f3a359c1c5 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -47,7 +47,7 @@ struct xfs_trans; | |||
47 | #define XFS_SUMOFFSTOBLOCK(mp,s) \ | 47 | #define XFS_SUMOFFSTOBLOCK(mp,s) \ |
48 | (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) | 48 | (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) |
49 | #define XFS_SUMPTR(mp,bp,so) \ | 49 | #define XFS_SUMPTR(mp,bp,so) \ |
50 | ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \ | 50 | ((xfs_suminfo_t *)((bp)->b_addr + \ |
51 | (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) | 51 | (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) |
52 | 52 | ||
53 | #define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) | 53 | #define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index d6d6fdfe9422..597d044a09a1 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -92,24 +92,6 @@ xfs_do_force_shutdown( | |||
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
95 | * Prints out an ALERT message about I/O error. | ||
96 | */ | ||
97 | void | ||
98 | xfs_ioerror_alert( | ||
99 | char *func, | ||
100 | struct xfs_mount *mp, | ||
101 | xfs_buf_t *bp, | ||
102 | xfs_daddr_t blkno) | ||
103 | { | ||
104 | xfs_alert(mp, | ||
105 | "I/O error occurred: meta-data dev %s block 0x%llx" | ||
106 | " (\"%s\") error %d buf count %zd", | ||
107 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | ||
108 | (__uint64_t)blkno, func, | ||
109 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * This isn't an absolute requirement, but it is | 95 | * This isn't an absolute requirement, but it is |
114 | * just a good idea to call xfs_read_buf instead of | 96 | * just a good idea to call xfs_read_buf instead of |
115 | * directly doing a read_buf call. For one, we shouldn't | 97 | * directly doing a read_buf call. For one, we shouldn't |
@@ -137,20 +119,19 @@ xfs_read_buf( | |||
137 | bp = xfs_buf_read(target, blkno, len, flags); | 119 | bp = xfs_buf_read(target, blkno, len, flags); |
138 | if (!bp) | 120 | if (!bp) |
139 | return XFS_ERROR(EIO); | 121 | return XFS_ERROR(EIO); |
140 | error = XFS_BUF_GETERROR(bp); | 122 | error = bp->b_error; |
141 | if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) { | 123 | if (!error && !XFS_FORCED_SHUTDOWN(mp)) { |
142 | *bpp = bp; | 124 | *bpp = bp; |
143 | } else { | 125 | } else { |
144 | *bpp = NULL; | 126 | *bpp = NULL; |
145 | if (error) { | 127 | if (error) { |
146 | xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp)); | 128 | xfs_buf_ioerror_alert(bp, __func__); |
147 | } else { | 129 | } else { |
148 | error = XFS_ERROR(EIO); | 130 | error = XFS_ERROR(EIO); |
149 | } | 131 | } |
150 | if (bp) { | 132 | if (bp) { |
151 | XFS_BUF_UNDONE(bp); | 133 | XFS_BUF_UNDONE(bp); |
152 | XFS_BUF_UNDELAYWRITE(bp); | 134 | xfs_buf_stale(bp); |
153 | XFS_BUF_STALE(bp); | ||
154 | /* | 135 | /* |
155 | * brelse clears B_ERROR and b_error | 136 | * brelse clears B_ERROR and b_error |
156 | */ | 137 | */ |
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 11c41ec6ed75..bbdb9ad6a4ba 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h | |||
@@ -42,8 +42,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) | |||
42 | extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, | 42 | extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, |
43 | xfs_daddr_t blkno, int len, uint flags, | 43 | xfs_daddr_t blkno, int len, uint flags, |
44 | struct xfs_buf **bpp); | 44 | struct xfs_buf **bpp); |
45 | extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, | ||
46 | xfs_buf_t *bp, xfs_daddr_t blkno); | ||
47 | extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); | 45 | extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); |
48 | 46 | ||
49 | #endif /* __XFS_RW_H__ */ | 47 | #endif /* __XFS_RW_H__ */ |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1eb2ba586814..cb6ae715814a 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) | |||
509 | 509 | ||
510 | #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ | 510 | #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ |
511 | #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) | 511 | #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) |
512 | #define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)XFS_BUF_PTR(bp)) | 512 | #define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) |
513 | 513 | ||
514 | #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) | 514 | #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) |
515 | #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ | 515 | #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ |
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/xfs_stats.c index 76fdc5861932..76fdc5861932 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/xfs_stats.c | |||
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/xfs_stats.h index 736854b1ca1a..736854b1ca1a 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/xfs_stats.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/xfs_super.c index 9a72dda58bd0..3eca58f51ae9 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -356,6 +356,8 @@ xfs_parseargs( | |||
356 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | 356 | mp->m_flags |= XFS_MOUNT_DELAYLOG; |
357 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 357 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
358 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 358 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
359 | xfs_warn(mp, | ||
360 | "nodelaylog is deprecated and will be removed in Linux 3.3"); | ||
359 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { | 361 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { |
360 | mp->m_flags |= XFS_MOUNT_DISCARD; | 362 | mp->m_flags |= XFS_MOUNT_DISCARD; |
361 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { | 363 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { |
@@ -794,8 +796,6 @@ xfs_fs_destroy_inode( | |||
794 | if (is_bad_inode(inode)) | 796 | if (is_bad_inode(inode)) |
795 | goto out_reclaim; | 797 | goto out_reclaim; |
796 | 798 | ||
797 | xfs_ioend_wait(ip); | ||
798 | |||
799 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); | 799 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); |
800 | 800 | ||
801 | /* | 801 | /* |
@@ -835,7 +835,6 @@ xfs_fs_inode_init_once( | |||
835 | inode_init_once(VFS_I(ip)); | 835 | inode_init_once(VFS_I(ip)); |
836 | 836 | ||
837 | /* xfs inode */ | 837 | /* xfs inode */ |
838 | atomic_set(&ip->i_iocount, 0); | ||
839 | atomic_set(&ip->i_pincount, 0); | 838 | atomic_set(&ip->i_pincount, 0); |
840 | spin_lock_init(&ip->i_flags_lock); | 839 | spin_lock_init(&ip->i_flags_lock); |
841 | init_waitqueue_head(&ip->i_ipin_wait); | 840 | init_waitqueue_head(&ip->i_ipin_wait); |
@@ -877,33 +876,17 @@ xfs_log_inode( | |||
877 | struct xfs_trans *tp; | 876 | struct xfs_trans *tp; |
878 | int error; | 877 | int error; |
879 | 878 | ||
880 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
881 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | 879 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); |
882 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); | 880 | error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); |
883 | |||
884 | if (error) { | 881 | if (error) { |
885 | xfs_trans_cancel(tp, 0); | 882 | xfs_trans_cancel(tp, 0); |
886 | /* we need to return with the lock held shared */ | ||
887 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
888 | return error; | 883 | return error; |
889 | } | 884 | } |
890 | 885 | ||
891 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 886 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
892 | 887 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | |
893 | /* | ||
894 | * Note - it's possible that we might have pushed ourselves out of the | ||
895 | * way during trans_reserve which would flush the inode. But there's | ||
896 | * no guarantee that the inode buffer has actually gone out yet (it's | ||
897 | * delwri). Plus the buffer could be pinned anyway if it's part of | ||
898 | * an inode in another recent transaction. So we play it safe and | ||
899 | * fire off the transaction anyway. | ||
900 | */ | ||
901 | xfs_trans_ijoin(tp, ip); | ||
902 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 888 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
903 | error = xfs_trans_commit(tp, 0); | 889 | return xfs_trans_commit(tp, 0); |
904 | xfs_ilock_demote(ip, XFS_ILOCK_EXCL); | ||
905 | |||
906 | return error; | ||
907 | } | 890 | } |
908 | 891 | ||
909 | STATIC int | 892 | STATIC int |
@@ -918,7 +901,9 @@ xfs_fs_write_inode( | |||
918 | trace_xfs_write_inode(ip); | 901 | trace_xfs_write_inode(ip); |
919 | 902 | ||
920 | if (XFS_FORCED_SHUTDOWN(mp)) | 903 | if (XFS_FORCED_SHUTDOWN(mp)) |
921 | return XFS_ERROR(EIO); | 904 | return -XFS_ERROR(EIO); |
905 | if (!ip->i_update_core) | ||
906 | return 0; | ||
922 | 907 | ||
923 | if (wbc->sync_mode == WB_SYNC_ALL) { | 908 | if (wbc->sync_mode == WB_SYNC_ALL) { |
924 | /* | 909 | /* |
@@ -926,15 +911,12 @@ xfs_fs_write_inode( | |||
926 | * of forcing it all the way to stable storage using a | 911 | * of forcing it all the way to stable storage using a |
927 | * synchronous transaction we let the log force inside the | 912 | * synchronous transaction we let the log force inside the |
928 | * ->sync_fs call do that for us, which reduces the number | 913 | * ->sync_fs call do that for us, which reduces the number |
929 | * of synchronous log foces dramatically. | 914 | * of synchronous log forces dramatically. |
930 | */ | 915 | */ |
931 | xfs_ioend_wait(ip); | 916 | error = xfs_log_inode(ip); |
932 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 917 | if (error) |
933 | if (ip->i_update_core) { | 918 | goto out; |
934 | error = xfs_log_inode(ip); | 919 | return 0; |
935 | if (error) | ||
936 | goto out_unlock; | ||
937 | } | ||
938 | } else { | 920 | } else { |
939 | /* | 921 | /* |
940 | * We make this non-blocking if the inode is contended, return | 922 | * We make this non-blocking if the inode is contended, return |
@@ -1033,7 +1015,7 @@ xfs_fs_put_super( | |||
1033 | */ | 1015 | */ |
1034 | xfs_filestream_unmount(mp); | 1016 | xfs_filestream_unmount(mp); |
1035 | 1017 | ||
1036 | XFS_bflush(mp->m_ddev_targp); | 1018 | xfs_flush_buftarg(mp->m_ddev_targp, 1); |
1037 | 1019 | ||
1038 | xfs_unmountfs(mp); | 1020 | xfs_unmountfs(mp); |
1039 | xfs_freesb(mp); | 1021 | xfs_freesb(mp); |
@@ -1457,7 +1439,7 @@ xfs_fs_fill_super( | |||
1457 | */ | 1439 | */ |
1458 | xfs_filestream_unmount(mp); | 1440 | xfs_filestream_unmount(mp); |
1459 | 1441 | ||
1460 | XFS_bflush(mp->m_ddev_targp); | 1442 | xfs_flush_buftarg(mp->m_ddev_targp, 1); |
1461 | 1443 | ||
1462 | xfs_unmountfs(mp); | 1444 | xfs_unmountfs(mp); |
1463 | goto out_free_sb; | 1445 | goto out_free_sb; |
@@ -1666,24 +1648,13 @@ xfs_init_workqueues(void) | |||
1666 | */ | 1648 | */ |
1667 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | 1649 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); |
1668 | if (!xfs_syncd_wq) | 1650 | if (!xfs_syncd_wq) |
1669 | goto out; | 1651 | return -ENOMEM; |
1670 | |||
1671 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1672 | if (!xfs_ail_wq) | ||
1673 | goto out_destroy_syncd; | ||
1674 | |||
1675 | return 0; | 1652 | return 0; |
1676 | |||
1677 | out_destroy_syncd: | ||
1678 | destroy_workqueue(xfs_syncd_wq); | ||
1679 | out: | ||
1680 | return -ENOMEM; | ||
1681 | } | 1653 | } |
1682 | 1654 | ||
1683 | STATIC void | 1655 | STATIC void |
1684 | xfs_destroy_workqueues(void) | 1656 | xfs_destroy_workqueues(void) |
1685 | { | 1657 | { |
1686 | destroy_workqueue(xfs_ail_wq); | ||
1687 | destroy_workqueue(xfs_syncd_wq); | 1658 | destroy_workqueue(xfs_syncd_wq); |
1688 | } | 1659 | } |
1689 | 1660 | ||
@@ -1695,7 +1666,6 @@ init_xfs_fs(void) | |||
1695 | printk(KERN_INFO XFS_VERSION_STRING " with " | 1666 | printk(KERN_INFO XFS_VERSION_STRING " with " |
1696 | XFS_BUILD_OPTIONS " enabled\n"); | 1667 | XFS_BUILD_OPTIONS " enabled\n"); |
1697 | 1668 | ||
1698 | xfs_ioend_init(); | ||
1699 | xfs_dir_startup(); | 1669 | xfs_dir_startup(); |
1700 | 1670 | ||
1701 | error = xfs_init_zones(); | 1671 | error = xfs_init_zones(); |
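Two behavioural notes on the ->write_inode change earlier in this file: the shutdown case now returns a negative errno as the VFS expects, and the i_update_core test is hoisted so clean inodes bail out before taking any locks. The WB_SYNC_ALL path then collapses to a single transactional log of the inode core; a sketch of the resulting control flow (error handling trimmed):

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);
	if (!ip->i_update_core)
		return 0;			/* inode core is clean */

	if (wbc->sync_mode == WB_SYNC_ALL)
		return xfs_log_inode(ip);	/* log force deferred to ->sync_fs */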
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/xfs_super.h index 50a3266c999e..50a3266c999e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/xfs_sync.c index e4c938afb910..aa3dc1a4d53d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -227,21 +227,17 @@ xfs_sync_inode_data( | |||
227 | int error = 0; | 227 | int error = 0; |
228 | 228 | ||
229 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 229 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
230 | goto out_wait; | 230 | return 0; |
231 | 231 | ||
232 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { | 232 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { |
233 | if (flags & SYNC_TRYLOCK) | 233 | if (flags & SYNC_TRYLOCK) |
234 | goto out_wait; | 234 | return 0; |
235 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 235 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
236 | } | 236 | } |
237 | 237 | ||
238 | error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? | 238 | error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? |
239 | 0 : XBF_ASYNC, FI_NONE); | 239 | 0 : XBF_ASYNC, FI_NONE); |
240 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 240 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
241 | |||
242 | out_wait: | ||
243 | if (flags & SYNC_WAIT) | ||
244 | xfs_ioend_wait(ip); | ||
245 | return error; | 241 | return error; |
246 | } | 242 | } |
247 | 243 | ||
@@ -322,6 +318,7 @@ xfs_sync_fsdata( | |||
322 | struct xfs_mount *mp) | 318 | struct xfs_mount *mp) |
323 | { | 319 | { |
324 | struct xfs_buf *bp; | 320 | struct xfs_buf *bp; |
321 | int error; | ||
325 | 322 | ||
326 | /* | 323 | /* |
327 | * If the buffer is pinned then push on the log so we won't get stuck | 324 | * If the buffer is pinned then push on the log so we won't get stuck |
@@ -332,10 +329,11 @@ xfs_sync_fsdata( | |||
332 | * between there and here. | 329 | * between there and here. |
333 | */ | 330 | */ |
334 | bp = xfs_getsb(mp, 0); | 331 | bp = xfs_getsb(mp, 0); |
335 | if (XFS_BUF_ISPINNED(bp)) | 332 | if (xfs_buf_ispinned(bp)) |
336 | xfs_log_force(mp, 0); | 333 | xfs_log_force(mp, 0); |
337 | 334 | error = xfs_bwrite(bp); | |
338 | return xfs_bwrite(mp, bp); | 335 | xfs_buf_relse(bp); |
336 | return error; | ||
339 | } | 337 | } |
340 | 338 | ||
341 | /* | 339 | /* |
@@ -379,7 +377,7 @@ xfs_quiesce_data( | |||
379 | 377 | ||
380 | /* flush data-only devices */ | 378 | /* flush data-only devices */ |
381 | if (mp->m_rtdev_targp) | 379 | if (mp->m_rtdev_targp) |
382 | XFS_bflush(mp->m_rtdev_targp); | 380 | xfs_flush_buftarg(mp->m_rtdev_targp, 1); |
383 | 381 | ||
384 | return error ? error : error2; | 382 | return error ? error : error2; |
385 | } | 383 | } |
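The xfs_sync_fsdata() rewrite reflects the new xfs_bwrite() calling convention: it takes only the buffer, writes synchronously, and leaves the reference with the caller, who must release it explicitly. As a usage sketch of the convention this hunk adopts:

	bp = xfs_getsb(mp, 0);
	if (xfs_buf_ispinned(bp))
		xfs_log_force(mp, 0);	/* unpin the superblock first */

	error = xfs_bwrite(bp);		/* no mount argument any more */
	xfs_buf_relse(bp);		/* caller drops the reference */
	return error;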
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/xfs_sync.h index 941202e7ac6e..941202e7ac6e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/xfs_sync.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index ee2d2adaa438..ee2d2adaa438 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c | |||
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index b9937d450f8e..b9937d450f8e 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h | |||
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/xfs_trace.c index 88d25d4aa56e..9010ce885e6a 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/xfs_trace.c | |||
@@ -43,8 +43,8 @@ | |||
43 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
44 | #include "xfs_iomap.h" | 44 | #include "xfs_iomap.h" |
45 | #include "xfs_aops.h" | 45 | #include "xfs_aops.h" |
46 | #include "quota/xfs_dquot_item.h" | 46 | #include "xfs_dquot_item.h" |
47 | #include "quota/xfs_dquot.h" | 47 | #include "xfs_dquot.h" |
48 | #include "xfs_log_recover.h" | 48 | #include "xfs_log_recover.h" |
49 | #include "xfs_inode_item.h" | 49 | #include "xfs_inode_item.h" |
50 | 50 | ||
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/xfs_trace.h index 690fc7a7bd72..f1d2802b2f07 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -30,6 +30,7 @@ struct xfs_buf_log_item; | |||
30 | struct xfs_da_args; | 30 | struct xfs_da_args; |
31 | struct xfs_da_node_entry; | 31 | struct xfs_da_node_entry; |
32 | struct xfs_dquot; | 32 | struct xfs_dquot; |
33 | struct xfs_log_item; | ||
33 | struct xlog_ticket; | 34 | struct xlog_ticket; |
34 | struct log; | 35 | struct log; |
35 | struct xlog_recover; | 36 | struct xlog_recover; |
@@ -320,7 +321,6 @@ DEFINE_BUF_EVENT(xfs_buf_rele); | |||
320 | DEFINE_BUF_EVENT(xfs_buf_iodone); | 321 | DEFINE_BUF_EVENT(xfs_buf_iodone); |
321 | DEFINE_BUF_EVENT(xfs_buf_iorequest); | 322 | DEFINE_BUF_EVENT(xfs_buf_iorequest); |
322 | DEFINE_BUF_EVENT(xfs_buf_bawrite); | 323 | DEFINE_BUF_EVENT(xfs_buf_bawrite); |
323 | DEFINE_BUF_EVENT(xfs_buf_bdwrite); | ||
324 | DEFINE_BUF_EVENT(xfs_buf_lock); | 324 | DEFINE_BUF_EVENT(xfs_buf_lock); |
325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
326 | DEFINE_BUF_EVENT(xfs_buf_trylock); | 326 | DEFINE_BUF_EVENT(xfs_buf_trylock); |
@@ -577,6 +577,7 @@ DEFINE_INODE_EVENT(xfs_vm_bmap); | |||
577 | DEFINE_INODE_EVENT(xfs_file_ioctl); | 577 | DEFINE_INODE_EVENT(xfs_file_ioctl); |
578 | DEFINE_INODE_EVENT(xfs_file_compat_ioctl); | 578 | DEFINE_INODE_EVENT(xfs_file_compat_ioctl); |
579 | DEFINE_INODE_EVENT(xfs_ioctl_setattr); | 579 | DEFINE_INODE_EVENT(xfs_ioctl_setattr); |
580 | DEFINE_INODE_EVENT(xfs_dir_fsync); | ||
580 | DEFINE_INODE_EVENT(xfs_file_fsync); | 581 | DEFINE_INODE_EVENT(xfs_file_fsync); |
581 | DEFINE_INODE_EVENT(xfs_destroy_inode); | 582 | DEFINE_INODE_EVENT(xfs_destroy_inode); |
582 | DEFINE_INODE_EVENT(xfs_write_inode); | 583 | DEFINE_INODE_EVENT(xfs_write_inode); |
@@ -853,6 +854,42 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); | |||
853 | DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); | 854 | DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); |
854 | DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); | 855 | DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); |
855 | 856 | ||
857 | DECLARE_EVENT_CLASS(xfs_log_item_class, | ||
858 | TP_PROTO(struct xfs_log_item *lip), | ||
859 | TP_ARGS(lip), | ||
860 | TP_STRUCT__entry( | ||
861 | __field(dev_t, dev) | ||
862 | __field(void *, lip) | ||
863 | __field(uint, type) | ||
864 | __field(uint, flags) | ||
865 | __field(xfs_lsn_t, lsn) | ||
866 | ), | ||
867 | TP_fast_assign( | ||
868 | __entry->dev = lip->li_mountp->m_super->s_dev; | ||
869 | __entry->lip = lip; | ||
870 | __entry->type = lip->li_type; | ||
871 | __entry->flags = lip->li_flags; | ||
872 | __entry->lsn = lip->li_lsn; | ||
873 | ), | ||
874 | TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", | ||
875 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
876 | __entry->lip, | ||
877 | CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), | ||
878 | __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), | ||
879 | __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) | ||
880 | ) | ||
881 | |||
882 | #define DEFINE_LOG_ITEM_EVENT(name) \ | ||
883 | DEFINE_EVENT(xfs_log_item_class, name, \ | ||
884 | TP_PROTO(struct xfs_log_item *lip), \ | ||
885 | TP_ARGS(lip)) | ||
886 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); | ||
887 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf); | ||
888 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned); | ||
889 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); | ||
890 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); | ||
891 | |||
892 | |||
856 | DECLARE_EVENT_CLASS(xfs_file_class, | 893 | DECLARE_EVENT_CLASS(xfs_file_class, |
857 | TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), | 894 | TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), |
858 | TP_ARGS(ip, count, offset, flags), | 895 | TP_ARGS(ip, count, offset, flags), |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index efc147f0e9b6..1f35b2feca97 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1790,9 +1790,7 @@ xfs_trans_commit_cil( | |||
1790 | } | 1790 | } |
1791 | 1791 | ||
1792 | /* | 1792 | /* |
1793 | * xfs_trans_commit | 1793 | * Commit the given transaction to the log. |
1794 | * | ||
1795 | * Commit the given transaction to the log a/synchronously. | ||
1796 | * | 1794 | * |
1797 | * XFS disk error handling mechanism is not based on a typical | 1795 | * XFS disk error handling mechanism is not based on a typical |
1798 | * transaction abort mechanism. Logically after the filesystem | 1796 | * transaction abort mechanism. Logically after the filesystem |
@@ -1804,10 +1802,9 @@ xfs_trans_commit_cil( | |||
1804 | * Do not reference the transaction structure after this call. | 1802 | * Do not reference the transaction structure after this call. |
1805 | */ | 1803 | */ |
1806 | int | 1804 | int |
1807 | _xfs_trans_commit( | 1805 | xfs_trans_commit( |
1808 | struct xfs_trans *tp, | 1806 | struct xfs_trans *tp, |
1809 | uint flags, | 1807 | uint flags) |
1810 | int *log_flushed) | ||
1811 | { | 1808 | { |
1812 | struct xfs_mount *mp = tp->t_mountp; | 1809 | struct xfs_mount *mp = tp->t_mountp; |
1813 | xfs_lsn_t commit_lsn = -1; | 1810 | xfs_lsn_t commit_lsn = -1; |
@@ -1866,7 +1863,7 @@ _xfs_trans_commit( | |||
1866 | if (sync) { | 1863 | if (sync) { |
1867 | if (!error) { | 1864 | if (!error) { |
1868 | error = _xfs_log_force_lsn(mp, commit_lsn, | 1865 | error = _xfs_log_force_lsn(mp, commit_lsn, |
1869 | XFS_LOG_SYNC, log_flushed); | 1866 | XFS_LOG_SYNC, NULL); |
1870 | } | 1867 | } |
1871 | XFS_STATS_INC(xs_trans_sync); | 1868 | XFS_STATS_INC(xs_trans_sync); |
1872 | } else { | 1869 | } else { |
@@ -2021,6 +2018,6 @@ xfs_trans_roll( | |||
2021 | if (error) | 2018 | if (error) |
2022 | return error; | 2019 | return error; |
2023 | 2020 | ||
2024 | xfs_trans_ijoin(trans, dp); | 2021 | xfs_trans_ijoin(trans, dp, 0); |
2025 | return 0; | 2022 | return 0; |
2026 | } | 2023 | } |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 06a9759b6352..603f3eb52041 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops { | |||
350 | void (*iop_unlock)(xfs_log_item_t *); | 350 | void (*iop_unlock)(xfs_log_item_t *); |
351 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); | 351 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); |
352 | void (*iop_push)(xfs_log_item_t *); | 352 | void (*iop_push)(xfs_log_item_t *); |
353 | void (*iop_pushbuf)(xfs_log_item_t *); | 353 | bool (*iop_pushbuf)(xfs_log_item_t *); |
354 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); | 354 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); |
355 | } xfs_item_ops_t; | 355 | } xfs_item_ops_t; |
356 | 356 | ||
@@ -470,8 +470,7 @@ void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); | |||
470 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | 470 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); |
471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
472 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); | 472 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); |
473 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); | 473 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); |
474 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); | ||
475 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); | 474 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); |
476 | void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); | 475 | void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); |
477 | struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); | 476 | struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); |
@@ -487,10 +486,7 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, | |||
487 | struct xfs_efd_log_item *, | 486 | struct xfs_efd_log_item *, |
488 | xfs_fsblock_t, | 487 | xfs_fsblock_t, |
489 | xfs_extlen_t); | 488 | xfs_extlen_t); |
490 | int _xfs_trans_commit(xfs_trans_t *, | 489 | int xfs_trans_commit(xfs_trans_t *, uint flags); |
491 | uint flags, | ||
492 | int *); | ||
493 | #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) | ||
494 | void xfs_trans_cancel(xfs_trans_t *, int); | 490 | void xfs_trans_cancel(xfs_trans_t *, int); |
495 | int xfs_trans_ail_init(struct xfs_mount *); | 491 | int xfs_trans_ail_init(struct xfs_mount *); |
496 | void xfs_trans_ail_destroy(struct xfs_mount *); | 492 | void xfs_trans_ail_destroy(struct xfs_mount *); |
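With the _xfs_trans_commit()/log_flushed variant gone, xfs_trans_commit() is an ordinary function again, and a caller that needs the commit record on stable storage marks the transaction synchronous instead of threading an out-parameter through. A sketch of the caller-side idiom (xfs_trans_set_sync() is the pre-existing flag helper, not something added by this patch, and need_stable is a hypothetical condition):

	if (need_stable)
		xfs_trans_set_sync(tp);		/* commit forces the log */
	error = xfs_trans_commit(tp, 0);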
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 43233e92f0f6..ed9252bcdac9 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -26,10 +26,9 @@ | |||
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_trace.h" | ||
29 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
30 | 31 | ||
31 | struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ | ||
32 | |||
33 | #ifdef DEBUG | 32 | #ifdef DEBUG |
34 | /* | 33 | /* |
35 | * Check that the list is sorted as it should be. | 34 | * Check that the list is sorted as it should be. |
@@ -299,7 +298,7 @@ xfs_trans_ail_cursor_last( | |||
299 | * Splice the log item list into the AIL at the given LSN. We splice to the | 298 | * Splice the log item list into the AIL at the given LSN. We splice to the |
300 | * tail of the given LSN to maintain insert order for push traversals. The | 299 | * tail of the given LSN to maintain insert order for push traversals. The |
301 | * cursor is optional, allowing repeated updates to the same LSN to avoid | 300 | * cursor is optional, allowing repeated updates to the same LSN to avoid |
302 | * repeated traversals. | 301 | * repeated traversals. This should not be called with an empty list. |
303 | */ | 302 | */ |
304 | static void | 303 | static void |
305 | xfs_ail_splice( | 304 | xfs_ail_splice( |
@@ -308,50 +307,39 @@ xfs_ail_splice( | |||
308 | struct list_head *list, | 307 | struct list_head *list, |
309 | xfs_lsn_t lsn) | 308 | xfs_lsn_t lsn) |
310 | { | 309 | { |
311 | struct xfs_log_item *lip = cur ? cur->item : NULL; | 310 | struct xfs_log_item *lip; |
312 | struct xfs_log_item *next_lip; | 311 | |
312 | ASSERT(!list_empty(list)); | ||
313 | 313 | ||
314 | /* | 314 | /* |
315 | * Get a new cursor if we don't have a placeholder or the existing one | 315 | * Use the cursor to determine the insertion point if one is |
316 | * has been invalidated. | 316 | * provided. If not, or if the one we got is not valid, |
317 | * find the place in the AIL where the items belong. | ||
317 | */ | 318 | */ |
318 | if (!lip || (__psint_t)lip & 1) { | 319 | lip = cur ? cur->item : NULL; |
320 | if (!lip || (__psint_t) lip & 1) | ||
319 | lip = __xfs_trans_ail_cursor_last(ailp, lsn); | 321 | lip = __xfs_trans_ail_cursor_last(ailp, lsn); |
320 | 322 | ||
321 | if (!lip) { | 323 | /* |
322 | /* The list is empty, so just splice and return. */ | 324 | * If a cursor is provided, we know we're processing the AIL |
323 | if (cur) | 325 | * in lsn order, and future items to be spliced in will |
324 | cur->item = NULL; | 326 | * follow the last one being inserted now. Update the |
325 | list_splice(list, &ailp->xa_ail); | 327 | * cursor to point to that last item, now while we have a |
326 | return; | 328 | * reliable pointer to it. |
327 | } | 329 | */ |
328 | } | 330 | if (cur) |
331 | cur->item = list_entry(list->prev, struct xfs_log_item, li_ail); | ||
329 | 332 | ||
330 | /* | 333 | /* |
331 | * Our cursor points to the item we want to insert _after_, so we have | 334 | * Finally perform the splice. Unless the AIL was empty, |
332 | * to update the cursor to point to the end of the list we are splicing | 335 | * lip points to the item in the AIL _after_ which the new |
333 | * in so that it points to the correct location for the next splice. | 336 | * items should go. If lip is null the AIL was empty, so |
334 | * i.e. before the splice | 337 | * the new items go at the head of the AIL. |
335 | * | ||
336 | * lsn -> lsn -> lsn + x -> lsn + x ... | ||
337 | * ^ | ||
338 | * | cursor points here | ||
339 | * | ||
340 | * After the splice we have: | ||
341 | * | ||
342 | * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... | ||
343 | * ^ ^ | ||
344 | * | cursor points here | needs to move here | ||
345 | * | ||
346 | * So we set the cursor to the last item in the list to be spliced | ||
347 | * before we execute the splice, resulting in the cursor pointing to | ||
348 | * the correct item after the splice occurs. | ||
349 | */ | 338 | */ |
350 | if (cur) { | 339 | if (lip) |
351 | next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); | 340 | list_splice(list, &lip->li_ail); |
352 | cur->item = next_lip; | 341 | else |
353 | } | 342 | list_splice(list, &ailp->xa_ail); |
354 | list_splice(list, &lip->li_ail); | ||
355 | } | 343 | } |
356 | 344 | ||
357 | /* | 345 | /* |
@@ -367,28 +355,34 @@ xfs_ail_delete( | |||
367 | xfs_trans_ail_cursor_clear(ailp, lip); | 355 | xfs_trans_ail_cursor_clear(ailp, lip); |
368 | } | 356 | } |
369 | 357 | ||
370 | /* | 358 | static long |
371 | * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself | 359 | xfsaild_push( |
372 | * to run at a later time if there is more work to do to complete the push. | 360 | struct xfs_ail *ailp) |
373 | */ | ||
374 | STATIC void | ||
375 | xfs_ail_worker( | ||
376 | struct work_struct *work) | ||
377 | { | 361 | { |
378 | struct xfs_ail *ailp = container_of(to_delayed_work(work), | ||
379 | struct xfs_ail, xa_work); | ||
380 | xfs_mount_t *mp = ailp->xa_mount; | 362 | xfs_mount_t *mp = ailp->xa_mount; |
381 | struct xfs_ail_cursor cur; | 363 | struct xfs_ail_cursor cur; |
382 | xfs_log_item_t *lip; | 364 | xfs_log_item_t *lip; |
383 | xfs_lsn_t lsn; | 365 | xfs_lsn_t lsn; |
384 | xfs_lsn_t target; | 366 | xfs_lsn_t target; |
385 | long tout = 10; | 367 | long tout = 10; |
386 | int flush_log = 0; | ||
387 | int stuck = 0; | 368 | int stuck = 0; |
388 | int count = 0; | 369 | int count = 0; |
389 | int push_xfsbufd = 0; | 370 | int push_xfsbufd = 0; |
390 | 371 | ||
372 | /* | ||
373 | * If last time we ran we encountered pinned items, force the log first | ||
374 | * and wait for it before pushing again. | ||
375 | */ | ||
391 | spin_lock(&ailp->xa_lock); | 376 | spin_lock(&ailp->xa_lock); |
377 | if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush && | ||
378 | !list_empty(&ailp->xa_ail)) { | ||
379 | ailp->xa_log_flush = 0; | ||
380 | spin_unlock(&ailp->xa_lock); | ||
381 | XFS_STATS_INC(xs_push_ail_flush); | ||
382 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
383 | spin_lock(&ailp->xa_lock); | ||
384 | } | ||
385 | |||
392 | target = ailp->xa_target; | 386 | target = ailp->xa_target; |
393 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); | 387 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); |
394 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 388 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { |
@@ -432,26 +426,37 @@ xfs_ail_worker( | |||
432 | switch (lock_result) { | 426 | switch (lock_result) { |
433 | case XFS_ITEM_SUCCESS: | 427 | case XFS_ITEM_SUCCESS: |
434 | XFS_STATS_INC(xs_push_ail_success); | 428 | XFS_STATS_INC(xs_push_ail_success); |
429 | trace_xfs_ail_push(lip); | ||
430 | |||
435 | IOP_PUSH(lip); | 431 | IOP_PUSH(lip); |
436 | ailp->xa_last_pushed_lsn = lsn; | 432 | ailp->xa_last_pushed_lsn = lsn; |
437 | break; | 433 | break; |
438 | 434 | ||
439 | case XFS_ITEM_PUSHBUF: | 435 | case XFS_ITEM_PUSHBUF: |
440 | XFS_STATS_INC(xs_push_ail_pushbuf); | 436 | XFS_STATS_INC(xs_push_ail_pushbuf); |
441 | IOP_PUSHBUF(lip); | 437 | trace_xfs_ail_pushbuf(lip); |
442 | ailp->xa_last_pushed_lsn = lsn; | 438 | |
439 | if (!IOP_PUSHBUF(lip)) { | ||
440 | trace_xfs_ail_pushbuf_pinned(lip); | ||
441 | stuck++; | ||
442 | ailp->xa_log_flush++; | ||
443 | } else { | ||
444 | ailp->xa_last_pushed_lsn = lsn; | ||
445 | } | ||
443 | push_xfsbufd = 1; | 446 | push_xfsbufd = 1; |
444 | break; | 447 | break; |
445 | 448 | ||
446 | case XFS_ITEM_PINNED: | 449 | case XFS_ITEM_PINNED: |
447 | XFS_STATS_INC(xs_push_ail_pinned); | 450 | XFS_STATS_INC(xs_push_ail_pinned); |
451 | trace_xfs_ail_pinned(lip); | ||
452 | |||
448 | stuck++; | 453 | stuck++; |
449 | flush_log = 1; | 454 | ailp->xa_log_flush++; |
450 | break; | 455 | break; |
451 | 456 | ||
452 | case XFS_ITEM_LOCKED: | 457 | case XFS_ITEM_LOCKED: |
453 | XFS_STATS_INC(xs_push_ail_locked); | 458 | XFS_STATS_INC(xs_push_ail_locked); |
454 | ailp->xa_last_pushed_lsn = lsn; | 459 | trace_xfs_ail_locked(lip); |
455 | stuck++; | 460 | stuck++; |
456 | break; | 461 | break; |
457 | 462 | ||
@@ -491,16 +496,6 @@ xfs_ail_worker( | |||
491 | xfs_trans_ail_cursor_done(ailp, &cur); | 496 | xfs_trans_ail_cursor_done(ailp, &cur); |
492 | spin_unlock(&ailp->xa_lock); | 497 | spin_unlock(&ailp->xa_lock); |
493 | 498 | ||
494 | if (flush_log) { | ||
495 | /* | ||
496 | * If something we need to push out was pinned, then | ||
497 | * push out the log so it will become unpinned and | ||
498 | * move forward in the AIL. | ||
499 | */ | ||
500 | XFS_STATS_INC(xs_push_ail_flush); | ||
501 | xfs_log_force(mp, 0); | ||
502 | } | ||
503 | |||
504 | if (push_xfsbufd) { | 499 | if (push_xfsbufd) { |
505 | /* we've got delayed write buffers to flush */ | 500 | /* we've got delayed write buffers to flush */ |
506 | wake_up_process(mp->m_ddev_targp->bt_task); | 501 | wake_up_process(mp->m_ddev_targp->bt_task); |
@@ -511,20 +506,7 @@ out_done: | |||
511 | if (!count) { | 506 | if (!count) { |
512 | /* We're past our target or empty, so idle */ | 507 | /* We're past our target or empty, so idle */ |
513 | ailp->xa_last_pushed_lsn = 0; | 508 | ailp->xa_last_pushed_lsn = 0; |
514 | 509 | ailp->xa_log_flush = 0; | |
515 | /* | ||
516 | * We clear the XFS_AIL_PUSHING_BIT first before checking | ||
517 | * whether the target has changed. If the target has changed, | ||
518 | * this pushes the requeue race directly onto the result of the | ||
519 | * atomic test/set bit, so we are guaranteed that either the | ||
520 | * the pusher that changed the target or ourselves will requeue | ||
521 | * the work (but not both). | ||
522 | */ | ||
523 | clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); | ||
524 | smp_rmb(); | ||
525 | if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || | ||
526 | test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
527 | return; | ||
528 | 510 | ||
529 | tout = 50; | 511 | tout = 50; |
530 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | 512 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { |
@@ -543,14 +525,39 @@ out_done: | |||
543 | * were stuck. | 525 | * were stuck. |
544 | * | 526 | * |
545 | * Backoff a bit more to allow some I/O to complete before | 527 | * Backoff a bit more to allow some I/O to complete before |
546 | * continuing from where we were. | 528 | * restarting from the start of the AIL. This prevents us |
529 | * from spinning on the same items, and if they are pinned will | ||
530 | * allow the restart to issue a log force to unpin the stuck | ||
531 | * items. | ||
547 | */ | 532 | */ |
548 | tout = 20; | 533 | tout = 20; |
534 | ailp->xa_last_pushed_lsn = 0; | ||
535 | } | ||
536 | |||
537 | return tout; | ||
538 | } | ||
539 | |||
540 | static int | ||
541 | xfsaild( | ||
542 | void *data) | ||
543 | { | ||
544 | struct xfs_ail *ailp = data; | ||
545 | long tout = 0; /* milliseconds */ | ||
546 | |||
547 | while (!kthread_should_stop()) { | ||
548 | if (tout && tout <= 20) | ||
549 | __set_current_state(TASK_KILLABLE); | ||
550 | else | ||
551 | __set_current_state(TASK_INTERRUPTIBLE); | ||
552 | schedule_timeout(tout ? | ||
553 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
554 | |||
555 | try_to_freeze(); | ||
556 | |||
557 | tout = xfsaild_push(ailp); | ||
549 | } | 558 | } |
550 | 559 | ||
551 | /* There is more to do, requeue us. */ | 560 | return 0; |
552 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, | ||
553 | msecs_to_jiffies(tout)); | ||
554 | } | 561 | } |
555 | 562 | ||
556 | /* | 563 | /* |
@@ -585,8 +592,9 @@ xfs_ail_push( | |||
585 | */ | 592 | */ |
586 | smp_wmb(); | 593 | smp_wmb(); |
587 | xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); | 594 | xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); |
588 | if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | 595 | smp_wmb(); |
589 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); | 596 | |
597 | wake_up_process(ailp->xa_task); | ||
590 | } | 598 | } |
591 | 599 | ||
592 | /* | 600 | /* |
@@ -682,6 +690,7 @@ xfs_trans_ail_update_bulk( | |||
682 | int i; | 690 | int i; |
683 | LIST_HEAD(tmp); | 691 | LIST_HEAD(tmp); |
684 | 692 | ||
693 | ASSERT(nr_items > 0); /* Not required, but true. */ | ||
685 | mlip = xfs_ail_min(ailp); | 694 | mlip = xfs_ail_min(ailp); |
686 | 695 | ||
687 | for (i = 0; i < nr_items; i++) { | 696 | for (i = 0; i < nr_items; i++) { |
@@ -701,7 +710,8 @@ xfs_trans_ail_update_bulk( | |||
701 | list_add(&lip->li_ail, &tmp); | 710 | list_add(&lip->li_ail, &tmp); |
702 | } | 711 | } |
703 | 712 | ||
704 | xfs_ail_splice(ailp, cur, &tmp, lsn); | 713 | if (!list_empty(&tmp)) |
714 | xfs_ail_splice(ailp, cur, &tmp, lsn); | ||
705 | 715 | ||
706 | if (!mlip_changed) { | 716 | if (!mlip_changed) { |
707 | spin_unlock(&ailp->xa_lock); | 717 | spin_unlock(&ailp->xa_lock); |
@@ -822,9 +832,18 @@ xfs_trans_ail_init( | |||
822 | INIT_LIST_HEAD(&ailp->xa_ail); | 832 | INIT_LIST_HEAD(&ailp->xa_ail); |
823 | INIT_LIST_HEAD(&ailp->xa_cursors); | 833 | INIT_LIST_HEAD(&ailp->xa_cursors); |
824 | spin_lock_init(&ailp->xa_lock); | 834 | spin_lock_init(&ailp->xa_lock); |
825 | INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); | 835 | |
836 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
837 | ailp->xa_mount->m_fsname); | ||
838 | if (IS_ERR(ailp->xa_task)) | ||
839 | goto out_free_ailp; | ||
840 | |||
826 | mp->m_ail = ailp; | 841 | mp->m_ail = ailp; |
827 | return 0; | 842 | return 0; |
843 | |||
844 | out_free_ailp: | ||
845 | kmem_free(ailp); | ||
846 | return ENOMEM; | ||
828 | } | 847 | } |
829 | 848 | ||
830 | void | 849 | void |
@@ -833,6 +852,6 @@ xfs_trans_ail_destroy( | |||
833 | { | 852 | { |
834 | struct xfs_ail *ailp = mp->m_ail; | 853 | struct xfs_ail *ailp = mp->m_ail; |
835 | 854 | ||
836 | cancel_delayed_work_sync(&ailp->xa_work); | 855 | kthread_stop(ailp->xa_task); |
837 | kmem_free(ailp); | 856 | kmem_free(ailp); |
838 | } | 857 | } |
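Taken together, this file reverts AIL pushing from a self-requeueing delayed work item to a dedicated xfsaild kthread: xfs_ail_push() now just wakes the task, and the push function returns the next back-off in milliseconds. Stripped of the XFS specifics, the thread body is the stock sleep/wake kthread idiom (a sketch of the loop shown above):

	while (!kthread_should_stop()) {
		/* short back-offs sleep uninterruptibly so a push isn't lost */
		if (tout && tout <= 20)
			__set_current_state(TASK_KILLABLE);
		else
			__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(tout ?
				msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
		try_to_freeze();
		tout = xfsaild_push(ailp);	/* next back-off in ms */
	}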
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 15584fc3ed7d..475a4ded4f41 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -54,7 +54,7 @@ xfs_trans_buf_item_match( | |||
54 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { | 54 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { |
55 | blip = (struct xfs_buf_log_item *)lidp->lid_item; | 55 | blip = (struct xfs_buf_log_item *)lidp->lid_item; |
56 | if (blip->bli_item.li_type == XFS_LI_BUF && | 56 | if (blip->bli_item.li_type == XFS_LI_BUF && |
57 | XFS_BUF_TARGET(blip->bli_buf) == target && | 57 | blip->bli_buf->b_target == target && |
58 | XFS_BUF_ADDR(blip->bli_buf) == blkno && | 58 | XFS_BUF_ADDR(blip->bli_buf) == blkno && |
59 | XFS_BUF_COUNT(blip->bli_buf) == len) | 59 | XFS_BUF_COUNT(blip->bli_buf) == len) |
60 | return blip->bli_buf; | 60 | return blip->bli_buf; |
@@ -80,7 +80,6 @@ _xfs_trans_bjoin( | |||
80 | { | 80 | { |
81 | struct xfs_buf_log_item *bip; | 81 | struct xfs_buf_log_item *bip; |
82 | 82 | ||
83 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
84 | ASSERT(bp->b_transp == NULL); | 83 | ASSERT(bp->b_transp == NULL); |
85 | 84 | ||
86 | /* | 85 | /* |
@@ -161,8 +160,10 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
161 | bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); | 160 | bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); |
162 | if (bp != NULL) { | 161 | if (bp != NULL) { |
163 | ASSERT(xfs_buf_islocked(bp)); | 162 | ASSERT(xfs_buf_islocked(bp)); |
164 | if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) | 163 | if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { |
165 | XFS_BUF_SUPER_STALE(bp); | 164 | xfs_buf_stale(bp); |
165 | XFS_BUF_DONE(bp); | ||
166 | } | ||
166 | 167 | ||
167 | /* | 168 | /* |
168 | * If the buffer is stale then it was binval'ed | 169 | * If the buffer is stale then it was binval'ed |
@@ -194,7 +195,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
194 | return NULL; | 195 | return NULL; |
195 | } | 196 | } |
196 | 197 | ||
197 | ASSERT(!XFS_BUF_GETERROR(bp)); | 198 | ASSERT(!bp->b_error); |
198 | 199 | ||
199 | _xfs_trans_bjoin(tp, bp, 1); | 200 | _xfs_trans_bjoin(tp, bp, 1); |
200 | trace_xfs_trans_get_buf(bp->b_fspriv); | 201 | trace_xfs_trans_get_buf(bp->b_fspriv); |
@@ -293,10 +294,9 @@ xfs_trans_read_buf( | |||
293 | return (flags & XBF_TRYLOCK) ? | 294 | return (flags & XBF_TRYLOCK) ? |
294 | EAGAIN : XFS_ERROR(ENOMEM); | 295 | EAGAIN : XFS_ERROR(ENOMEM); |
295 | 296 | ||
296 | if (XFS_BUF_GETERROR(bp) != 0) { | 297 | if (bp->b_error) { |
297 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 298 | error = bp->b_error; |
298 | bp, blkno); | 299 | xfs_buf_ioerror_alert(bp, __func__); |
299 | error = XFS_BUF_GETERROR(bp); | ||
300 | xfs_buf_relse(bp); | 300 | xfs_buf_relse(bp); |
301 | return error; | 301 | return error; |
302 | } | 302 | } |
@@ -330,7 +330,7 @@ xfs_trans_read_buf( | |||
330 | ASSERT(xfs_buf_islocked(bp)); | 330 | ASSERT(xfs_buf_islocked(bp)); |
331 | ASSERT(bp->b_transp == tp); | 331 | ASSERT(bp->b_transp == tp); |
332 | ASSERT(bp->b_fspriv != NULL); | 332 | ASSERT(bp->b_fspriv != NULL); |
333 | ASSERT((XFS_BUF_ISERROR(bp)) == 0); | 333 | ASSERT(!bp->b_error); |
334 | if (!(XFS_BUF_ISDONE(bp))) { | 334 | if (!(XFS_BUF_ISDONE(bp))) { |
335 | trace_xfs_trans_read_buf_io(bp, _RET_IP_); | 335 | trace_xfs_trans_read_buf_io(bp, _RET_IP_); |
336 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 336 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
@@ -338,8 +338,7 @@ xfs_trans_read_buf( | |||
338 | xfsbdstrat(tp->t_mountp, bp); | 338 | xfsbdstrat(tp->t_mountp, bp); |
339 | error = xfs_buf_iowait(bp); | 339 | error = xfs_buf_iowait(bp); |
340 | if (error) { | 340 | if (error) { |
341 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 341 | xfs_buf_ioerror_alert(bp, __func__); |
342 | bp, blkno); | ||
343 | xfs_buf_relse(bp); | 342 | xfs_buf_relse(bp); |
344 | /* | 343 | /* |
345 | * We can gracefully recover from most read | 344 | * We can gracefully recover from most read |
@@ -386,12 +385,11 @@ xfs_trans_read_buf( | |||
386 | return (flags & XBF_TRYLOCK) ? | 385 | return (flags & XBF_TRYLOCK) ? |
387 | 0 : XFS_ERROR(ENOMEM); | 386 | 0 : XFS_ERROR(ENOMEM); |
388 | } | 387 | } |
389 | if (XFS_BUF_GETERROR(bp) != 0) { | 388 | if (bp->b_error) { |
390 | XFS_BUF_SUPER_STALE(bp); | 389 | error = bp->b_error; |
391 | error = XFS_BUF_GETERROR(bp); | 390 | xfs_buf_stale(bp); |
392 | 391 | XFS_BUF_DONE(bp); | |
393 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 392 | xfs_buf_ioerror_alert(bp, __func__); |
394 | bp, blkno); | ||
395 | if (tp->t_flags & XFS_TRANS_DIRTY) | 393 | if (tp->t_flags & XFS_TRANS_DIRTY) |
396 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); | 394 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); |
397 | xfs_buf_relse(bp); | 395 | xfs_buf_relse(bp); |
@@ -430,7 +428,7 @@ shutdown_abort: | |||
430 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | 428 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) |
431 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); | 429 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); |
432 | #endif | 430 | #endif |
433 | ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != | 431 | ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) != |
434 | (XBF_STALE|XBF_DELWRI)); | 432 | (XBF_STALE|XBF_DELWRI)); |
435 | 433 | ||
436 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | 434 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
@@ -581,7 +579,6 @@ xfs_trans_bhold(xfs_trans_t *tp, | |||
581 | { | 579 | { |
582 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 580 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
583 | 581 | ||
584 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
585 | ASSERT(bp->b_transp == tp); | 582 | ASSERT(bp->b_transp == tp); |
586 | ASSERT(bip != NULL); | 583 | ASSERT(bip != NULL); |
587 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 584 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
@@ -602,7 +599,6 @@ xfs_trans_bhold_release(xfs_trans_t *tp, | |||
602 | { | 599 | { |
603 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 600 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
604 | 601 | ||
605 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
606 | ASSERT(bp->b_transp == tp); | 602 | ASSERT(bp->b_transp == tp); |
607 | ASSERT(bip != NULL); | 603 | ASSERT(bip != NULL); |
608 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 604 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
@@ -631,7 +627,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
631 | { | 627 | { |
632 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 628 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
633 | 629 | ||
634 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
635 | ASSERT(bp->b_transp == tp); | 630 | ASSERT(bp->b_transp == tp); |
636 | ASSERT(bip != NULL); | 631 | ASSERT(bip != NULL); |
637 | ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); | 632 | ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); |
@@ -648,13 +643,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
648 | * inside the b_bdstrat callback so that this won't get written to | 643 | * inside the b_bdstrat callback so that this won't get written to |
649 | * disk. | 644 | * disk. |
650 | */ | 645 | */ |
651 | XFS_BUF_DELAYWRITE(bp); | ||
652 | XFS_BUF_DONE(bp); | 646 | XFS_BUF_DONE(bp); |
653 | 647 | ||
654 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 648 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
655 | bp->b_iodone = xfs_buf_iodone_callbacks; | 649 | bp->b_iodone = xfs_buf_iodone_callbacks; |
656 | bip->bli_item.li_cb = xfs_buf_iodone; | 650 | bip->bli_item.li_cb = xfs_buf_iodone; |
657 | 651 | ||
652 | xfs_buf_delwri_queue(bp); | ||
653 | |||
658 | trace_xfs_trans_log_buf(bip); | 654 | trace_xfs_trans_log_buf(bip); |
659 | 655 | ||
660 | /* | 656 | /* |
@@ -702,7 +698,6 @@ xfs_trans_binval( | |||
702 | { | 698 | { |
703 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 699 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
704 | 700 | ||
705 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
706 | ASSERT(bp->b_transp == tp); | 701 | ASSERT(bp->b_transp == tp); |
707 | ASSERT(bip != NULL); | 702 | ASSERT(bip != NULL); |
708 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 703 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
@@ -744,8 +739,7 @@ xfs_trans_binval( | |||
744 | * We set the stale bit in the buffer as well since we're getting | 739 | * We set the stale bit in the buffer as well since we're getting |
745 | * rid of it. | 740 | * rid of it. |
746 | */ | 741 | */ |
747 | XFS_BUF_UNDELAYWRITE(bp); | 742 | xfs_buf_stale(bp); |
748 | XFS_BUF_STALE(bp); | ||
749 | bip->bli_flags |= XFS_BLI_STALE; | 743 | bip->bli_flags |= XFS_BLI_STALE; |
750 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 744 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
751 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; | 745 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
@@ -774,7 +768,6 @@ xfs_trans_inode_buf( | |||
774 | { | 768 | { |
775 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 769 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
776 | 770 | ||
777 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
778 | ASSERT(bp->b_transp == tp); | 771 | ASSERT(bp->b_transp == tp); |
779 | ASSERT(bip != NULL); | 772 | ASSERT(bip != NULL); |
780 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 773 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
@@ -798,7 +791,6 @@ xfs_trans_stale_inode_buf( | |||
798 | { | 791 | { |
799 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 792 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
800 | 793 | ||
801 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
802 | ASSERT(bp->b_transp == tp); | 794 | ASSERT(bp->b_transp == tp); |
803 | ASSERT(bip != NULL); | 795 | ASSERT(bip != NULL); |
804 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 796 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
@@ -823,7 +815,6 @@ xfs_trans_inode_alloc_buf( | |||
823 | { | 815 | { |
824 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 816 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
825 | 817 | ||
826 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
827 | ASSERT(bp->b_transp == tp); | 818 | ASSERT(bp->b_transp == tp); |
828 | ASSERT(bip != NULL); | 819 | ASSERT(bip != NULL); |
829 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 820 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
@@ -851,7 +842,6 @@ xfs_trans_dquot_buf( | |||
851 | { | 842 | { |
852 | xfs_buf_log_item_t *bip = bp->b_fspriv; | 843 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
853 | 844 | ||
854 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
855 | ASSERT(bp->b_transp == tp); | 845 | ASSERT(bp->b_transp == tp); |
856 | ASSERT(bip != NULL); | 846 | ASSERT(bip != NULL); |
857 | ASSERT(type == XFS_BLF_UDQUOT_BUF || | 847 | ASSERT(type == XFS_BLF_UDQUOT_BUF || |
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 4d00ee67792d..4d00ee67792d 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index c8dea2fd7e68..32f0288ae10f 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -47,11 +47,13 @@ xfs_trans_inode_broot_debug( | |||
47 | * Add a locked inode to the transaction. | 47 | * Add a locked inode to the transaction. |
48 | * | 48 | * |
49 | * The inode must be locked, and it cannot be associated with any transaction. | 49 | * The inode must be locked, and it cannot be associated with any transaction. |
50 | * If lock_flags is non-zero the inode will be unlocked on transaction commit. | ||
50 | */ | 51 | */ |
51 | void | 52 | void |
52 | xfs_trans_ijoin( | 53 | xfs_trans_ijoin( |
53 | struct xfs_trans *tp, | 54 | struct xfs_trans *tp, |
54 | struct xfs_inode *ip) | 55 | struct xfs_inode *ip, |
56 | uint lock_flags) | ||
55 | { | 57 | { |
56 | xfs_inode_log_item_t *iip; | 58 | xfs_inode_log_item_t *iip; |
57 | 59 | ||
@@ -59,7 +61,9 @@ xfs_trans_ijoin( | |||
59 | if (ip->i_itemp == NULL) | 61 | if (ip->i_itemp == NULL) |
60 | xfs_inode_item_init(ip, ip->i_mount); | 62 | xfs_inode_item_init(ip, ip->i_mount); |
61 | iip = ip->i_itemp; | 63 | iip = ip->i_itemp; |
64 | |||
62 | ASSERT(iip->ili_lock_flags == 0); | 65 | ASSERT(iip->ili_lock_flags == 0); |
66 | iip->ili_lock_flags = lock_flags; | ||
63 | 67 | ||
64 | /* | 68 | /* |
65 | * Get a log_item_desc to point at the new item. | 69 | * Get a log_item_desc to point at the new item. |
@@ -70,25 +74,6 @@ xfs_trans_ijoin( | |||
70 | } | 74 | } |
71 | 75 | ||
72 | /* | 76 | /* |
73 | * Add a locked inode to the transaction. | ||
74 | * | ||
75 | * | ||
76 | * Grabs a reference to the inode which will be dropped when the transaction | ||
77 | * is committed. The inode will also be unlocked at that point. The inode | ||
78 | * must be locked, and it cannot be associated with any transaction. | ||
79 | */ | ||
80 | void | ||
81 | xfs_trans_ijoin_ref( | ||
82 | struct xfs_trans *tp, | ||
83 | struct xfs_inode *ip, | ||
84 | uint lock_flags) | ||
85 | { | ||
86 | xfs_trans_ijoin(tp, ip); | ||
87 | IHOLD(ip); | ||
88 | ip->i_itemp->ili_lock_flags = lock_flags; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * Transactional inode timestamp update. Requires the inode to be locked and | 77 | * Transactional inode timestamp update. Requires the inode to be locked and |
93 | * joined to the transaction supplied. Relies on the transaction subsystem to | 78 | * joined to the transaction supplied. Relies on the transaction subsystem to |
94 | * track dirty state and update/writeback the inode accordingly. | 79 | * track dirty state and update/writeback the inode accordingly. |
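With xfs_trans_ijoin_ref() folded away there is a single join primitive, and the lock_flags argument decides who unlocks. Callers now choose between two forms (a usage sketch; note the old _ref variant also took an inode reference via IHOLD, which the unified call no longer does):

	/* caller keeps the ILOCK across commit/cancel */
	xfs_trans_ijoin(tp, ip, 0);

	/* the transaction owns the ILOCK; commit/cancel unlocks it */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);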
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 212946b97239..44820b9fcb43 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -64,23 +64,18 @@ struct xfs_ail_cursor { | |||
64 | */ | 64 | */ |
65 | struct xfs_ail { | 65 | struct xfs_ail { |
66 | struct xfs_mount *xa_mount; | 66 | struct xfs_mount *xa_mount; |
67 | struct task_struct *xa_task; | ||
67 | struct list_head xa_ail; | 68 | struct list_head xa_ail; |
68 | xfs_lsn_t xa_target; | 69 | xfs_lsn_t xa_target; |
69 | struct list_head xa_cursors; | 70 | struct list_head xa_cursors; |
70 | spinlock_t xa_lock; | 71 | spinlock_t xa_lock; |
71 | struct delayed_work xa_work; | ||
72 | xfs_lsn_t xa_last_pushed_lsn; | 72 | xfs_lsn_t xa_last_pushed_lsn; |
73 | unsigned long xa_flags; | 73 | int xa_log_flush; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | #define XFS_AIL_PUSHING_BIT 0 | ||
77 | |||
78 | /* | 76 | /* |
79 | * From xfs_trans_ail.c | 77 | * From xfs_trans_ail.c |
80 | */ | 78 | */ |
81 | |||
82 | extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ | ||
83 | |||
84 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, | 79 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
85 | struct xfs_ail_cursor *cur, | 80 | struct xfs_ail_cursor *cur, |
86 | struct xfs_log_item **log_items, int nr_items, | 81 | struct xfs_log_item **log_items, int nr_items, |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/xfs_vnode.h index 7c220b4227bc..7c220b4227bc 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h | |||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9322e13f0c63..4ecf2a549060 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -72,8 +72,8 @@ xfs_readlink_bmap( | |||
72 | xfs_buf_t *bp; | 72 | xfs_buf_t *bp; |
73 | int error = 0; | 73 | int error = 0; |
74 | 74 | ||
75 | error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, | 75 | error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps, |
76 | mval, &nmaps, NULL); | 76 | 0); |
77 | if (error) | 77 | if (error) |
78 | goto out; | 78 | goto out; |
79 | 79 | ||
@@ -83,10 +83,11 @@ xfs_readlink_bmap( | |||
83 | 83 | ||
84 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), | 84 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), |
85 | XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); | 85 | XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); |
86 | error = XFS_BUF_GETERROR(bp); | 86 | if (!bp) |
87 | return XFS_ERROR(ENOMEM); | ||
88 | error = bp->b_error; | ||
87 | if (error) { | 89 | if (error) { |
88 | xfs_ioerror_alert("xfs_readlink", | 90 | xfs_buf_ioerror_alert(bp, __func__); |
89 | ip->i_mount, bp, XFS_BUF_ADDR(bp)); | ||
90 | xfs_buf_relse(bp); | 91 | xfs_buf_relse(bp); |
91 | goto out; | 92 | goto out; |
92 | } | 93 | } |
@@ -94,7 +95,7 @@ xfs_readlink_bmap( | |||
94 | byte_cnt = pathlen; | 95 | byte_cnt = pathlen; |
95 | pathlen -= byte_cnt; | 96 | pathlen -= byte_cnt; |
96 | 97 | ||
97 | memcpy(link, XFS_BUF_PTR(bp), byte_cnt); | 98 | memcpy(link, bp->b_addr, byte_cnt); |
98 | xfs_buf_relse(bp); | 99 | xfs_buf_relse(bp); |
99 | } | 100 | } |
100 | 101 | ||
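xfs_bmapi_read() is the new read-only mapping interface used in these conversions: no transaction pointer, no firstblock, no free list — just the inode, the range, and a mapping array. The call shape, as a sketch (offset_fsb and count_fsb are hypothetical placeholders for the range being mapped):

	struct xfs_bmbt_irec	map;
	int			nmaps = 1;

	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &map, &nmaps, 0);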
@@ -176,8 +177,7 @@ xfs_free_eofblocks( | |||
176 | 177 | ||
177 | nimaps = 1; | 178 | nimaps = 1; |
178 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 179 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
179 | error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, | 180 | error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); |
180 | NULL, 0, &imap, &nimaps, NULL); | ||
181 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 181 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
182 | 182 | ||
183 | if (!error && (nimaps != 0) && | 183 | if (!error && (nimaps != 0) && |
@@ -218,7 +218,7 @@ xfs_free_eofblocks( | |||
218 | } | 218 | } |
219 | 219 | ||
220 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 220 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
221 | xfs_trans_ijoin(tp, ip); | 221 | xfs_trans_ijoin(tp, ip, 0); |
222 | 222 | ||
223 | error = xfs_itruncate_data(&tp, ip, ip->i_size); | 223 | error = xfs_itruncate_data(&tp, ip, ip->i_size); |
224 | if (error) { | 224 | if (error) { |
@@ -287,7 +287,7 @@ xfs_inactive_symlink_rmt( | |||
287 | xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 287 | xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); |
288 | size = (int)ip->i_d.di_size; | 288 | size = (int)ip->i_d.di_size; |
289 | ip->i_d.di_size = 0; | 289 | ip->i_d.di_size = 0; |
290 | xfs_trans_ijoin(tp, ip); | 290 | xfs_trans_ijoin(tp, ip, 0); |
291 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 291 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
292 | /* | 292 | /* |
293 | * Find the block(s) so we can inval and unmap them. | 293 | * Find the block(s) so we can inval and unmap them. |
@@ -295,9 +295,9 @@ xfs_inactive_symlink_rmt( | |||
295 | done = 0; | 295 | done = 0; |
296 | xfs_bmap_init(&free_list, &first_block); | 296 | xfs_bmap_init(&free_list, &first_block); |
297 | nmaps = ARRAY_SIZE(mval); | 297 | nmaps = ARRAY_SIZE(mval); |
298 | if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), | 298 | error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size), |
299 | XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, | 299 | mval, &nmaps, 0); |
300 | &free_list))) | 300 | if (error) |
301 | goto error0; | 301 | goto error0; |
302 | /* | 302 | /* |
303 | * Invalidate the block(s). | 303 | * Invalidate the block(s). |
@@ -306,6 +306,10 @@ xfs_inactive_symlink_rmt( | |||
306 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, | 306 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, |
307 | XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), | 307 | XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), |
308 | XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); | 308 | XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); |
309 | if (!bp) { | ||
310 | error = ENOMEM; | ||
311 | goto error1; | ||
312 | } | ||
309 | xfs_trans_binval(tp, bp); | 313 | xfs_trans_binval(tp, bp); |
310 | } | 314 | } |
311 | /* | 315 | /* |
@@ -331,7 +335,7 @@ xfs_inactive_symlink_rmt(
 	 * Mark it dirty so it will be logged and moved forward in the log as
 	 * part of every commit.
 	 */
-	xfs_trans_ijoin(tp, ip);
+	xfs_trans_ijoin(tp, ip, 0);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	/*
 	 * Get a new, empty transaction to return to our caller.
@@ -464,7 +468,7 @@ xfs_inactive_attrs(
 		goto error_cancel;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip);
+	xfs_trans_ijoin(tp, ip, 0);
 	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
 
 	ASSERT(ip->i_d.di_anextents == 0);
@@ -645,8 +649,6 @@ xfs_inactive(
 	if (truncate) {
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
-		xfs_ioend_wait(ip);
-
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_ITRUNCATE_LOG_RES(mp),
 					  0, XFS_TRANS_PERM_LOG_RES,
@@ -660,7 +662,7 @@ xfs_inactive(
 		}
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(tp, ip, 0);
 
 		error = xfs_itruncate_data(&tp, ip, 0);
 		if (error) {
@@ -684,7 +686,7 @@ xfs_inactive(
 			return VN_INACTIVE_CACHE;
 		}
 
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(tp, ip, 0);
 	} else {
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_IFREE_LOG_RES(mp),
@@ -697,7 +699,7 @@ xfs_inactive(
 		}
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(tp, ip, 0);
 	}
 
 	/*
@@ -937,7 +939,7 @@ xfs_create(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1258,8 +1260,8 @@ xfs_remove(
 
 	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
 
-	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * If we're removing a directory perform some additional validation.
@@ -1404,8 +1406,8 @@ xfs_link(
 
 	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
-	xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
 
 	/*
 	 * If the source has too many links, we can't make any more to it.
@@ -1599,7 +1601,7 @@ xfs_symlink(
 	 * transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
 	/*
@@ -1630,10 +1632,9 @@ xfs_symlink(
 		first_fsb = 0;
 		nmaps = SYMLINK_MAPS;
 
-		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
-				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
-				  &first_block, resblks, mval, &nmaps,
-				  &free_list);
+		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
+					XFS_BMAPI_METADATA, &first_block, resblks,
+					mval, &nmaps, &free_list);
 		if (error)
 			goto error2;
 
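xfs_bmapi_write() folds the old XFS_BMAPI_WRITE flag into the function name, so callers pass only modifier flags such as XFS_BMAPI_METADATA; the transaction, firstblock pointer, reservation, mapping array and free list remain, in the order shown. A sketch of the allocating call, reusing the locals from the hunk above with explanatory comments:

	/* Sketch: allocate fs_blocks worth of blocks for the symlink under tp. */
	xfs_bmap_init(&free_list, &first_block);
	nmaps = SYMLINK_MAPS;
	error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				XFS_BMAPI_METADATA,	/* WRITE is now implied */
				&first_block, resblks,	/* allocation hints */
				mval, &nmaps,		/* returned mappings */
				&free_list);		/* deferred frees */
	if (error)
		goto error2;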
@@ -1648,13 +1649,16 @@ xfs_symlink(
 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					       BTOBB(byte_cnt), 0);
-			ASSERT(bp && !XFS_BUF_GETERROR(bp));
+			if (!bp) {
+				error = ENOMEM;
+				goto error2;
+			}
 			if (pathlen < byte_cnt) {
 				byte_cnt = pathlen;
 			}
 			pathlen -= byte_cnt;
 
-			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
+			memcpy(bp->b_addr, cur_chunk, byte_cnt);
 			cur_chunk += byte_cnt;
 
 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -1730,7 +1734,7 @@ xfs_set_dmattrs(
 		return error;
 	}
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	ip->i_d.di_dmevmask = evmask;
 	ip->i_d.di_dmstate = state;
@@ -1776,7 +1780,6 @@ xfs_alloc_file_space(
 	xfs_fileoff_t		startoffset_fsb;
 	xfs_fsblock_t		firstfsb;
 	int			nimaps;
-	int			bmapi_flag;
 	int			quota_flag;
 	int			rt;
 	xfs_trans_t		*tp;
@@ -1804,7 +1807,6 @@ xfs_alloc_file_space(
 	count = len;
 	imapp = &imaps[0];
 	nimaps = 1;
-	bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
 	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
 	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 
@@ -1875,16 +1877,12 @@ xfs_alloc_file_space(
 		if (error)
 			goto error1;
 
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(tp, ip, 0);
 
-		/*
-		 * Issue the xfs_bmapi() call to allocate the blocks
-		 */
 		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bmapi(tp, ip, startoffset_fsb,
-				  allocatesize_fsb, bmapi_flag,
-				  &firstfsb, 0, imapp, &nimaps,
-				  &free_list);
+		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+					allocatesize_fsb, alloc_type, &firstfsb,
+					0, imapp, &nimaps, &free_list);
 		if (error) {
 			goto error0;
 		}
@@ -1974,8 +1972,7 @@ xfs_zero_remaining_bytes(
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
-			NULL, 0, &imap, &nimap, NULL);
+		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
 		if (error || nimap < 1)
 			break;
 		ASSERT(imap.br_blockcount >= 1);
@@ -1995,11 +1992,11 @@ xfs_zero_remaining_bytes(
 		xfsbdstrat(mp, bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
-			xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
-					  mp, bp, XFS_BUF_ADDR(bp));
+			xfs_buf_ioerror_alert(bp,
+					"xfs_zero_remaining_bytes(read)");
 			break;
 		}
-		memset(XFS_BUF_PTR(bp) +
+		memset(bp->b_addr +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
 		      0, lastoffset - offset + 1);
 		XFS_BUF_UNDONE(bp);
@@ -2008,8 +2005,8 @@ xfs_zero_remaining_bytes(
 		xfsbdstrat(mp, bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
-			xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
-					  mp, bp, XFS_BUF_ADDR(bp));
+			xfs_buf_ioerror_alert(bp,
+					"xfs_zero_remaining_bytes(write)");
 			break;
 		}
 	}
@@ -2074,7 +2071,7 @@ xfs_free_file_space(
 	if (need_iolock) {
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		/* wait for the completion of any pending DIOs */
-		xfs_ioend_wait(ip);
+		inode_dio_wait(VFS_I(ip));
 	}
 
 	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
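With the XFS-private ioend counting gone (see the removal of xfs_ioend_wait() in xfs_inactive() above), waiting for in-flight direct I/O is delegated to the generic VFS helper: inode_dio_wait() sleeps until the inode's outstanding-DIO count drops to zero, and VFS_I() converts the xfs_inode to the VFS inode it expects. The idiom, as in the hunk above:

	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	/* wait for the completion of any pending DIOs */
	inode_dio_wait(VFS_I(ip));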
@@ -2094,8 +2091,8 @@ xfs_free_file_space(
 	 */
 	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, startoffset_fsb,
-			1, 0, NULL, 0, &imap, &nimap, NULL);
+		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
+					&imap, &nimap, 0);
 		if (error)
 			goto out_unlock_iolock;
 		ASSERT(nimap == 0 || nimap == 1);
@@ -2109,8 +2106,8 @@ xfs_free_file_space(
 			startoffset_fsb += mp->m_sb.sb_rextsize - mod;
 		}
 		nimap = 1;
-		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
-			1, 0, NULL, 0, &imap, &nimap, NULL);
+		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
+					&imap, &nimap, 0);
 		if (error)
 			goto out_unlock_iolock;
 		ASSERT(nimap == 0 || nimap == 1);
@@ -2178,7 +2175,7 @@ xfs_free_file_space(
 		if (error)
 			goto error1;
 
-		xfs_trans_ijoin(tp, ip);
+		xfs_trans_ijoin(tp, ip, 0);
 
 		/*
 		 * issue the bunmapi() call to free the blocks
@@ -2351,8 +2348,7 @@ xfs_change_file_space(
 	}
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	if ((attr_flags & XFS_ATTR_DMI) == 0) {
 		ip->i_d.di_mode &= ~S_ISUID;
@@ -2377,10 +2373,5 @@ xfs_change_file_space(
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	if (attr_flags & XFS_ATTR_SYNC)
 		xfs_trans_set_sync(tp);
-
-	error = xfs_trans_commit(tp, 0);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	return error;
+	return xfs_trans_commit(tp, 0);
 }
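One pattern runs through every hunk in this file: xfs_trans_ijoin_ref(tp, ip, flags) and the old two-argument xfs_trans_ijoin(tp, ip) are unified into a single xfs_trans_ijoin(tp, ip, lock_flags), where lock_flags names the inode locks the transaction should drop at commit or cancel. Passing 0 keeps unlocking in the caller's hands; passing XFS_ILOCK_EXCL hands it to the transaction, which is why the xfs_change_file_space() hunks above can delete the explicit xfs_iunlock() after xfs_trans_commit(). Both shapes, sketched:

	/* Transaction owns the unlock: commit/cancel drops XFS_ILOCK_EXCL. */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	/* ... modify ip ... */
	error = xfs_trans_commit(tp, 0);	/* no xfs_iunlock() needed */

	/* Caller owns the unlock: join with 0 and unlock explicitly. */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);
	/* ... modify ip, commit ... */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);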
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 87d3e03878c8..87d3e03878c8 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c